From e2d33ffce4d4e5fe99b7b4c3d33ca1167122be63 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:17:59 +0000
Subject: [PATCH 1/9] fix(models): persist
 context_length/threshold_tokens/last_prompt_tokens in Session model (#1318
 split)

From PR #1341.

Co-authored-by: fxd-jason <wujiachen7@gmail.com>
---
 api/models.py | 9 +++++++++
 api/routes.py | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/api/models.py b/api/models.py
index 981d0ccf..bf1dda8e 100644
--- a/api/models.py
+++ b/api/models.py
@@ -318,6 +318,8 @@ class Session:
                  context_messages=None,
                  compression_anchor_visible_idx=None,
                  compression_anchor_message_key=None,
+                 context_length=None, threshold_tokens=None,
+                 last_prompt_tokens=None,
                  **kwargs):
         self.session_id = session_id or uuid.uuid4().hex[:12]
         self.title = title
@@ -342,6 +344,9 @@ class Session:
         self.context_messages = context_messages if isinstance(context_messages, list) else []
         self.compression_anchor_visible_idx = compression_anchor_visible_idx
         self.compression_anchor_message_key = compression_anchor_message_key
+        self.context_length = context_length
+        self.threshold_tokens = threshold_tokens
+        self.last_prompt_tokens = last_prompt_tokens
         self._metadata_message_count = None
 
     @property
@@ -361,6 +366,7 @@ class Session:
             'personality', 'active_stream_id',
             'pending_user_message', 'pending_attachments', 'pending_started_at',
             'compression_anchor_visible_idx', 'compression_anchor_message_key',
+            'context_length', 'threshold_tokens', 'last_prompt_tokens',
         ]
         meta = {k: getattr(self, k, None) for k in METADATA_FIELDS}
         meta['messages'] = self.messages
@@ -452,6 +458,9 @@ class Session:
             'personality': self.personality,
             'compression_anchor_visible_idx': self.compression_anchor_visible_idx,
             'compression_anchor_message_key': self.compression_anchor_message_key,
+            'context_length': self.context_length,
+            'threshold_tokens': self.threshold_tokens,
+            'last_prompt_tokens': self.last_prompt_tokens,
             'active_stream_id': self.active_stream_id,
             'is_streaming': _is_streaming_session(
                 self.active_stream_id, active_stream_ids
diff --git a/api/routes.py b/api/routes.py
index 7ec990f5..81815204 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -921,6 +921,9 @@ def handle_get(handler, parsed) -> bool:
                 "pending_user_message": getattr(s, "pending_user_message", None),
                 "pending_attachments": getattr(s, "pending_attachments", []) if load_messages else [],
                 "pending_started_at": getattr(s, "pending_started_at", None),
+                "context_length": getattr(s, "context_length", 0) or 0,
+                "threshold_tokens": getattr(s, "threshold_tokens", 0) or 0,
+                "last_prompt_tokens": getattr(s, "last_prompt_tokens", 0) or 0,
             }
             # Signal to the frontend that older messages were omitted.
             # For msg_before paging, compare against the filtered set,

From 09e12e3c6062810f0e4165f55170f5d9ca8c8ac9 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:18:00 +0000
Subject: [PATCH 2/9] fix(streaming): handle list fallback_providers config in
 addition to single fallback_model dict

From PR #1339.

Co-authored-by: Jim Dawdy <jimdawdy@Jims-MacBook-Pro.local>
---
 api/streaming.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/api/streaming.py b/api/streaming.py
index 3d3b778a..9987401c 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -1701,19 +1701,25 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
             _toolsets = _resolve_cli_toolsets(_cfg)
 
             # Fallback model from profile config (e.g. for rate-limit recovery)
-            _fallback = _cfg.get('fallback_model') or None
+            _fallback = _cfg.get('fallback_model') or _cfg.get('fallback_providers') or None
+            _fallback_resolved = None
             if _fallback:
-                # Resolve the fallback through our provider logic too
-                fb_model = _fallback.get('model', '')
-                fb_provider = _fallback.get('provider', '')
-                fb_base_url = _fallback.get('base_url')
-                _fallback_resolved = {
-                    'model': fb_model,
-                    'provider': fb_provider,
-                    'base_url': fb_base_url,
-                }
-            else:
-                _fallback_resolved = None
+                # Normalize: support both single dict (legacy) and list (chained fallback).
+                # Use the first valid entry as the fallback passed to AIAgent.
+                _fb_entry = None
+                if isinstance(_fallback, list):
+                    for _entry in _fallback:
+                        if isinstance(_entry, dict) and _entry.get('model'):
+                            _fb_entry = _entry
+                            break
+                elif isinstance(_fallback, dict) and _fallback.get('model'):
+                    _fb_entry = _fallback
+                if _fb_entry:
+                    _fallback_resolved = {
+                        'model': _fb_entry.get('model', ''),
+                        'provider': _fb_entry.get('provider', ''),
+                        'base_url': _fb_entry.get('base_url'),
+                    }
 
             # Build kwargs defensively — guard newer params so the WebUI
             # degrades gracefully when run against an older hermes-agent build.

From fbe84d26e6af49bd1f80f66074615b4ebb4123c7 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:18:01 +0000
Subject: [PATCH 3/9] fix(ui+pwa): avoid stale Mermaid render errors and bust
 cached static asset URLs on every release

From PR #1337.

Co-authored-by: Dennis Soong <dso2ng@gmail.com>
---
 api/routes.py                          |  9 ++++--
 static/index.html                      | 24 +++++++-------
 static/sw.js                           |  4 +++
 static/ui.js                           | 22 ++++++++++---
 tests/test_issue347.py                 | 15 +++++++++
 tests/test_onboarding_static.py        |  2 +-
 tests/test_pwa_manifest_sw.py          | 25 ++++++++++++++
 tests/test_renderer_js_behaviour.py    | 45 ++++++++++++++++++++++++++
 tests/test_service_worker_api_cache.py |  6 ++++
 tests/test_sprint9.py                  | 16 ++++-----
 10 files changed, 140 insertions(+), 28 deletions(-)

diff --git a/api/routes.py b/api/routes.py
index 81815204..871a5783 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -717,9 +717,12 @@ def handle_get(handler, parsed) -> bool:
     """Handle all GET routes. Returns True if handled, False for 404."""
 
     if parsed.path in ("/", "/index.html"):
+        from urllib.parse import quote
+        from api.updates import WEBUI_VERSION
+        version_token = quote(WEBUI_VERSION, safe="")
         return t(
             handler,
-            _INDEX_HTML_PATH.read_text(encoding="utf-8"),
+            _INDEX_HTML_PATH.read_text(encoding="utf-8").replace("__WEBUI_VERSION__", version_token),
             content_type="text/html; charset=utf-8",
         )
 
@@ -776,9 +779,11 @@ def handle_get(handler, parsed) -> bool:
         if sw_path.exists():
             # Inject the current git-derived version as the cache name so the
             # service worker cache busts automatically on every new deploy.
+            from urllib.parse import quote
             from api.updates import WEBUI_VERSION
+            version_token = quote(WEBUI_VERSION, safe="")
             text = sw_path.read_text(encoding="utf-8").replace(
-                "__CACHE_VERSION__", WEBUI_VERSION
+                "__CACHE_VERSION__", version_token
             )
             data = text.encode("utf-8")
             handler.send_response(200)
diff --git a/static/index.html b/static/index.html
index b4441ceb..8b34284a 100644
--- a/static/index.html
+++ b/static/index.html
@@ -47,7 +47,7 @@
   <script>
     if ('serviceWorker' in navigator) {
       window.addEventListener('load', function() {
-        navigator.serviceWorker.register('sw.js').catch(function(err) {
+        navigator.serviceWorker.register('sw.js?v=__WEBUI_VERSION__').catch(function(err) {
           console.warn('[pwa] Service worker registration failed:', err);
         });
       });
@@ -951,16 +951,16 @@
   </div>
 </div>
 <div class="toast" id="toast"></div>
-<script src="static/i18n.js" defer></script>
-<script src="static/icons.js" defer></script>
-<script src="static/ui.js" defer></script>
-<script src="static/workspace.js" defer></script>
-<script src="static/terminal.js" defer></script>
-<script src="static/sessions.js" defer></script>
-<script src="static/commands.js" defer></script>
-<script src="static/messages.js" defer></script>
-<script src="static/panels.js" defer></script>
-<script src="static/onboarding.js" defer></script>
-<script src="static/boot.js" defer></script>
+<script src="static/i18n.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/icons.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/ui.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/workspace.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/terminal.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/sessions.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/commands.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/messages.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/panels.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/onboarding.js?v=__WEBUI_VERSION__" defer></script>
+<script src="static/boot.js?v=__WEBUI_VERSION__" defer></script>
 </body>
 </html>
diff --git a/static/sw.js b/static/sw.js
index a141d4d3..58fa2c46 100644
--- a/static/sw.js
+++ b/static/sw.js
@@ -64,6 +64,10 @@ self.addEventListener('fetch', (event) => {
   // Never intercept cross-origin requests
   if (url.origin !== self.location.origin) return;
 
+  // Never intercept the service worker script itself. Returning a cached sw.js
+  // prevents the browser from seeing a new cache version after local patches.
+  if (url.pathname.endsWith('/sw.js')) return;
+
   // API and streaming endpoints — always go to network.
   // The WebUI may be mounted under a subpath such as /hermes/, so API
   // requests can look like /hermes/api/sessions rather than /api/sessions.
diff --git a/static/ui.js b/static/ui.js
index cffc8a31..cdd95429 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -1085,12 +1085,17 @@ function renderMd(raw){
   const fence_stash=[];
   s=s.replace(/```([\s\S]*?)```/g,(_,raw)=>{
     const m=raw.match(/^(\w[\w+-]*)\n?([\s\S]*)$/);
-    if(m&&m[1].trim().toLowerCase()==='mermaid'){
+    const lang=m?(m[1]||'').trim().toLowerCase():'';
+    const code=m?m[2]:raw.replace(/^\n?/,'');
+    const codeLines=code.split('\n');
+    const firstCodeLine=codeLines.find(line=>line.trim())||'';
+    const firstMermaidLine=codeLines.map(line=>line.trim()).find(line=>line&&!line.startsWith('%%'))||'';
+    const looksLikeLineNumberedToolOutput=/^\s*\d+\|/.test(firstCodeLine);
+    const looksLikeMermaidStart=firstMermaidLine==='---'||/^(graph|flowchart|sequenceDiagram|classDiagram|classDiagram-v2|stateDiagram|stateDiagram-v2|erDiagram|journey|gantt|pie|gitGraph|mindmap|timeline|quadrantChart|requirementDiagram|C4Context|C4Container|C4Component|C4Dynamic|c4Context|c4Container|c4Component|c4Dynamic|sankey-beta|block-beta|packet-beta|xychart-beta|kanban|architecture-beta)\b/.test(firstMermaidLine);
+    if(lang==='mermaid'&&!looksLikeLineNumberedToolOutput&&looksLikeMermaidStart){
       const id='mermaid-'+Math.random().toString(36).slice(2,10);
-      _preBlock_stash.push(`<div class="mermaid-block" data-mermaid-id="${id}">${esc(m[2].trim())}</div>`);
+      _preBlock_stash.push(`<div class="mermaid-block" data-mermaid-id="${id}">${esc(code.trim())}</div>`);
     } else {
-      const lang=m?(m[1]||'').trim().toLowerCase():'';
-      const code=m?m[2]:raw.replace(/^\n?/,'');
       const h=lang?`<div class="pre-header">${esc(lang)}</div>`:'';
       const langAttr=lang?` class="language-${esc(lang)}"`:'';
       // For diff/patch blocks, wrap each line in a colored span
@@ -4130,10 +4135,17 @@ function renderMermaidBlocks(){
     const id=block.dataset.mermaidId||('m-'+Math.random().toString(36).slice(2));
     try{
       const {svg}=await mermaid.render(id,code);
+      const tmp=document.getElementById('d'+id);
+      if(tmp) tmp.remove();
       block.innerHTML=svg;
       block.classList.add('mermaid-rendered');
     }catch(e){
-      // Fall back to showing as a code block
+      const tmp=document.getElementById('d'+id);
+      if(tmp) tmp.remove();
+      // Fall back to showing as a code block. Remove the mermaid marker so a
+      // later render pass cannot retry this already-failed block.
+      block.classList.remove('mermaid-block');
+      block.classList.add('prewrap');
       block.innerHTML=`<div class="pre-header">mermaid</div><pre><code>${esc(code)}</code></pre>`;
     }
   });
diff --git a/tests/test_issue347.py b/tests/test_issue347.py
index f1c05ac3..9a0c65d0 100644
--- a/tests/test_issue347.py
+++ b/tests/test_issue347.py
@@ -105,6 +105,21 @@ def test_render_katex_blocks_wired_into_raf():
         'renderKatexBlocks() not found in any requestAnimationFrame call — math will not render'
 
 
+def test_mermaid_render_failure_removes_temporary_error_dom():
+    """Failed Mermaid renders must not leave Mermaid's body-level syntax-error SVG visible."""
+    fn_start = UI_JS.find('function renderMermaidBlocks()')
+    assert fn_start != -1, 'renderMermaidBlocks() function not found in ui.js'
+    fn = UI_JS[fn_start:fn_start + 2200]
+    cleanup = "const tmp=document.getElementById('d'+id);\n      if(tmp) tmp.remove();"
+    assert cleanup in fn, (
+        "renderMermaidBlocks() must remove Mermaid's temporary d<id> container; "
+        "otherwise rejected renders leave a visible 'Syntax error in text' SVG in every tab."
+    )
+    assert fn.count(cleanup) >= 2, (
+        "Mermaid temporary DOM cleanup must run after both successful and failed renders."
+    )
+
+
 # ── index.html ────────────────────────────────────────────────────────────────
 
 def test_katex_css_in_index_html():
diff --git a/tests/test_onboarding_static.py b/tests/test_onboarding_static.py
index f61f3a99..b0d7bb95 100644
--- a/tests/test_onboarding_static.py
+++ b/tests/test_onboarding_static.py
@@ -13,7 +13,7 @@ def test_index_contains_onboarding_overlay_markup():
     assert 'id="onboardingOverlay"' in html
     assert 'id="onboardingBody"' in html
     assert 'id="onboardingNextBtn"' in html
-    assert 'src="static/onboarding.js"' in html
+    assert 'src="static/onboarding.js?v=__WEBUI_VERSION__"' in html
 
 
 def test_onboarding_css_rules_exist():
diff --git a/tests/test_pwa_manifest_sw.py b/tests/test_pwa_manifest_sw.py
index 34897335..82e956ff 100644
--- a/tests/test_pwa_manifest_sw.py
+++ b/tests/test_pwa_manifest_sw.py
@@ -124,6 +124,16 @@ class TestPWARoutes:
             "sw.js route must import and use WEBUI_VERSION for cache busting"
         )
 
+    def test_sw_route_url_encodes_cache_version(self):
+        src = ROUTES.read_text(encoding="utf-8")
+        idx = src.find('"/sw.js"')
+        assert idx != -1, "routes.py must handle /sw.js"
+        block = src[idx:idx + 1200]
+        assert "quote(WEBUI_VERSION, safe=\"\")" in block, (
+            "sw.js route must URL-encode the injected cache version so unusual git tags "
+            "cannot break the JavaScript string literal"
+        )
+
     def test_sw_route_sets_service_worker_allowed(self):
         src = ROUTES.read_text(encoding="utf-8")
         idx = src.find('"/sw.js"')
@@ -145,6 +155,21 @@ class TestIndexHtmlIntegration:
             "index.html must register the service worker"
         )
 
+    def test_index_uses_version_placeholders_for_static_assets(self):
+        src = INDEX.read_text(encoding="utf-8")
+        assert "sw.js?v=__WEBUI_VERSION__" in src
+        assert "static/ui.js?v=__WEBUI_VERSION__" in src
+
+    def test_index_route_url_encodes_asset_version(self):
+        src = ROUTES.read_text(encoding="utf-8")
+        idx = src.find('parsed.path in ("/", "/index.html")')
+        assert idx != -1, "routes.py must handle / and /index.html"
+        block = src[idx:idx + 800]
+        assert "quote(WEBUI_VERSION, safe=\"\")" in block, (
+            "index route must URL-encode the cache-busting version token before "
+            "injecting it into script src attributes and service worker registration"
+        )
+
     def test_index_has_ios_pwa_meta_tags(self):
         src = INDEX.read_text(encoding="utf-8")
         assert "apple-mobile-web-app-capable" in src, (
diff --git a/tests/test_renderer_js_behaviour.py b/tests/test_renderer_js_behaviour.py
index 6b7c3f09..102c69df 100644
--- a/tests/test_renderer_js_behaviour.py
+++ b/tests/test_renderer_js_behaviour.py
@@ -503,6 +503,51 @@ class TestBlockquoteEntityEncodedInput:
         assert "<pre>" in out, f"Fenced code inside entity-encoded blockquote must render: {out!r}"
 
 
+class TestMermaidToolOutputGuard:
+    """Line-numbered tool excerpts must not be auto-rendered as Mermaid."""
+
+    def test_line_numbered_mermaid_fence_renders_as_code_block(self, driver_path):
+        src = "```mermaid\n23|flowchart TB\n24|    A --> B\n```"
+        out = _render(driver_path, src)
+        assert 'class="mermaid-block"' not in out, (
+            f"Line-numbered read_file excerpts are not valid Mermaid and must not auto-render: {out!r}"
+        )
+        assert '<div class="pre-header">mermaid</div>' in out
+        assert '<pre><code class="language-mermaid">' in out
+        assert '23|flowchart TB' in out
+
+    def test_valid_mermaid_fence_still_creates_mermaid_block(self, driver_path):
+        out = _render(driver_path, "```mermaid\nflowchart TB\n    A --> B\n```")
+        assert 'class="mermaid-block"' in out, (
+            f"Valid Mermaid fences should still be queued for Mermaid rendering: {out!r}"
+        )
+        assert 'flowchart TB' in out
+
+    def test_valid_mermaid_c4_fence_still_creates_mermaid_block(self, driver_path):
+        out = _render(driver_path, "```mermaid\nC4Context\n    title System Context\n```")
+        assert 'class="mermaid-block"' in out, (
+            f"Valid C4 Mermaid fences should still be queued for Mermaid rendering: {out!r}"
+        )
+        assert 'C4Context' in out
+
+    def test_valid_mermaid_frontmatter_fence_still_creates_mermaid_block(self, driver_path):
+        out = _render(driver_path, "```mermaid\n---\ntitle: Demo\n---\nflowchart TB\n    A --> B\n```")
+        assert 'class="mermaid-block"' in out, (
+            f"Valid Mermaid fences with frontmatter should still be queued for Mermaid rendering: {out!r}"
+        )
+        assert 'title: Demo' in out
+
+    def test_prose_mention_of_mermaid_fence_renders_as_code_block(self, driver_path):
+        src = "```mermaid\n` fence should not be auto-rendered too aggressively.\n\nSome prose, not a diagram.\n```"
+        out = _render(driver_path, src)
+        assert 'class="mermaid-block"' not in out, (
+            f"Prose captured by a mermaid fence is not valid Mermaid and must not auto-render: {out!r}"
+        )
+        assert '<div class="pre-header">mermaid</div>' in out
+        assert '<pre><code class="language-mermaid">' in out
+        assert 'Some prose, not a diagram.' in out
+
+
 class TestRawPreCodePreservation:
     """Raw <pre><code> HTML from model output should remain structurally intact."""
 
diff --git a/tests/test_service_worker_api_cache.py b/tests/test_service_worker_api_cache.py
index 6900524f..3118357f 100644
--- a/tests/test_service_worker_api_cache.py
+++ b/tests/test_service_worker_api_cache.py
@@ -29,3 +29,9 @@ def test_service_worker_excludes_subpath_mounted_health_routes_from_cache():
 def test_service_worker_documents_api_routes_are_never_cached():
     assert "API and streaming endpoints" in SW_SRC
     assert "always go to network" in SW_SRC
+
+
+def test_service_worker_does_not_intercept_its_own_script():
+    assert "url.pathname.endsWith('/sw.js')" in SW_SRC, (
+        "service worker must bypass /sw.js so a stale cached worker cannot block cache-version updates"
+    )
diff --git a/tests/test_sprint9.py b/tests/test_sprint9.py
index 78711972..7a74e29a 100644
--- a/tests/test_sprint9.py
+++ b/tests/test_sprint9.py
@@ -68,19 +68,19 @@ def test_app_js_no_longer_referenced_in_html(cleanup_test_sessions):
     """index.html must not reference the old monolithic app.js."""
     html = get_text("/")
     assert 'src="static/app.js"' not in html
-    # All 6 modules must be present
+    # All split modules must be present with the server-injected cache-busting version query.
     for module in ["ui.js", "workspace.js", "sessions.js", "messages.js", "panels.js", "boot.js"]:
-        assert f'src="static/{module}"' in html, f"Missing {module} in index.html"
+        assert f'src="static/{module}?v=' in html, f"Missing versioned {module} in index.html"
 
 def test_module_load_order_correct(cleanup_test_sessions):
     """ui.js must appear before sessions.js which must appear before boot.js."""
     html = get_text("/")
-    ui_pos = html.find('src="static/ui.js"')
-    ws_pos = html.find('src="static/workspace.js"')
-    sess_pos = html.find('src="static/sessions.js"')
-    msg_pos = html.find('src="static/messages.js"')
-    panels_pos = html.find('src="static/panels.js"')
-    boot_pos = html.find('src="static/boot.js"')
+    ui_pos = html.find('src="static/ui.js?v=')
+    ws_pos = html.find('src="static/workspace.js?v=')
+    sess_pos = html.find('src="static/sessions.js?v=')
+    msg_pos = html.find('src="static/messages.js?v=')
+    panels_pos = html.find('src="static/panels.js?v=')
+    boot_pos = html.find('src="static/boot.js?v=')
     assert ui_pos < ws_pos < sess_pos < msg_pos < panels_pos < boot_pos
 
 def test_no_duplicate_function_definitions(cleanup_test_sessions):

From 1fa740d32f37ef23c5992e01fb48810f60b15522 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:18:02 +0000
Subject: [PATCH 4/9] feat(chat): render fenced code blocks in user messages
 (#1325)

From PR #1335.

Co-authored-by: bergeouss <bergeouss@users.noreply.github.com>
---
 static/ui.js                        |  39 ++++++++-
 tests/test_1325_user_fenced_code.py | 123 ++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_1325_user_fenced_code.py

diff --git a/static/ui.js b/static/ui.js
index cdd95429..c6bc2104 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -51,6 +51,43 @@ function _setCompressionSessionLock(sid){
 }
 const esc=s=>String(s??'').replace(/[&<>"']/g,c=>({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]));
 
+/**
+ * Render fenced code blocks inside user messages: ```…``` fences are stashed
+ * behind placeholders, the remaining text is HTML-escaped (no bold/italic/link
+ * interpretation, newlines → <br>), then the fences are restored as <pre><code>.
+ * NUL characters are dropped from the input because they delimit placeholders.
+ * @param {*} text raw message text; falsy values render as ''. @returns {string} HTML
+ */
+function _renderUserFencedBlocks(text){
+  const stash=[];
+  let s=String(text||'').replace(/\x00/g,''); // strip NULs so user text cannot forge a \x00UF<n>\x00 placeholder
+  // Extract fenced code blocks → stash, replace with null-token placeholder
+  s=s.replace(/```([a-zA-Z0-9_+-]*)\n([\s\S]*?)```/g,(_,lang,code)=>{
+    lang=(lang||'').trim().toLowerCase();
+    // Remove one trailing newline if present (the fence consumes its own)
+    if(code.endsWith('\n')) code=code.slice(0,-1);
+    const h=lang?`<div class="pre-header">${esc(lang)}</div>`:'';
+    const langAttr=lang?` class="language-${esc(lang)}"`:'';
+    if(lang==='diff'||lang==='patch'){
+      const colored=esc(code).split('\n').map(line=>{
+        if(line.startsWith('@@')) return `<span class="diff-line diff-hunk">${line}</span>`;
+        if(line.startsWith('+')) return `<span class="diff-line diff-plus">${line}</span>`;
+        if(line.startsWith('-')) return `<span class="diff-line diff-minus">${line}</span>`;
+        return `<span class="diff-line">${line}</span>`;
+      }).join('\n');
+      stash.push(`${h}<pre class="diff-block"><code${langAttr}>${colored}</code></pre>`);
+    } else {
+      stash.push(`${h}<pre><code${langAttr}>${esc(code)}</code></pre>`);
+    }
+    return '\x00UF'+(stash.length-1)+'\x00';
+  });
+  // Escape remaining plain text and convert newlines to <br>
+  s=esc(s).replace(/\n/g,'<br>');
+  // Restore stashed code blocks (every match is genuine now that input NULs are gone)
+  s=s.replace(/\x00UF(\d+)\x00/g,(_,i)=>stash[+i]);
+  return s;
+}
+
 /* ── Image lightbox — click any .msg-media-img to enlarge ─────────────────── */
 function _openImgLightbox(src, alt) {
   const lb = document.createElement('div');
@@ -3047,7 +3084,7 @@ function renderMessages(){
         return _renderAttachmentHtml(fname,fileUrl);
       }).join('')}</div>`;
     }
-    const bodyHtml = isUser ? esc(String(content)).replace(/\n/g,'<br>') : renderMd(_stripXmlToolCallsDisplay(String(content)));
+    const bodyHtml = isUser ? _renderUserFencedBlocks(content) : renderMd(_stripXmlToolCallsDisplay(String(content)));
     const isEditableUser=isUser&&rawIdx===lastUserRawIdx;
     const editBtn  = isEditableUser ? `<button class="msg-action-btn" title="${t('edit_message')}" onclick="editMessage(this)">${li('pencil',13)}</button>` : '';
     const undoBtn  = isLastAssistant ? `<button class="msg-action-btn" title="${t('undo_exchange')}" onclick="undoLastExchange()">${li('undo',13)}</button>` : '';
diff --git a/tests/test_1325_user_fenced_code.py b/tests/test_1325_user_fenced_code.py
new file mode 100644
index 00000000..80c1be39
--- /dev/null
+++ b/tests/test_1325_user_fenced_code.py
@@ -0,0 +1,123 @@
+"""Tests for issue #1325 — fenced code blocks in user message bubbles."""
+import os
+import subprocess
+import tempfile
+
+UI_JS = os.path.join(os.path.dirname(__file__), '..', 'static', 'ui.js')
+
+
+def _extract_js_functions():
+    """Extract esc and _renderUserFencedBlocks from ui.js by locating their definitions."""
+    # Explicit UTF-8 + context manager: ui.js has non-ASCII text and the handle must be closed.
+    with open(UI_JS, encoding='utf-8') as fh:
+        lines = fh.read().split('\n')
+    # Locate by content, not hard-coded line numbers, so unrelated ui.js edits cannot shift the targets.
+    esc_def = next(ln for ln in lines if ln.startswith('const esc='))
+    start = next(i for i, ln in enumerate(lines) if ln.startswith('function _renderUserFencedBlocks('))
+    # Collect lines from the function header until its braces balance.
+    fn_lines = []
+    depth = 0
+    for ln in lines[start:]:
+        fn_lines.append(ln)
+        depth += ln.count('{') - ln.count('}')
+        if depth <= 0:
+            break
+    fn_def = '\n'.join(fn_lines)
+    return esc_def, fn_def
+
+
+def _run_user_render(text_input):
+    """Return the HTML that _renderUserFencedBlocks prints under node for the given input text."""
+    import json
+    esc_def, fn_def = _extract_js_functions()
+    js_code = esc_def + '\n' + fn_def + '\n'
+    js_code += 'var input = JSON.parse(process.argv[2]);\n'
+    js_code += 'process.stdout.write(_renderUserFencedBlocks(input));\n'
+    tf = tempfile.NamedTemporaryFile(mode='w', suffix='.js', delete=False, encoding='utf-8')
+    try:
+        with tf:  # write inside the try so a failed write still closes and unlinks the file
+            tf.write(js_code)
+        result = subprocess.run(
+            ['node', tf.name, json.dumps(text_input)],
+            capture_output=True, text=True, timeout=10
+        )
+        if result.returncode != 0:
+            raise RuntimeError(f"node error: {result.stderr}")
+        return result.stdout
+    finally:
+        os.unlink(tf.name)
+
+
+class TestUserFencedBlocks:
+    """Fenced code blocks in user messages should render as <pre><code>."""
+
+    def test_simple_fenced_block(self):
+        out = _run_user_render("hello\n```python\nprint(1)\n```\nworld")
+        assert '<pre><code class="language-python">' in out
+        assert 'print(1)' in out
+        # Newlines around the fenced block become <br> (same as original plain-text path)
+        assert 'hello<br>' in out
+        assert '<br>world' in out
+
+    def test_fenced_block_escaped_html(self):
+        """HTML in code blocks should be escaped."""
+        out = _run_user_render("```html\n<div>hi</div>\n```")
+        assert '&lt;div&gt;' in out
+        # No raw <div> in code content
+        assert '<div>' not in out.replace('&lt;div&gt;', '').replace('&gt;', '')
+
+    def test_plain_text_not_interpreted_as_markdown(self):
+        """Bold/italic/links in non-fenced text should stay escaped."""
+        out = _run_user_render("**bold** and *italic* and <script>alert(1)</script>")
+        assert '**bold**' in out
+        assert '*italic*' in out
+        assert '&lt;script&gt;' in out
+        assert '<strong>' not in out
+
+    def test_language_header_shown(self):
+        out = _run_user_render("```javascript\nconst x = 1;\n```")
+        assert 'class="pre-header"' in out
+        assert 'javascript' in out
+
+    def test_no_language_no_header(self):
+        out = _run_user_render("```\nsome code\n```")
+        assert 'class="pre-header"' not in out
+        assert '<pre><code>' in out
+        assert 'some code' in out
+
+    def test_diff_block_colored(self):
+        out = _run_user_render("```diff\n+added\n-removed\n```")
+        assert 'diff-block' in out
+        assert 'diff-plus' in out
+        assert 'diff-minus' in out
+
+    def test_multiple_fenced_blocks(self):
+        out = _run_user_render("first\n```python\n1\n```\nmiddle\n```js\n2\n```\nlast")
+        assert 'language-python' in out
+        assert 'language-js' in out
+        assert 'first<br>' in out
+        assert '<br>last' in out
+
+    def test_fenced_block_with_ampersand(self):
+        out = _run_user_render("```python\nx & y\n```")
+        assert 'x &amp; y' in out
+
+    def test_empty_code_block(self):
+        out = _run_user_render("```\n```")
+        assert '<pre><code>' in out
+
+    def test_special_chars_outside_blocks_escaped(self):
+        out = _run_user_render("a < b > c & d")
+        assert 'a &lt; b &gt; c &amp; d' in out
+
+    def test_links_not_rendered_in_plain_text(self):
+        """URLs in plain text should NOT become clickable links."""
+        out = _run_user_render("Check https://example.com for details")
+        assert '<a ' not in out
+        assert 'https://example.com' in out
+
+    def test_inline_backticks_not_touched(self):
+        """Inline backticks (single backtick, not fenced block) should remain escaped as text."""
+        out = _run_user_render("use `var x = 1` here")
+        assert '`var x = 1`' in out
+        assert '<code>' not in out

From d4b055c30b3e2423e33c1d777b60ff687e730d23 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:18:41 +0000
Subject: [PATCH 5/9] fix(streaming+ui): preserve user message on cancel +
 persist activity-panel expand state (#1298)

From PR #1338. Already independently APPROVED by nesquena before being absorbed into v0.50.246.

CHANGELOG entries from this PR were dropped during squash (the v0.50.245 section is already
shipped); they will be re-added under [v0.50.246] in the release commit.

Co-authored-by: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
---
 api/streaming.py                            |  49 +++
 static/ui.js                                |  47 ++-
 tests/test_issue1298_cancel_and_activity.py | 326 ++++++++++++++++++++
 3 files changed, 417 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_issue1298_cancel_and_activity.py

diff --git a/api/streaming.py b/api/streaming.py
index 9987401c..775cf784 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -2586,6 +2586,55 @@ def cancel_stream(stream_id: str) -> bool:
         with _get_session_agent_lock(_cancel_session_id):
             try:
                 _cs = get_session(_cancel_session_id)
+                # ── Preserve the user's typed message before clearing pending state (#1298) ──
+                # The agent's internal messages list (where the user message was appended at
+                # the start of run_conversation()) may not have been merged back into
+                # _cs.messages yet — cancel_stream() races with the streaming thread's final
+                # _merge_display_messages_after_agent_result() call. Without this guard, the
+                # user's message is lost: pending_user_message gets cleared below, and
+                # _cs.messages still only contains messages from prior turns. The reporter
+                # of #1298 sees their typed text vanish from chat after clicking Stop.
+                #
+                # Recovery rule: if pending_user_message is set AND the latest message in
+                # _cs.messages isn't already a matching user turn, synthesize one. The
+                # match check guards against double-append when the streaming thread DID
+                # reach its merge step before cancel_stream() got the session lock.
+                #
+                # Wrapped in its own try/except so an unexpected _cs.messages shape (e.g.
+                # in unit tests using Mock sessions) cannot escape and skip the rest of
+                # the cleanup.
+                try:
+                    _pending_user = getattr(_cs, 'pending_user_message', None)
+                    _pending_atts_raw = getattr(_cs, 'pending_attachments', None)
+                    _pending_atts = list(_pending_atts_raw) if isinstance(_pending_atts_raw, (list, tuple)) else []
+                    _msgs_for_recovery = _cs.messages if isinstance(_cs.messages, list) else None
+                    if _pending_user and _msgs_for_recovery is not None:
+                        _last_user = None
+                        for _m in reversed(_msgs_for_recovery):
+                            if isinstance(_m, dict) and _m.get('role') == 'user':
+                                _last_user = _m
+                                break
+                        _already_persisted = False
+                        if _last_user is not None:
+                            _last_content = _last_user.get('content')
+                            if isinstance(_last_content, str) and _last_content.strip():
+                                # Tolerate the workspace prefix; blank content is skipped because '' is a substring of everything.
+                                if _pending_user in _last_content or _last_content in _pending_user:
+                                    _already_persisted = True
+                        if not _already_persisted:
+                            _user_turn: dict = {
+                                'role': 'user',
+                                'content': _pending_user,
+                                'timestamp': int(time.time()),
+                            }
+                            if _pending_atts:
+                                _user_turn['attachments'] = _pending_atts
+                            _msgs_for_recovery.append(_user_turn)
+                except Exception:
+                    logger.debug(
+                        "Failed to recover pending user message on cancel for %s",
+                        _cancel_session_id, exc_info=True,
+                    )
                 _cs.active_stream_id = None
                 _cs.pending_user_message = None
                 _cs.pending_attachments = []
diff --git a/static/ui.js b/static/ui.js
index c6bc2104..5f6e2fc9 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -2644,6 +2644,30 @@ function _thinkingActivityNode(text){
   row.innerHTML=_thinkingCardHtml(text);
   return row;
 }
+// ── Activity-group user expand intent (#1298) ──────────────────────────────
+// When the user manually expands the live "Activity" dropdown during streaming,
+// preserve that intent across the destroy/recreate cycle that fires on every
+// thinking/tool event. Without this, ensureActivityGroup() re-creates the group
+// with the default collapsed state and finalizeThinkingCard() force-collapses
+// it whenever the assistant transitions from thinking → tool → thinking, so
+// the panel snaps shut every few seconds while the user is trying to read it.
+//
+// The tracker is a singleton boolean: there is at most one live activity group
+// at a time (selector .tool-call-group[data-live-tool-call-group="1"]). It is
+// set to true when the user clicks the summary to expand, false when they
+// click to collapse, and cleared back to undefined by
+// _clearLiveActivityUserIntent(), which clearLiveToolCards() calls so that
+// the next turn starts from the default collapsed state.
+let _liveActivityUserExpanded;
+function _onLiveActivityToggle(group){
+  if(!group) return;
+  // Only track explicit user clicks on the live group, not programmatic toggles.
+  if(group.getAttribute('data-live-tool-call-group')!=='1') return;
+  _liveActivityUserExpanded = !group.classList.contains('tool-call-group-collapsed');
+}
+function _clearLiveActivityUserIntent(){
+  _liveActivityUserExpanded = undefined;
+}
 function ensureActivityGroup(inner, opts){
   opts=opts||{};
   if(!inner) return null;
@@ -2652,12 +2676,16 @@ function ensureActivityGroup(inner, opts){
   let group=inner.querySelector(selector);
   if(!group){
     group=document.createElement('div');
-    const collapsed=opts.collapsed!==false;
+    let collapsed=opts.collapsed!==false;
+    // Restore the user's explicit expand intent when recreating the live
+    // activity group within the same turn (#1298).
+    if(live && _liveActivityUserExpanded === true) collapsed=false;
+    else if(live && _liveActivityUserExpanded === false) collapsed=true;
     group.className='tool-call-group agent-activity-group'+(collapsed?' tool-call-group-collapsed':'');
     group.setAttribute('data-tool-call-group','1');
     group.setAttribute('data-agent-activity-group','1');
     if(live) group.setAttribute('data-live-tool-call-group','1');
-    group.innerHTML=`<button type="button" class="tool-call-group-summary" aria-expanded="${collapsed?'false':'true'}" onclick="const g=this.closest('.tool-call-group');const c=g.classList.toggle('tool-call-group-collapsed');this.setAttribute('aria-expanded',String(!c));"><span class="tool-call-group-chevron">${li('chevron-right',12)}</span><span class="tool-call-group-label">Activity</span><span class="tool-call-group-list">tools / thinking</span><span class="tool-call-group-count">0</span></button><div class="tool-call-group-body"></div>`;
+    group.innerHTML=`<button type="button" class="tool-call-group-summary" aria-expanded="${collapsed?'false':'true'}" onclick="const g=this.closest('.tool-call-group');const c=g.classList.toggle('tool-call-group-collapsed');this.setAttribute('aria-expanded',String(!c));if(typeof _onLiveActivityToggle==='function')_onLiveActivityToggle(g);"><span class="tool-call-group-chevron">${li('chevron-right',12)}</span><span class="tool-call-group-label">Activity</span><span class="tool-call-group-list">tools / thinking</span><span class="tool-call-group-count">0</span></button><div class="tool-call-group-body"></div>`;
     const anchor=opts.anchor||null;
     if(anchor&&anchor.parentElement===inner) anchor.insertAdjacentElement('afterend', group);
     else inner.appendChild(group);
@@ -3576,6 +3604,9 @@ function appendLiveToolCard(tc){
 function clearLiveToolCards(){
   const inner=_assistantTurnBlocks($('liveAssistantTurn'));
   if(inner) inner.querySelectorAll('.tool-call-group[data-live-tool-call-group],.tool-card-row[data-live-tid]').forEach(el=>el.remove());
+  // Reset the per-turn user expand intent so the next turn starts at the
+  // default collapsed state (#1298).
+  if(typeof _clearLiveActivityUserIntent==='function') _clearLiveActivityUserIntent();
   // Legacy #liveToolCards container cleanup — kept for safety in case any
   // leftover cards were inserted there before this refactor took effect.
   const container=$('liveToolCards');
@@ -4262,9 +4293,15 @@ function finalizeThinkingCard(){
   const turn=$('liveAssistantTurn');
   const group=turn&&turn.querySelector('.tool-call-group[data-live-tool-call-group="1"]');
   if(group){
-    group.classList.add('tool-call-group-collapsed');
-    const summary=group.querySelector('.tool-call-group-summary');
-    if(summary) summary.setAttribute('aria-expanded','false');
+    // Respect the user's explicit expand intent (#1298) — only force-collapse
+    // when the user has not manually expanded this turn's activity group, or
+    // has manually collapsed it. Otherwise the panel snaps shut whenever new
+    // activity arrives, even mid-read.
+    if(_liveActivityUserExpanded !== true){
+      group.classList.add('tool-call-group-collapsed');
+      const summary=group.querySelector('.tool-call-group-summary');
+      if(summary) summary.setAttribute('aria-expanded','false');
+    }
     const active=group.querySelector('.agent-activity-thinking[data-thinking-active="1"]');
     if(active) active.removeAttribute('data-thinking-active');
     _syncToolCallGroupSummary(group);
diff --git a/tests/test_issue1298_cancel_and_activity.py b/tests/test_issue1298_cancel_and_activity.py
new file mode 100644
index 00000000..fe4b1b96
--- /dev/null
+++ b/tests/test_issue1298_cancel_and_activity.py
@@ -0,0 +1,326 @@
+"""Regression tests for #1298 — Activity panel UI state and Stop/Cancel data loss.
+
+Two distinct bugs reported in YanTianlong-01's bug report on v0.50.240:
+
+  1. The expanded Activity list collapses automatically when new activity arrives.
+  2. The latest user message disappears after clicking Stop/Cancel during streaming.
+
+Bug 2 is server-side data loss (the message is gone from session JSON, not just
+the in-memory client copy) caused by cancel_stream() clearing pending_user_message
+without first persisting it to s.messages. This test suite locks down both fixes.
+"""
+import pathlib
+import queue
+import re
+import threading
+from unittest.mock import Mock
+
+import pytest
+
+import api.config as config
+import api.models as models
+import api.streaming as streaming
+from api.models import Session
+from api.streaming import cancel_stream
+
+REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
+
+
+# ── Fixtures ────────────────────────────────────────────────────────────────
+
+@pytest.fixture(autouse=True)
+def _isolate_session_dir(tmp_path, monkeypatch):
+    """Redirect SESSION_DIR / SESSION_INDEX_FILE to an isolated temp dir."""
+    session_dir = tmp_path / "sessions"
+    session_dir.mkdir()
+    index_file = session_dir / "_index.json"
+    monkeypatch.setattr(models, "SESSION_DIR", session_dir)
+    monkeypatch.setattr(models, "SESSION_INDEX_FILE", index_file)
+    models.SESSIONS.clear()
+    yield
+    models.SESSIONS.clear()
+
+
+@pytest.fixture(autouse=True)
+def _isolate_stream_state():
+    config.STREAMS.clear()
+    config.CANCEL_FLAGS.clear()
+    config.AGENT_INSTANCES.clear()
+    config.STREAM_PARTIAL_TEXT.clear()
+    yield
+    config.STREAMS.clear()
+    config.CANCEL_FLAGS.clear()
+    config.AGENT_INSTANCES.clear()
+    config.STREAM_PARTIAL_TEXT.clear()
+
+
+@pytest.fixture(autouse=True)
+def _isolate_agent_locks():
+    config.SESSION_AGENT_LOCKS.clear()
+    yield
+    config.SESSION_AGENT_LOCKS.clear()
+
+
+def _make_pending_session(session_id="cancel_sid_1298",
+                          pending_msg="Help me debug this issue",
+                          messages=None,
+                          attachments=None):
+    """Build a session in mid-stream state: pending_user_message set, messages may be empty."""
+    s = Session(
+        session_id=session_id,
+        title="Test Session",
+        messages=messages or [],
+    )
+    s.pending_user_message = pending_msg
+    s.pending_attachments = list(attachments or [])
+    s.pending_started_at = None
+    s.active_stream_id = "stream_1298"
+    s.save()
+    models.SESSIONS[session_id] = s
+    return s
+
+
+def _setup_cancel_stream_state(session_id, stream_id="stream_1298"):
+    """Wire up STREAMS/CANCEL_FLAGS/AGENT_INSTANCES so cancel_stream() can run."""
+    config.STREAMS[stream_id] = queue.Queue()
+    config.CANCEL_FLAGS[stream_id] = threading.Event()
+    mock_agent = Mock()
+    mock_agent.session_id = session_id
+    mock_agent.interrupt = Mock()
+    config.AGENT_INSTANCES[stream_id] = mock_agent
+    return stream_id, mock_agent
+
+
+# ── Server-side: cancel preserves pending_user_message in s.messages ────────
+
+class TestIssue1298CancelPreservesUserMessage:
+    """Issue 2: Latest user message disappears after Stop/Cancel during streaming.
+
+    Root cause: cancel_stream() at api/streaming.py:2575+ clears
+    s.pending_user_message before the streaming thread's
+    _merge_display_messages_after_agent_result() has a chance to merge the
+    user turn into s.messages. The session is saved with neither
+    pending_user_message nor a corresponding s.messages entry, so the user's
+    typed text is lost permanently.
+
+    Fix: synthesize a user turn from pending_user_message into s.messages when
+    the most recent message isn't already that turn.
+    """
+
+    def test_cancel_synthesizes_user_message_when_messages_empty(self):
+        """When the agent thread is killed before it can append the user turn,
+        cancel_stream() must persist pending_user_message into s.messages so
+        the typed text survives a session reload."""
+        s = _make_pending_session(
+            session_id="cancel_sid_empty",
+            pending_msg="What's the weather forecast?",
+            messages=[],
+        )
+        stream_id, _agent = _setup_cancel_stream_state(s.session_id)
+
+        result = cancel_stream(stream_id)
+        assert result is True
+
+        # Read the session back from the in-memory SESSIONS cache (not a fresh load from disk)
+        s2 = models.SESSIONS[s.session_id]
+        roles = [m.get("role") for m in s2.messages if isinstance(m, dict)]
+        contents = [m.get("content") for m in s2.messages if isinstance(m, dict)]
+
+        assert "user" in roles, (
+            "Expected user turn synthesized into s.messages — "
+            f"got roles={roles}"
+        )
+        assert "What's the weather forecast?" in contents, (
+            "Expected pending_user_message text preserved verbatim in s.messages — "
+            f"got contents={contents}"
+        )
+        assert s2.pending_user_message is None, (
+            "pending_user_message must be cleared after cancel"
+        )
+        assert s2.active_stream_id is None
+
+    def test_cancel_does_not_double_append_when_streaming_thread_already_merged(self):
+        """If the streaming thread won the race and already merged the user turn
+        into s.messages before cancel_stream() got the lock, cancel must not
+        append a duplicate."""
+        prior_user = {"role": "user", "content": "Run a tool for me"}
+        s = _make_pending_session(
+            session_id="cancel_sid_already_merged",
+            pending_msg="Run a tool for me",
+            messages=[prior_user],
+        )
+        stream_id, _agent = _setup_cancel_stream_state(s.session_id)
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[s.session_id]
+        user_messages = [m for m in s2.messages
+                         if isinstance(m, dict) and m.get("role") == "user"]
+        # Exactly one user turn — no duplicate
+        matching = [m for m in user_messages
+                    if "Run a tool for me" in str(m.get("content") or "")]
+        assert len(matching) == 1, (
+            "Expected exactly one user turn matching pending_user_message — "
+            f"got {len(matching)} ({user_messages})"
+        )
+
+    def test_cancel_synthesized_user_message_carries_attachments(self):
+        """A cancelled turn that had attachments uploaded should keep them on
+        the recovered user message."""
+        s = _make_pending_session(
+            session_id="cancel_sid_attachments",
+            pending_msg="Look at this screenshot",
+            messages=[],
+            attachments=["bug_screenshot.png", "stack_trace.txt"],
+        )
+        stream_id, _agent = _setup_cancel_stream_state(s.session_id)
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[s.session_id]
+        user_msgs = [m for m in s2.messages
+                     if isinstance(m, dict) and m.get("role") == "user"]
+        assert user_msgs, "User turn must be persisted on cancel"
+        recovered = user_msgs[0]
+        assert recovered.get("attachments") == [
+            "bug_screenshot.png", "stack_trace.txt"
+        ], (
+            "Attachment list must be preserved on the synthesized user turn — "
+            f"got {recovered.get('attachments')}"
+        )
+
+    def test_cancel_no_pending_user_message_does_nothing_extra(self):
+        """When there is no pending_user_message (e.g. cancel after the agent
+        has already returned), cancel_stream() must not synthesize a phantom
+        user turn."""
+        s = Session(
+            session_id="cancel_sid_no_pending",
+            title="Test",
+            messages=[{"role": "user", "content": "earlier turn"}],
+        )
+        s.active_stream_id = "stream_1298"
+        s.pending_user_message = None
+        s.save()
+        models.SESSIONS[s.session_id] = s
+        stream_id, _agent = _setup_cancel_stream_state(s.session_id)
+
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[s.session_id]
+        user_messages = [m for m in s2.messages
+                         if isinstance(m, dict) and m.get("role") == "user"]
+        # Still exactly one — the original earlier turn
+        assert len(user_messages) == 1
+        assert user_messages[0].get("content") == "earlier turn"
+
+
+# ── Client-side: ui.js source-level guards for activity-group state ─────────
+
+class TestIssue1298ActivityGroupExpandPersistence:
+    """Issue 1: Expanded Activity list collapses automatically when new
+    activity arrives.
+
+    Root cause:
+      - ensureActivityGroup() (static/ui.js) creates the live activity group
+        with `tool-call-group-collapsed` whenever it's missing
+      - finalizeThinkingCard() force-adds `tool-call-group-collapsed` on every
+        tool boundary, regardless of user intent
+      - The user's manually-set expand state lives only on a DOM class list,
+        so any destroy/recreate cycle (which fires on every thinking → tool →
+        thinking transition) wipes it.
+
+    Fix: track the user's last explicit toggle in a per-turn singleton, and
+    skip the force-collapse when the user has explicitly expanded.
+    """
+
+    def test_ui_js_tracks_user_expand_intent_for_live_activity_group(self):
+        src = (REPO_ROOT / "static" / "ui.js").read_text()
+        assert "_liveActivityUserExpanded" in src, (
+            "ui.js must declare a per-turn tracker for the user's expand intent "
+            "on the live activity group (#1298)"
+        )
+        assert "_onLiveActivityToggle" in src, (
+            "ui.js must expose a helper that records the user's manual toggle "
+            "of the live activity group"
+        )
+
+    def test_ensure_activity_group_restores_expand_intent(self):
+        """ensureActivityGroup() must consult _liveActivityUserExpanded when
+        creating a fresh live group so the user's prior expand survives the
+        destroy/recreate cycle."""
+        src = (REPO_ROOT / "static" / "ui.js").read_text()
+        # Find the ensureActivityGroup function body
+        m = re.search(
+            r"function ensureActivityGroup\(inner, opts\)\{(.*?)\n\}",
+            src, re.DOTALL,
+        )
+        assert m, "ensureActivityGroup() must exist in ui.js"
+        body = m.group(1)
+        assert "_liveActivityUserExpanded" in body, (
+            "ensureActivityGroup() body must reference the user-expand tracker "
+            "to restore intent on re-create (#1298)"
+        )
+        assert "live" in body and "_liveActivityUserExpanded === true" in body, (
+            "ensureActivityGroup() must override the default `collapsed` flag "
+            "when the user previously expanded the live group"
+        )
+
+    def test_finalize_thinking_card_respects_user_expand(self):
+        """finalizeThinkingCard() must NOT force-collapse the live activity
+        group when the user has explicitly expanded it (#1298)."""
+        src = (REPO_ROOT / "static" / "ui.js").read_text()
+        m = re.search(
+            r"function finalizeThinkingCard\(\)\{(.*?)\n\}",
+            src, re.DOTALL,
+        )
+        assert m, "finalizeThinkingCard() must exist in ui.js"
+        body = m.group(1)
+        assert "_liveActivityUserExpanded" in body, (
+            "finalizeThinkingCard() must respect the user's expand intent — "
+            "without this guard, the panel snaps shut on every tool boundary"
+        )
+        # Hard fail if force-collapse is unconditional
+        assert "_liveActivityUserExpanded !== true" in body or \
+               "_liveActivityUserExpanded!==true" in body.replace(" ", ""), (
+            "finalizeThinkingCard() must skip the force-collapse path when "
+            "_liveActivityUserExpanded === true"
+        )
+
+    def test_inline_onclick_records_user_intent(self):
+        """The summary button's inline onclick must call _onLiveActivityToggle
+        so user clicks update the tracker (#1298)."""
+        src = (REPO_ROOT / "static" / "ui.js").read_text()
+        # The summary button is built inline inside ensureActivityGroup.
+        assert "_onLiveActivityToggle" in src, (
+            "_onLiveActivityToggle helper must be defined"
+        )
+        # The inline onclick string must include the call so user toggles
+        # are captured into _liveActivityUserExpanded.
+        m = re.search(r'class="tool-call-group-summary"[^`]*`', src)
+        assert m, "live activity summary button template must be present"
+        # The onclick fragment is in the same template literal that builds
+        # the button — pull a wider window
+        m2 = re.search(
+            r"group\.innerHTML=`<button[^`]*?_onLiveActivityToggle[^`]*?`",
+            src, re.DOTALL,
+        )
+        assert m2, (
+            "ensureActivityGroup() inline onclick must invoke "
+            "_onLiveActivityToggle(g) so user clicks update the tracker"
+        )
+
+    def test_clear_live_tool_cards_resets_expand_intent(self):
+        """clearLiveToolCards() — invoked between turns — must reset the
+        per-turn user-expand tracker so the next turn starts collapsed by
+        default (#1298)."""
+        src = (REPO_ROOT / "static" / "ui.js").read_text()
+        m = re.search(
+            r"function clearLiveToolCards\(\)\{(.*?)\n\}",
+            src, re.DOTALL,
+        )
+        assert m, "clearLiveToolCards() must exist"
+        body = m.group(1)
+        assert "_clearLiveActivityUserIntent" in body, (
+            "clearLiveToolCards() must reset _liveActivityUserExpanded between "
+            "turns so prior expand intent doesn't bleed into the next turn"
+        )

From 50418cd47bf0c1580aab487a4cd425202e6dc853 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:20:05 +0000
Subject: [PATCH 6/9] test: stabilize flaky checkpoint test + add regression
 for #1339 fallback list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- tests/test_issue765_streaming_persistence.py — replace timing-based polling
  in test_checkpoint_fires_on_activity_counter_increment with deterministic
  threading.Event-driven sync. The old version used time.sleep(0.15)+(0.25)+(0.25)
  with a 0.1s polling thread, which under CI scheduling jitter could miss the
  second increment and complete with only 1 save instead of 2. Now waits up
  to 3.0s for save_count to advance to the target after each increment.
  Flake observed on Python 3.11 in CI run 25175204451.

- tests/test_pr1339_fallback_providers_list.py — new structural test that
  asserts streaming.py handles both legacy fallback_model (single dict) and
  new fallback_providers (list form) without calling .get() on a list. Three
  assertions: both keys consulted, list-form has explicit isinstance check,
  _fallback_resolved defaults to None.
---
 tests/test_issue765_streaming_persistence.py | 27 ++++++--
 tests/test_pr1339_fallback_providers_list.py | 72 ++++++++++++++++++++
 2 files changed, 93 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_pr1339_fallback_providers_list.py

diff --git a/tests/test_issue765_streaming_persistence.py b/tests/test_issue765_streaming_persistence.py
index e68c08d1..f55bb188 100644
--- a/tests/test_issue765_streaming_persistence.py
+++ b/tests/test_issue765_streaming_persistence.py
@@ -112,7 +112,12 @@ class TestPeriodicCheckpoint:
     """
 
     def test_checkpoint_fires_on_activity_counter_increment(self):
-        """Checkpoint saves when _checkpoint_activity counter grows."""
+        """Checkpoint saves when _checkpoint_activity counter grows.
+
+        Deterministic: instead of relying on time-based polling windows, we
+        wait for the checkpoint thread's save_count to advance after each
+        increment. Generous timeout guards against CI scheduling jitter.
+        """
         s = _make_session("ckpt1")
         s.pending_user_message = "do a long task"
         s.save()  # initial save (like routes.py does before streaming starts)
@@ -120,28 +125,38 @@ class TestPeriodicCheckpoint:
         stop_event = threading.Event()
         _checkpoint_activity = [0]
         save_count = [0]
+        save_event = threading.Event()
 
         def periodic_checkpoint():
             last = 0
-            while not stop_event.wait(0.1):  # fast interval for test
+            while not stop_event.wait(0.02):  # fast poll for low-jitter test
                 try:
                     cur = _checkpoint_activity[0]
                     if cur > last:
                         s.save(skip_index=True)
                         last = cur
                         save_count[0] += 1
+                        save_event.set()
                 except Exception:
                     pass
 
         t = threading.Thread(target=periodic_checkpoint, daemon=True)
         t.start()
 
-        # Simulate on_tool() completing twice (as would happen during a real agent run)
-        time.sleep(0.15)
+        def _wait_for_save(target_count, timeout=3.0):
+            """Wait until save_count[0] >= target_count, or timeout."""
+            deadline = time.monotonic() + timeout
+            while save_count[0] < target_count and time.monotonic() < deadline:
+                save_event.wait(timeout=0.05)
+                save_event.clear()
+            return save_count[0] >= target_count
+
+        # Simulate on_tool() completing twice
         _checkpoint_activity[0] += 1  # first tool completes
-        time.sleep(0.25)
+        assert _wait_for_save(1), f"Expected 1 save after first increment; got {save_count[0]}"
+
         _checkpoint_activity[0] += 1  # second tool completes
-        time.sleep(0.25)
+        assert _wait_for_save(2), f"Expected 2 saves after second increment; got {save_count[0]}"
 
         stop_event.set()
         t.join(timeout=2)
diff --git a/tests/test_pr1339_fallback_providers_list.py b/tests/test_pr1339_fallback_providers_list.py
new file mode 100644
index 00000000..c180c8dd
--- /dev/null
+++ b/tests/test_pr1339_fallback_providers_list.py
@@ -0,0 +1,72 @@
+"""Test for PR #1339 — streaming.py must support both single-dict `fallback_model`
+and list-form `fallback_providers` config without crashing on `.get()`.
+
+Before the fix, when config had `fallback_providers: [{provider, model, ...}, ...]`,
+streaming.py read it as if it were a dict and called `.get('model', '')` on a list,
+which would raise `AttributeError: 'list' object has no attribute 'get'`.
+
+The fix makes streaming.py handle both legacy dict form and new list form, picking
+the first entry from the list when given a list.
+"""
+import re
+from pathlib import Path
+
+STREAMING_PY = Path(__file__).resolve().parent.parent / "api" / "streaming.py"
+
+
+def _extract_fallback_block():
+    """Return the source range that handles fallback_model/fallback_providers."""
+    src = STREAMING_PY.read_text(encoding="utf-8")
+    # Locate the resolved-fallback region
+    idx = src.find("# Fallback model from profile config")
+    assert idx != -1, "Fallback block marker not found in streaming.py"
+    end = src.find("# Build kwargs defensively", idx)
+    assert end != -1, "End-of-block marker not found"
+    return src[idx:end]
+
+
+def test_fallback_handles_both_dict_and_list_config():
+    """Block must read either fallback_model (dict) or fallback_providers (list)."""
+    block = _extract_fallback_block()
+
+    # Both keys must be consulted
+    assert "fallback_model" in block, "Must still support legacy single-dict fallback_model"
+    assert "fallback_providers" in block, (
+        "Must support new list-form fallback_providers (PR #1339)"
+    )
+
+
+def test_fallback_list_iteration_picks_first_valid_entry():
+    """When given a list, code must pick the first valid dict entry, not call .get on the list."""
+    block = _extract_fallback_block()
+
+    # Must isinstance-check before calling .get
+    assert "isinstance(_fallback, list)" in block, (
+        "Must detect list-form fallback_providers explicitly to avoid AttributeError"
+    )
+    assert "isinstance(_fallback, dict)" in block or "isinstance(_fallback,dict)" in block, (
+        "Must keep legacy single-dict path explicitly"
+    )
+
+    # No bare _fallback.get() — every .get() on _fallback must be guarded by an isinstance(_fallback, dict) check.
+    # We verify this structurally: every line containing `_fallback.get(` must be inside or preceded by an isinstance(_fallback, dict) gate.
+    lines = block.split("\n")
+    in_dict_block = False
+    for i, line in enumerate(lines):
+        if "isinstance(_fallback, dict)" in line:
+            in_dict_block = True
+        if "_fallback.get(" in line and not in_dict_block:
+            # Look back up to 3 lines for the isinstance gate on the same elif/if
+            window = "\n".join(lines[max(0, i - 3): i + 1])
+            assert "isinstance(_fallback, dict)" in window, (
+                f"Line {i} calls _fallback.get() without a nearby isinstance(_fallback, dict) gate:\n{line}"
+            )
+
+
+def test_fallback_resolved_initialized_to_none():
+    """_fallback_resolved must default to None so AIAgent gets an explicit None when no fallback."""
+    block = _extract_fallback_block()
+    # The variable must be assignable to None at the top of the block
+    assert "_fallback_resolved = None" in block, (
+        "_fallback_resolved must be initialized to None so callers can rely on its presence"
+    )

From 929461ffbc366bfcc604ead5ba8f9c2cef7328d2 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:21:18 +0000
Subject: [PATCH 7/9] release: v0.50.246
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Combines:
- 4 contributor PRs (#1335 user fenced code, #1337 mermaid+cache-bust,
  #1339 fallback_providers list, #1341 context_length persistence)
- Self-built #1338 (cancel data-loss + activity panel) — already
  independently APPROVED by nesquena before absorption
- CONTRIBUTORS.md and markdown refresh from #1340

See CHANGELOG.md for the full list with author credit.
---
 CHANGELOG.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ff8b6e4..8c60500e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,26 @@
 
 ## [Unreleased]
 
+## [v0.50.246] — 2026-04-30
+
+### Added
+- **Render fenced code blocks in user messages** — typing a triple-backtick fenced code block in the composer now renders with proper code styling, syntax-aware diff/patch coloring, and the same `<pre><code>` pipeline used for assistant responses. Plain user text outside fences stays escaped (no markdown bold/italic/links interpreted in user bubbles); only fenced blocks are upgraded. Includes specialized colored-line rendering for `diff` / `patch` languages. (`static/ui.js`, `tests/test_1325_user_fenced_code.py`) @bergeouss — PR #1335, fixes #1325
+
+### Fixed
+- **Stop/Cancel during streaming no longer wipes the user's typed message (data-loss bug)** — When a user clicked Stop while the agent was streaming, `cancel_stream()` cleared `pending_user_message` before the streaming thread had merged the user turn into `s.messages`, persisting a session with neither the pending field nor a corresponding message. The user's typed text was permanently lost from the session JSON, not just the in-memory client copy. Now `cancel_stream()` synthesizes a user turn into `s.messages` from `pending_user_message` (with attachments preserved) when the most recent user message isn't already that turn — guards against double-append by content-matching against the last user message. (`api/streaming.py`, `tests/test_issue1298_cancel_and_activity.py`) — fixes #1298 (issue 2)
+- **Activity panel no longer auto-collapses when new tool/thinking events arrive** — Both `ensureActivityGroup()` (which re-creates the group with `tool-call-group-collapsed` on every destroy/recreate) and `finalizeThinkingCard()` (which force-adds the collapsed class on every tool boundary) ignored the user's manual expand. Tracks the user's last explicit toggle on the live activity group in a per-turn singleton (`_liveActivityUserExpanded`), restored on re-create and respected by the finalize path. Cleared between turns by `clearLiveToolCards()`. (`static/ui.js`, `tests/test_issue1298_cancel_and_activity.py`) — fixes #1298 (issue 1)
+- **Stale Mermaid render errors no longer leak into every chat** — Mermaid's render-failure path leaves a temporary `<div id="d<id>">` body-level node containing a "Syntax error in text" SVG. The previous code never removed it, so once any Mermaid block failed (or got mis-detected as Mermaid), every subsequent tab kept the syntax-error SVG visible regardless of content. Also tightens Mermaid detection so line-numbered tool output (`123|line`) and code blocks that don't start with a recognized Mermaid keyword are no longer mis-parsed as Mermaid; failed blocks are marked so a later render pass can't retry them. (`static/ui.js`, `tests/test_issue347.py`) @dso2ng — PR #1337
+- **Static asset cache busts automatically on every release** — `<script src="static/ui.js">` and friends were cached indefinitely by browsers and the service worker, so a new release with bug fixes could be invisible to a user until they hard-refreshed. Now `index.html` and `sw.js` registration both inject the current `WEBUI_VERSION` git tag as a `?v=` query string, URL-encoded server-side so unusual git tag formats can't break the JS. The service worker also no longer intercepts requests for itself, ensuring the browser always fetches the freshly-versioned `sw.js` directly from the network. (`api/routes.py`, `static/index.html`, `static/sw.js`, `tests/test_pwa_manifest_sw.py`) @dso2ng — PR #1337
+- **Context window indicator persists across page reloads** — `Session.__init__` now accepts `context_length`, `threshold_tokens`, and `last_prompt_tokens`; `save()` persists them and `compact()` exposes them so the GET `/api/session` response includes them. The frontend context-ring indicator was previously losing its percentage on every session load because the Session model silently dropped these fields when reconstructing from disk. (`api/models.py`, `api/routes.py`) @fxd-jason — PR #1341 (focused split from the held PR #1318)
+- **`fallback_providers` list config no longer crashes streaming** — `api/streaming.py:1701` previously read `_cfg.get('fallback_model')` and called `.get('model', '')` on the result. When users had `fallback_providers: [{...}, {...}]` in their config (the chained-fallback form documented in CHANGELOG since v0.50.151), the streaming path crashed with `AttributeError: 'list' object has no attribute 'get'`. Now consults both `fallback_model` (single dict, legacy) and `fallback_providers` (list, new), picks the first valid entry from the list, and defends both paths with `isinstance` checks. (`api/streaming.py`, `tests/test_pr1339_fallback_providers_list.py`) @jimdawdy-hub — PR #1339
+
+### Changed
+- **CI test stability** — `test_checkpoint_fires_on_activity_counter_increment` was rewritten to use deterministic `threading.Event` synchronization instead of `time.sleep` windows. The old version polled at 0.1s intervals and slept 0.15s/0.25s/0.25s between activity increments, which intermittently failed under CI scheduling jitter (one save instead of two). The new version waits up to 3.0s for the checkpoint thread to actually advance after each increment, with no sensitivity to scheduler timing. (`tests/test_issue765_streaming_persistence.py`)
+
+### Documentation
+- **`CONTRIBUTORS.md`** — new file with stack-ranked credit roll for all 66 contributors, generated from `git log` + `gh api` + CHANGELOG attribution lines. Top contributors table at top of `README.md`.
+- **README, ROADMAP, ARCHITECTURE, SPRINTS, TESTING** — refreshed to v0.50.246 / 3309 tests; removed stale `v0.50.36-local.1` header from ARCHITECTURE.md; updated SPRINTS.md "Where we are now" to reflect ~95% Claude parity. (PR #1340 — already merged, brought forward in this release.)
+
 ## [v0.50.245] — 2026-04-30
 
 ### Fixed

From f328f3b843cb0dccfd951131da5a1d71c1c57440 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:28:20 +0000
Subject: [PATCH 8/9] fix(cancel): gate substring guard on pending_started_at
 timestamp (Opus review)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-release Opus review on v0.50.246 caught a SHOULD-FIX in PR #1338's
cancel_stream synthesis: the symmetric substring guard
(_pending_user in _last_content OR _last_content in _pending_user) was too
loose. Common confirmation replies ("ok", "yes", "go") in the prior turn
would match longer follow-up prompts ("ok please continue"), the synthesis
would be skipped, and the user's typed text would be lost — exactly the
data-loss bug #1298 was supposed to fix.

The fix: gate the substring check on a timestamp comparison. Only treat
the latest user turn as 'already merged by the streaming thread' if its
timestamp is at or after pending_started_at. Earlier turns whose content
happens to be a substring of the pending must not short-circuit synthesis.

Also drops the reverse (_last_content in _pending_user) branch — that
direction was the false-positive vector. Keeps the equality check and the
forward containment check (_pending_user in _last_content), which tolerates
the workspace prefix the streaming thread prepends to the stored turn.

Adds tests/test_issue1298_cancel_and_activity.py::
test_cancel_synthesizes_when_prior_turn_content_is_substring_of_pending —
regression for the exact 'ok' → 'ok please continue' scenario.
---
 api/streaming.py                            | 12 ++++-
 tests/test_issue1298_cancel_and_activity.py | 51 +++++++++++++++++++++
 2 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/api/streaming.py b/api/streaming.py
index 775cf784..35b04b0f 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -2607,6 +2607,7 @@ def cancel_stream(stream_id: str) -> bool:
                     _pending_user = getattr(_cs, 'pending_user_message', None)
                     _pending_atts_raw = getattr(_cs, 'pending_attachments', None)
                     _pending_atts = list(_pending_atts_raw) if isinstance(_pending_atts_raw, (list, tuple)) else []
+                    _pending_started = getattr(_cs, 'pending_started_at', None) or 0
                     _msgs_for_recovery = _cs.messages if isinstance(_cs.messages, list) else None
                     if _pending_user and _msgs_for_recovery is not None:
                         _last_user = None
@@ -2617,9 +2618,16 @@ def cancel_stream(stream_id: str) -> bool:
                         _already_persisted = False
                         if _last_user is not None:
                             _last_content = _last_user.get('content')
-                            if isinstance(_last_content, str):
+                            _last_ts = _last_user.get('timestamp') or 0
+                            # Only treat as already-persisted if the latest user turn
+                            # was created AT OR AFTER the current turn's pending_started_at.
+                            # An earlier turn whose content happens to be a substring
+                            # (e.g. prior reply was "ok", user now types "ok please continue")
+                            # must NOT short-circuit synthesis — that would re-introduce
+                            # the data-loss bug this guard is supposed to prevent.
+                            if isinstance(_last_content, str) and _last_ts >= _pending_started:
                                 # Tolerate the workspace prefix the streaming thread prepends.
-                                if _pending_user in _last_content or _last_content in _pending_user:
+                                if _pending_user == _last_content or _pending_user in _last_content:
                                     _already_persisted = True
                         if not _already_persisted:
                             _user_turn: dict = {
diff --git a/tests/test_issue1298_cancel_and_activity.py b/tests/test_issue1298_cancel_and_activity.py
index fe4b1b96..fe6e363d 100644
--- a/tests/test_issue1298_cancel_and_activity.py
+++ b/tests/test_issue1298_cancel_and_activity.py
@@ -213,6 +213,57 @@ class TestIssue1298CancelPreservesUserMessage:
         assert len(user_messages) == 1
         assert user_messages[0].get("content") == "earlier turn"
 
+    def test_cancel_synthesizes_when_prior_turn_content_is_substring_of_pending(self):
+        """Regression for Opus pre-release review of v0.50.246 (PR #1338):
+
+        The substring guard in cancel_stream() was symmetric — it would skip
+        synthesis if the prior user turn's content was a substring of the new
+        pending message. Common confirmation replies ("ok", "yes", "go") would
+        match longer follow-up prompts ("ok please continue") and the
+        synthesis would be skipped, re-introducing the data-loss bug.
+
+        The fix: gate the substring check on a timestamp comparison —
+        only treat the latest user turn as "already merged by the streaming
+        thread" if its timestamp is at or after pending_started_at. Earlier
+        turns whose content happens to be a substring must not short-circuit
+        the synthesis path.
+        """
+        import time as _time
+        # Prior reply was "ok" (a common short reply).
+        prior_ts = int(_time.time()) - 60  # 1 minute ago
+        prior_user = {
+            "role": "user",
+            "content": "ok",
+            "timestamp": prior_ts,
+        }
+        s = _make_pending_session(
+            session_id="cancel_sid_substring_collision",
+            pending_msg="ok please continue with the analysis",
+            messages=[prior_user],
+        )
+        # The pending turn started AFTER the prior turn was logged.
+        s.pending_started_at = prior_ts + 10
+        s.save()
+        models.SESSIONS[s.session_id] = s
+
+        stream_id, _agent = _setup_cancel_stream_state(s.session_id)
+        cancel_stream(stream_id)
+
+        s2 = models.SESSIONS[s.session_id]
+        user_messages = [m for m in s2.messages
+                         if isinstance(m, dict) and m.get("role") == "user"]
+        contents = [m.get("content") for m in user_messages]
+
+        assert "ok please continue with the analysis" in contents, (
+            "Pending user message must be synthesized — the substring 'ok' from a prior turn "
+            "must NOT cause the synthesis to be skipped. "
+            f"Got contents={contents}"
+        )
+        assert len(user_messages) == 2, (
+            "Expected both the original prior turn AND the synthesized new turn — "
+            f"got {len(user_messages)} user messages"
+        )
+
 
 # ── Client-side: ui.js source-level guards for activity-group state ─────────
 

From a5c10d594dfef7b02f08d52e91d521f8035901d6 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Thu, 30 Apr 2026 16:38:55 +0000
Subject: [PATCH 9/9] =?UTF-8?q?fix(streaming):=20persist=20context=5Flengt?=
 =?UTF-8?q?h=20on=20session=20=E2=80=94=20completes=20#1318=20fix?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-release Opus + nesquena review on v0.50.246 caught that PR #1341
added the data-structure scaffolding (Session.__init__ accepts the 3
fields, save() persists them, compact() exposes them, GET /api/session
returns them) but did NOT add the writer that actually populates them.

Without a writer, the user-visible bug (context-ring shows 0% after
page reload) was NOT fixed by #1341 alone — the fields stayed None
forever because nothing wrote to s.context_length anywhere.

Adds the writer at api/streaming.py:2188 (post-merge per-turn save block,
before s.save()) so the values from agent.context_compressor land on
disk and survive page reloads.

Also rewords the SSE usage payload comment to clarify that the live SSE
payload and the session-level persistence are now distinct paths
(payload below, persistence above).

Adds tests/test_pr1341_context_window_persistence.py — 6 tests (5
structural source checks + 1 save/reload round-trip) covering Session
__init__/save/compact, the routes response, and the streaming.py writer
placement.

Closes #1318 (the actual user-visible bug, not just the scaffolding).
---
 CHANGELOG.md                                  |   2 +-
 api/streaming.py                              |  15 +-
 .../test_pr1341_context_window_persistence.py | 142 ++++++++++++++++++
 3 files changed, 157 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_pr1341_context_window_persistence.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8c60500e..aca5c160 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,7 +12,7 @@
 - **Activity panel no longer auto-collapses when new tool/thinking events arrive** — Both `ensureActivityGroup()` (which re-creates the group with `tool-call-group-collapsed` on every destroy/recreate) and `finalizeThinkingCard()` (which force-adds the collapsed class on every tool boundary) ignored the user's manual expand. Tracks the user's last explicit toggle on the live activity group in a per-turn singleton (`_liveActivityUserExpanded`), restored on re-create and respected by the finalize path. Cleared between turns by `clearLiveToolCards()`. (`static/ui.js`, `tests/test_issue1298_cancel_and_activity.py`) — fixes #1298 (issue 1)
 - **Stale Mermaid render errors no longer leak into every chat** — Mermaid's render-failure path leaves a temporary `<div id="d<id>">` body-level node containing a "Syntax error in text" SVG. The previous code never removed it, so once any Mermaid block failed (or got mis-detected as Mermaid), every subsequent tab kept the syntax-error SVG visible regardless of content. Also tightens Mermaid detection so line-numbered tool output (`123|line`) and code blocks that don't start with a recognized Mermaid keyword are no longer mis-parsed as Mermaid; failed blocks are marked so a later render pass can't retry them. (`static/ui.js`, `tests/test_issue347.py`) @dso2ng — PR #1337
 - **Static asset cache busts automatically on every release** — `<script src="static/ui.js">` and friends were cached indefinitely by browsers and the service worker, so a new release with bug fixes could be invisible to a user until they hard-refreshed. Now `index.html` and `sw.js` registration both inject the current `WEBUI_VERSION` git tag as a `?v=` query string, URL-encoded server-side so unusual git tag formats can't break the JS. The service worker also no longer intercepts requests for itself, ensuring the browser always fetches the freshly-versioned `sw.js` directly from the network. (`api/routes.py`, `static/index.html`, `static/sw.js`, `tests/test_pwa_manifest_sw.py`) @dso2ng — PR #1337
-- **Context window indicator persists across page reloads** — `Session.__init__` now accepts `context_length`, `threshold_tokens`, and `last_prompt_tokens`; `save()` persists them and `compact()` exposes them so the GET `/api/session` response includes them. The frontend context-ring indicator was previously losing its percentage on every session load because the Session model silently dropped these fields when reconstructing from disk. (`api/models.py`, `api/routes.py`) @fxd-jason — PR #1341 (focused split from the held PR #1318)
+- **Context window indicator persists across page reloads (#1318 — fully fixed)** — `Session.__init__` now accepts `context_length`, `threshold_tokens`, and `last_prompt_tokens`; `save()` persists them via the `METADATA_FIELDS` round-trip and `compact()` exposes them on the GET `/api/session` response. **Critically**, `api/streaming.py` now writes the values from `agent.context_compressor` onto the session inside the post-merge per-turn save block, so the values land on disk and survive a page reload. Without that writer, the model fields would have been pure scaffolding — present but never populated. The frontend context-ring indicator was previously losing its percentage on every session load because nothing was writing these fields to disk; that data flow is now end-to-end. (`api/models.py`, `api/routes.py`, `api/streaming.py`, `tests/test_pr1341_context_window_persistence.py`) @fxd-jason — PR #1341 (focused split from the held PR #1318) + writer added during pre-release review
 - **`fallback_providers` list config no longer crashes streaming** — `api/streaming.py:1701` previously read `_cfg.get('fallback_model')` and called `.get('model', '')` on the result. When users had `fallback_providers: [{...}, {...}]` in their config (the chained-fallback form documented in CHANGELOG since v0.50.151), the streaming path crashed with `AttributeError: 'list' object has no attribute 'get'`. Now consults both `fallback_model` (single dict, legacy) and `fallback_providers` (list, new), picks the first valid entry from the list, and defends both paths with `isinstance` checks. (`api/streaming.py`, `tests/test_pr1339_fallback_providers_list.py`) @jimdawdy-hub — PR #1339
 
 ### Changed
diff --git a/api/streaming.py b/api/streaming.py
index 35b04b0f..33017563 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -2185,6 +2185,17 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                         if isinstance(_rm, dict) and _rm.get('role') == 'assistant':
                             _rm['reasoning'] = _reasoning_text
                             break
+                # Persist context window data on the session so the context-ring
+                # indicator survives a page reload (#1318). Must run BEFORE
+                # s.save() for the same reason as the reasoning trace above.
+                # The fields are captured into the SSE usage payload below; this
+                # block writes them to the session itself so GET /api/session
+                # returns them on reload instead of falling back to 0.
+                _cc_for_save = getattr(agent, 'context_compressor', None)
+                if _cc_for_save:
+                    s.context_length = getattr(_cc_for_save, 'context_length', 0) or 0
+                    s.threshold_tokens = getattr(_cc_for_save, 'threshold_tokens', 0) or 0
+                    s.last_prompt_tokens = getattr(_cc_for_save, 'last_prompt_tokens', 0) or 0
                 s.save()
             # Sync to state.db for /insights (opt-in setting)
             try:
@@ -2203,7 +2214,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
             except Exception:
                 logger.debug("Failed to sync session to insights")
             usage = {'input_tokens': input_tokens, 'output_tokens': output_tokens, 'estimated_cost': estimated_cost}
-            # Include context window data from the agent's compressor for the UI indicator
+            # Include context window data from the agent's compressor for the UI indicator.
+            # The session-level persistence happens above (before s.save()) so the values
+            # survive a page reload; this block only populates the live SSE usage payload.
             _cc = getattr(agent, 'context_compressor', None)
             if _cc:
                 usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
diff --git a/tests/test_pr1341_context_window_persistence.py b/tests/test_pr1341_context_window_persistence.py
new file mode 100644
index 00000000..517d89e5
--- /dev/null
+++ b/tests/test_pr1341_context_window_persistence.py
@@ -0,0 +1,142 @@
+"""Regression test for PR #1341 + Opus pre-release review of v0.50.246.
+
+PR #1341 added context_length/threshold_tokens/last_prompt_tokens fields to
+the Session model — but didn't add the writer that actually populates them
+during streaming. The pre-release review caught this: without the writer,
+the user-visible bug (context-ring shows 0% after page reload) would NOT
+have been fixed by #1341 alone.
+
+This module verifies (by inspecting source text, plus one real round-trip) that:
+1. The post-merge save block in api/streaming.py assigns the session's
+   context_length / threshold_tokens / last_prompt_tokens from the agent's
+   compressor BEFORE s.save() is called (so they land on disk).
+2. The Session model and the GET /api/session route reference the three fields.
+3. A session saved with the fields set retains the values when reloaded.
+
+Implementation reference: api/streaming.py around line 2188 (the per-turn
+post-merge save) writes from getattr(agent, 'context_compressor', None).
+"""
+import re
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parent.parent
+STREAMING = ROOT / "api" / "streaming.py"
+MODELS = ROOT / "api" / "models.py"
+ROUTES = ROOT / "api" / "routes.py"
+
+
+def test_streaming_persists_context_fields_on_session_before_save():
+    """The post-merge per-turn save block must write the three fields to the
+    session BEFORE calling s.save(), otherwise the values never reach disk."""
+    src = STREAMING.read_text(encoding="utf-8")
+
+    # Find the post-merge save block — anchored on the unique reasoning trace
+    # marker right above the persistence block.
+    block_start = src.find("if _reasoning_text and s.messages:")
+    assert block_start != -1, "Reasoning-trace marker not found in streaming.py"
+
+    # Save call follows shortly after
+    save_call = src.find("\n                s.save()", block_start)
+    assert save_call != -1, "s.save() not found after the post-merge marker"
+    assert save_call - block_start < 2000, (
+        "s.save() should be close to the post-merge marker — block expanded unexpectedly"
+    )
+
+    block = src[block_start:save_call]
+
+    # The three fields must all be assigned on s within this block
+    assert "s.context_length" in block, (
+        "s.context_length must be written before s.save() in the post-merge block"
+    )
+    assert "s.threshold_tokens" in block, (
+        "s.threshold_tokens must be written before s.save() in the post-merge block"
+    )
+    assert "s.last_prompt_tokens" in block, (
+        "s.last_prompt_tokens must be written before s.save() in the post-merge block"
+    )
+
+    # The values must come from the agent's context_compressor
+    assert "context_compressor" in block, (
+        "Values must be sourced from agent.context_compressor"
+    )
+
+
+def test_session_init_accepts_context_fields():
+    """Session.__init__ must accept the three fields as named kwargs."""
+    src = MODELS.read_text(encoding="utf-8")
+    # The init signature spans many lines — read the full def block
+    init_match = re.search(r"def __init__\(self,(.*?)\):", src, re.DOTALL)
+    assert init_match, "Session.__init__ signature not found"
+    sig = init_match.group(1)
+    assert "context_length" in sig, "Session.__init__ must accept context_length"
+    assert "threshold_tokens" in sig, "Session.__init__ must accept threshold_tokens"
+    assert "last_prompt_tokens" in sig, "Session.__init__ must accept last_prompt_tokens"
+
+
+def test_session_metadata_fields_includes_context_fields():
+    """Session.save() METADATA_FIELDS must include all three for round-trip persistence."""
+    src = MODELS.read_text(encoding="utf-8")
+    # Locate METADATA_FIELDS list
+    meta_match = re.search(
+        r"METADATA_FIELDS\s*=\s*\[(.*?)\]",
+        src,
+        re.DOTALL,
+    )
+    assert meta_match, "METADATA_FIELDS list not found in Session.save"
+    fields = meta_match.group(1)
+    assert "'context_length'" in fields, "METADATA_FIELDS must include 'context_length'"
+    assert "'threshold_tokens'" in fields, "METADATA_FIELDS must include 'threshold_tokens'"
+    assert "'last_prompt_tokens'" in fields, "METADATA_FIELDS must include 'last_prompt_tokens'"
+
+
+def test_session_compact_exposes_context_fields():
+    """Session.compact() must include the three fields in its output dict."""
+    src = MODELS.read_text(encoding="utf-8")
+    # Find compact() method body
+    compact_idx = src.find("def compact(")
+    assert compact_idx != -1, "Session.compact not found"
+    # Body runs to the next method def, or 4000 characters if none follows
+    end = src.find("\n    def ", compact_idx + 1)
+    body = src[compact_idx:end if end != -1 else compact_idx + 4000]
+
+    assert "'context_length':" in body, "compact() must include context_length"
+    assert "'threshold_tokens':" in body, "compact() must include threshold_tokens"
+    assert "'last_prompt_tokens':" in body, "compact() must include last_prompt_tokens"
+
+
+def test_routes_session_get_returns_context_fields():
+    """GET /api/session response must include the three fields."""
+    src = ROUTES.read_text(encoding="utf-8")
+    # The session-detail response builder uses getattr(s, ..., 0) or 0 pattern.
+    # Look for the three keys in the same response shape.
+    assert '"context_length"' in src, "GET /api/session response must include context_length"
+    assert '"threshold_tokens"' in src, "GET /api/session response must include threshold_tokens"
+    assert '"last_prompt_tokens"' in src, "GET /api/session response must include last_prompt_tokens"
+
+
+def test_session_round_trip_persists_context_fields(tmp_path, monkeypatch):
+    """Real round-trip: save a Session with the fields set, reload, fields still there.
+
+    Patches SESSION_DIR on the live api.models module so we don't pollute
+    sys.modules state and break test ordering for sibling tests that depend
+    on a stable api.models import (e.g. test_session_sidecar_repair.py).
+    """
+    from api import models
+
+    # Use tmp_path as the session dir for this test only
+    sessions_dir = tmp_path / "sessions"
+    sessions_dir.mkdir(parents=True, exist_ok=True)
+    monkeypatch.setattr(models, "SESSION_DIR", sessions_dir)
+
+    s = models.Session(session_id="ctxtest1", title="Context test")
+    s.context_length = 200000
+    s.threshold_tokens = 180000
+    s.last_prompt_tokens = 45123
+    s.save()
+
+    # Reload from disk
+    s2 = models.Session.load("ctxtest1")
+    assert s2 is not None, "Session should reload"
+    assert s2.context_length == 200000, f"context_length lost on reload: got {s2.context_length}"
+    assert s2.threshold_tokens == 180000, f"threshold_tokens lost on reload: got {s2.threshold_tokens}"
+    assert s2.last_prompt_tokens == 45123, f"last_prompt_tokens lost on reload: got {s2.last_prompt_tokens}"