From 67e29fa9918f78fcdec52aa4da6fc23380acaed7 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Mon, 11 May 2026 13:13:26 -0600
Subject: [PATCH 01/12] feat: add opt-in streaming text fade

---
 STREAMING_FADE_HANDOFF.md      | 289 +++++++++++++++++
 api/config.py                  |   2 +
 static/boot.js                 |   2 +
 static/i18n.js                 |  18 ++
 static/index.html              |   7 +
 static/messages.js             | 564 +++++++++++++++++++++++----------
 static/panels.js               |  14 +-
 static/style.css               |   8 +
 tests/test_smooth_text_fade.py | 290 +++++++++++++++++
 9 files changed, 1032 insertions(+), 162 deletions(-)
 create mode 100644 STREAMING_FADE_HANDOFF.md
 create mode 100644 tests/test_smooth_text_fade.py

diff --git a/STREAMING_FADE_HANDOFF.md b/STREAMING_FADE_HANDOFF.md
new file mode 100644
index 00000000..ea3c8aeb
--- /dev/null
+++ b/STREAMING_FADE_HANDOFF.md
@@ -0,0 +1,289 @@
+# Streaming Fade Text Effect Handoff
+
+## Summary
+
+This branch adds an opt-in **Fade text effect** preference for HermesWebUI streaming assistant responses.
+
+When enabled, newly streamed assistant words fade in instead of appearing via the default incremental markdown renderer. The goal is a ChatGPT/OpenWebUI-like animated streaming feel that still keeps pace with high-throughput model output.
+
+The feature is **off by default** for performance.
+
+## User-facing behavior
+
+- New setting: **Settings → Preferences → Fade text effect**
+- Runtime global: `window._fadeTextEffect`
+- Default: `false`
+- When enabled:
+  - assistant stream uses a playout buffer rather than immediately rendering the full incoming chunk
+  - visible text advances at adaptive speed based on live incoming word velocity, backlog, and stream age
+  - new words are wrapped in spans and animated with opacity-only fade
+  - high-speed output uses rolling phrase-sized waves instead of giant block pops
+  - Hermes' bright live cursor is hidden during fade mode
+
+## Main files changed
+
+### `static/messages.js`
+
+Core streaming implementation inside `attachLiveStream(...)`.
+
+Added local fade state:
+
+- `_streamFadeVisibleText`
+- `_streamFadeWordCarry`
+- `_streamFadeWordBornAt`
+- `_streamFadeArrivalWps`
+- `_streamFadeLastRevealCount`
+- `_streamFadeLatestAnimationEndAt`
+
+Key helpers:
+
+- `_resetStreamFadeState()`
+- `_cancelPendingStreamRender()`
+- `_shouldUseStreamFade()`
+- `_streamFadeWordCountOf(text)`
+- `_streamFadeNextText(targetText)`
+- `_renderStreamingFadeMarkdown(displayText)`
+- `_wrapStreamingFadeWords(root)`
+- `_drainStreamFadeBeforeDone(onDone)`
+
+Important behavior:
+
+- Fade mode renders at ~60fps (`16ms`) while default streaming remains ~15fps (`66ms`).
+- Default SMD streaming path remains intact when fade mode is off.
+- On `done`, fade mode drains remaining buffered text and waits for the final stagger/fade window before the final `renderMessages()` replacement.
+- Prefix resets now call `_resetStreamFadeState()` so stale birth timestamps do not leak across markdown/tool-call rewrites.
+
+### `static/style.css`
+
+Adds opacity-only streaming fade CSS:
+
+```css
+.stream-fade-word.is-new {
+  animation: stream-fade-word-in var(--stream-fade-ms,140ms) ease-out both;
+}
+@keyframes stream-fade-word-in { from { opacity:0; } to { opacity:1; } }
+```
+
+Also hides the live cursor during fade mode:
+
+```css
+[data-live-assistant="1"]:last-child .msg-body.stream-fade-active > :last-child::after,
+[data-live-assistant="1"]:last-child .msg-body.stream-fade-active:not(:has(> *))::after {
+  display:none;
+  content:none;
+}
+```
+
+### Settings plumbing
+
+- `api/config.py`
+  - adds `fade_text_effect` default and bool key
+- `static/boot.js`
+  - initializes `window._fadeTextEffect`
+- `static/index.html`
+  - adds Preferences checkbox
+- `static/panels.js`
+  - loads, autosaves, and saves the setting
+- `static/i18n.js`
+  - adds locale strings for all supported locales
+
+### Tests
+
+New file:
+
+- `tests/test_smooth_text_fade.py`
+
+Coverage includes:
+
+- setting persistence/config plumbing
+- Preferences UI plumbing
+- i18n key presence
+- fade helper presence
+- executable Node regressions that invoke `_streamFadeNextText(...)`
+- speed-ramp behavior
+- high-speed rolling-wave behavior
+- done-drain behavior
+- CSS expectations
+- cursor hiding
+
+## Tunable constants
+
+Defined near the top of `attachLiveStream(...)` in `static/messages.js`:
+
+```js
+const _STREAM_FADE_MS=140;
+const _STREAM_FADE_WAVE_MS=320;
+const _STREAM_FADE_MAX_STAGGER_MS=520;
+```
+
+Meaning:
+
+- `_STREAM_FADE_MS`: base fade duration for normal streaming
+- `_STREAM_FADE_WAVE_MS`: longer duration for high-speed multi-word waves
+- `_STREAM_FADE_MAX_STAGGER_MS`: max stagger spread across newly inserted words
+
+Adaptive playout speed currently uses:
+
+```js
+const baseWps = 30 + Math.min(streamAgeSeconds * 4, 35); // 30 → 65 wps
+const arrivalWps = _streamFadeArrivalWps ? Math.min(_streamFadeArrivalWps * 2.4 + 20, 320) : 0;
+const backlogWps = backlogWords > 0 ? Math.min(30 + backlogWords * 8, 420) : 0;
+const wordsPerSecond = Math.min(420, Math.max(baseWps, arrivalWps, backlogWps));
+```
+
+Rolling burst floor:
+
+```js
+const burstFloor = backlogWords >= 120 ? 24
+  : backlogWords >= 60 ? 18
+  : backlogWords >= 30 ? 12
+  : wordsPerSecond >= 300 ? 8
+  : wordsPerSecond >= 220 ? 6
+  : 0;
+```
+
+High-speed waves then use:
+
+```js
+const fadeMs = revealedThisFrame >= 8 ? _STREAM_FADE_WAVE_MS
+  : revealedThisFrame >= 4 ? 240
+  : _STREAM_FADE_MS;
+
+const waveStepMs = revealedThisFrame >= 18 ? 18
+  : revealedThisFrame >= 8 ? 22
+  : revealedThisFrame >= 4 ? 16
+  : 10;
+```
+
+## Design decisions and why
+
+### Why not use only OpenWebUI's renderer?
+
+A wholesale renderer transplant was avoided. Hermes keeps its existing streaming markdown path as default, and fade mode is a selective cosmetic layer.
+
+### Why a playout buffer?
+
+Hermes receives backend stream chunks that can arrive faster, or in larger bursts, than is visually comfortable. Rendering each chunk immediately can pop large blocks of text into the DOM. The playout buffer separates:
+
+- text received from backend (`assistantText`)
+- text currently visible (`_streamFadeVisibleText`)
+
+### Why adaptive speed?
+
+A fixed reveal rate felt robotic and lagged behind faster models. Earlier attempts used a session-wide average arrival rate; these failed when the model spent time reasoning before writing, because the reasoning pause inflated the elapsed-time denominator, the average stayed low, and the ramp never triggered.
+
+Current approach tracks **live target-word arrival velocity** using deltas:
+
+```js
+const instantArrivalWps = (targetWords - _streamFadeLastTargetWords) * 1000 / arrivalElapsedMs;
+_streamFadeArrivalWps = _streamFadeArrivalWps
+  ? (_streamFadeArrivalWps * 0.65 + instantArrivalWps * 0.35)
+  : instantArrivalWps;
+```
+
+Then playout deliberately exceeds arrival velocity so it catches up.
+
+### Why rolling waves?
+
+At very high throughput, revealing too many words in one frame felt chunky and made the fade almost disappear. The current implementation reduces one-frame burst size and stretches/staggers high-speed waves across several hundred milliseconds.
+
+This makes fast output feel more like animated text sweeping in rather than paragraph blocks appearing.
+
+## Performance notes
+
+Fade mode is more expensive than the default streaming path because it re-renders markdown and wraps visible text nodes during active streaming.
+
+Mitigations:
+
+- feature is opt-in and off by default
+- default streaming-markdown path remains unchanged when disabled
+- fade render cadence is capped at ~60fps
+- skip wrapping inside `pre`, `code`, `script`, `style`, `textarea`, `svg`, and `math`
+- animation is opacity-only, compositor-friendly
+
+Expected impact:
+
+- fine on modern desktop/Apple Silicon hardware
+- higher CPU/battery use during long/high-speed responses
+- users can disable it instantly from Preferences
+
+## Verification performed
+
+Commands run successfully:
+
+```bash
+cd /Users/agent/HermesWebUI
+PY=/Users/agent/.hermes/hermes-agent/venv/bin/python
+$PY -m pytest tests/test_smooth_text_fade.py tests/test_1003_preferences_autosave.py tests/test_streaming_markdown.py tests/test_chinese_locale.py tests/test_japanese_locale.py tests/test_korean_locale.py tests/test_russian_locale.py tests/test_spanish_locale.py -q
+node --check static/messages.js static/panels.js static/boot.js static/i18n.js
+$PY -m py_compile api/config.py
+git diff --check
+```
+
+Latest result before writing this handoff:
+
+```text
+99 passed
+```
+
+Also performed:
+
+- dead/debug scan over diff for `TODO`, `FIXME`, `console.log`, `debugger`, stale `100ms`, stale `220ms`, stale `48` burst constants
+- review cleanup: blocked late `token` / `reasoning` / `interim_assistant` mutations during fade done-drain, moved fade wave calculations out of the per-word hot path, and made manual Settings save refresh `window._fadeTextEffect`
+- HermesWebUI restart via launchctl
+- live asset verification via `curl http://127.0.0.1:8787/static/messages.js`
+- real chat/SSE smoke test: temp session, prompt `Reply with exactly: OK`, received `OK`, got `done`, deleted temp session
+
+## Current service state when last verified
+
+- HermesWebUI runs on port `8787`
+- Restarted during validation
+- Health endpoint returned OK
+
+Useful checks:
+
+```bash
+curl -fsS http://127.0.0.1:8787/health
+curl -fsS http://127.0.0.1:8787/static/messages.js | grep -E "_STREAM_FADE_WAVE_MS=320|_STREAM_FADE_MAX_STAGGER_MS=520|burstFloor=backlogWords>=120\?24"
+curl -fsS http://127.0.0.1:8787/static/style.css | grep -E "var\(--stream-fade-ms,140ms\)|stream-fade-word-in"
+```
+
+## Known caveats
+
+- LLM telemetry often reports **tokens/sec**, while the UI reveals visible words. These are not equivalent.
+- The renderer cannot reveal text that the backend has not delivered yet; playout can only catch up to the received text, never run ahead of it.
+- If backend chunks arrive as very large bursts, the rolling-wave logic smooths them but may still require subjective tuning.
+- The current visual is close to the intended effect, but final merge review should include manual browser testing with:
+  - normal-speed model
+  - high-throughput model (~100+ tok/s)
+  - long markdown responses
+  - code blocks
+  - lists/tables
+  - tool-call-heavy responses
+
+## Suggested next review steps
+
+1. Manually test in browser after hard refresh (`Cmd+Shift+R`).
+2. Try a high-throughput long essay and tune only these constants if needed:
+   - `_STREAM_FADE_WAVE_MS`
+   - `_STREAM_FADE_MAX_STAGGER_MS`
+   - burst floor thresholds
+   - `waveStepMs`
+3. Check whether the re-indentation of the `done` handler in the diff is acceptable for the PR. It is intentional: the original `done` body is now wrapped in `_finishDone` so fade mode can drain before the final DOM replacement.
+4. If submitting PR, mention the feature is opt-in/off-by-default and the default streaming markdown path remains unchanged.
+
+## Files to include in PR
+
+Expected modified/new files:
+
+```text
+api/config.py
+static/boot.js
+static/i18n.js
+static/index.html
+static/messages.js
+static/panels.js
+static/style.css
+tests/test_smooth_text_fade.py
+STREAMING_FADE_HANDOFF.md
+```
diff --git a/api/config.py b/api/config.py
index 0c241ce5..bcd8b34f 100644
--- a/api/config.py
+++ b/api/config.py
@@ -3880,6 +3880,7 @@ _SETTINGS_DEFAULTS = {
     "send_key": "enter",  # 'enter' or 'ctrl+enter'
     "show_token_usage": False,  # show input/output token badge below assistant messages
     "show_tps": False,  # show tokens-per-second chip in assistant message headers
+    "fade_text_effect": False,  # animate newly streamed words with a lightweight fade-in effect
     "show_cli_sessions": False,  # merge CLI sessions from state.db into the sidebar
     "sync_to_insights": False,  # mirror WebUI token usage to state.db for /insights
     "check_for_updates": True,  # check if webui/agent repos are behind upstream
@@ -4008,6 +4009,7 @@ _SETTINGS_BOOL_KEYS = {
     "onboarding_completed",
     "show_token_usage",
     "show_tps",
+    "fade_text_effect",
     "show_cli_sessions",
     "sync_to_insights",
     "check_for_updates",
diff --git a/static/boot.js b/static/boot.js
index e08ad6e9..5bf48317 100644
--- a/static/boot.js
+++ b/static/boot.js
@@ -1376,6 +1376,7 @@ function applyBotName(){
     window._sendKey=s.send_key||'enter';
     window._showTokenUsage=!!s.show_token_usage;
     window._showTps=!!s.show_tps;
+    window._fadeTextEffect=!!s.fade_text_effect;
     window._showCliSessions=!!s.show_cli_sessions;
     window._soundEnabled=!!s.sound_enabled;
     window._notificationsEnabled=!!s.notifications_enabled;
@@ -1412,6 +1413,7 @@ function applyBotName(){
     window._sendKey='enter';
     window._showTokenUsage=false;
     window._showTps=false;
+    window._fadeTextEffect=false;
     window._showCliSessions=false;
     window._soundEnabled=false;
     window._notificationsEnabled=false;
diff --git a/static/i18n.js b/static/i18n.js
index 20820631..e27c8fac 100644
--- a/static/i18n.js
+++ b/static/i18n.js
@@ -230,6 +230,8 @@ const LOCALES = {
     busy_interrupt_confirm: 'Interrupted — sending new message',
     settings_label_busy_input_mode: 'Busy input mode',
     settings_desc_busy_input_mode: 'Controls what happens when you send a message while the agent is running. Queue waits; Interrupt cancels and starts fresh; Steer injects a correction mid-turn without interrupting (falls back to queue when agent or stream unavailable).',
+    settings_label_fade_text_effect: 'Fade text effect',
+    settings_desc_fade_text_effect: 'Fade newly streamed words in while the assistant is responding. Similar to OpenWebUI; off by default for maximum performance.',
     settings_busy_input_mode_queue: 'Queue follow-up',
     settings_busy_input_mode_interrupt: 'Interrupt current turn',
     settings_busy_input_mode_steer: 'Steer (mid-turn correction)',
@@ -1320,6 +1322,8 @@ const LOCALES = {
     busy_interrupt_confirm: '中断 — 新しいメッセージを送信中',
     settings_label_busy_input_mode: 'ビジー時の入力モード',
     settings_desc_busy_input_mode: 'エージェント実行中にメッセージを送信した時の動作を制御します。Queue は待機、Interrupt はキャンセルして再開、Steer は中断せずにターン中に修正を注入します (エージェントやストリームが利用不可ならキューにフォールバック)。',
+    settings_label_fade_text_effect: 'テキストのフェード効果',
+    settings_desc_fade_text_effect: 'アシスタントの応答中に新しくストリーミングされた単語をフェードインします。OpenWebUI に似た表示です。最大パフォーマンスのため既定ではオフです。',
     settings_busy_input_mode_queue: 'フォローアップをキュー',
     settings_busy_input_mode_interrupt: '現在のターンを中断',
     settings_busy_input_mode_steer: 'ステア (ターン中の修正)',
@@ -2366,6 +2370,8 @@ const LOCALES = {
     busy_interrupt_confirm: 'Прервано — отправка нового сообщения',
     settings_label_busy_input_mode: 'Режим ввода при занятости',
     settings_desc_busy_input_mode: 'Определяет поведение при отправке сообщения во время работы агента. Очередь ждёт; Прерывание отменяет и начинает заново; Steer внедряет коррекцию без прерывания.',
+    settings_label_fade_text_effect: 'Эффект плавного появления текста',
+    settings_desc_fade_text_effect: 'Плавно показывает новые слова во время ответа ассистента. Похоже на OpenWebUI; по умолчанию выключено для максимальной производительности.',
     settings_busy_input_mode_queue: 'Поставить в очередь',
     settings_busy_input_mode_interrupt: 'Прервать текущий оборот',
     settings_busy_input_mode_steer: 'Steer (прерывание + отправка)',
@@ -3427,6 +3433,8 @@ const LOCALES = {
     busy_interrupt_confirm: 'Interrumpido \u2014 enviando nuevo mensaje',
     settings_label_busy_input_mode: 'Modo de entrada ocupada',
     settings_desc_busy_input_mode: 'Controla qué sucede al enviar mensajes mientras el agente está activo. Cola espera; Interrumpir cancela y empieza de nuevo; Steer inyecta una corrección sin interrumpir (usa cola si el agente no está disponible).',
+    settings_label_fade_text_effect: 'Efecto de desvanecimiento de texto',
+    settings_desc_fade_text_effect: 'Hace aparecer gradualmente las palabras nuevas mientras el asistente responde. Similar a OpenWebUI; desactivado por defecto para máximo rendimiento.',
     settings_busy_input_mode_queue: 'Poner en cola',
     settings_busy_input_mode_interrupt: 'Interrumpir turno actual',
     settings_busy_input_mode_steer: 'Steer (corrección a mitad de turno)',
@@ -4427,6 +4435,8 @@ const LOCALES = {
     busy_interrupt_confirm: 'Unterbrochen \u2014 neue Nachricht wird gesendet',
     settings_label_busy_input_mode: 'Eingabemodus bei Besch\u00e4ftigung',
     settings_desc_busy_input_mode: 'Steuert, was passiert, wenn Sie w\u00e4hrend der Agentenaktivit\u00e4t eine Nachricht senden. Warteschlange wartet; Unterbrechen bricht ab und startet neu; Steer f\u00fcgt eine Korrektur ein ohne zu unterbrechen.',
+    settings_label_fade_text_effect: 'Text-Fade-Effekt',
+    settings_desc_fade_text_effect: 'Blendet neu gestreamte Wörter während der Antwort des Assistenten sanft ein. Ähnlich wie OpenWebUI; für maximale Leistung standardmäßig deaktiviert.',
     settings_busy_input_mode_queue: 'In Warteschlange einreihen',
     settings_busy_input_mode_interrupt: 'Aktuellen Durchgang unterbrechen',
     settings_busy_input_mode_steer: 'Steer (Korrektur ohne Unterbrechung)',
@@ -5474,6 +5484,8 @@ const LOCALES = {
     busy_interrupt_confirm: '已中断 — 正在发送新消息',
     settings_label_busy_input_mode: '忙碌输入模式',
     settings_desc_busy_input_mode: '控制在代理运行时发送消息的行为。队列等待；中断取消并重新开始；Steer中途注入纠正，不中断。',
+    settings_label_fade_text_effect: '文本淡入效果',
+    settings_desc_fade_text_effect: '在助手回复时让新流式输出的词语淡入显示。类似 OpenWebUI；为获得最佳性能默认关闭。',
     settings_busy_input_mode_queue: '加入队列',
     settings_busy_input_mode_interrupt: '中断当前回合',
     settings_busy_input_mode_steer: 'Steer（中断 + 发送）',
@@ -7009,6 +7021,8 @@ const LOCALES = {
     busy_interrupt_confirm: '\u5df2\u4e2d\u65ad \u2014 \u6b63\u5728\u767c\u9001\u65b0\u8a0a\u606f',
     settings_label_busy_input_mode: '\u5fd9\u788c\u8f38\u5165\u6a21\u5f0f',
     settings_desc_busy_input_mode: '\u63a7\u5236\u5728\u4ee3\u7406\u904b\u884c\u6642\u767c\u9001\u8a0a\u606f\u7684\u884c\u70ba\u3002\u4f47\u5217\u7b49\u5f85\uff1b\u4e2d\u65b7\u53d6\u6d88\u4e26\u91cd\u65b0\u958b\u59cb\uff1bSteer\u4e2d\u9014\u6ce8\u5165\u7d3a\u6b63\uff0c\u4e0d\u4e2d\u65b7\u3002',
+    settings_label_fade_text_effect: '文字淡入效果',
+    settings_desc_fade_text_effect: '在助理回覆時讓新串流輸出的詞語淡入顯示。類似 OpenWebUI；為獲得最佳效能預設關閉。',
     settings_busy_input_mode_queue: '\u52a0\u5165\u4f47\u5217',
     settings_busy_input_mode_interrupt: '\u4e2d\u65ad\u7576\u524d\u56de\u5408',
     settings_busy_input_mode_steer: 'Steer\uff08\u4e2d\u9014\u7d3a\u6b63\uff09',
@@ -7555,6 +7569,8 @@ const LOCALES = {
     busy_interrupt_confirm: 'Interrompido — enviando nova mensagem',
     settings_label_busy_input_mode: 'Modo de input ocupado',
     settings_desc_busy_input_mode: 'Controla o que acontece ao enviar mensagem com agente rodando. Fila espera; Interromper cancela; Steer injeta correção.',
+    settings_label_fade_text_effect: 'Efeito de fade no texto',
+    settings_desc_fade_text_effect: 'Faz novas palavras aparecerem gradualmente enquanto o assistente responde. Similar ao OpenWebUI; desativado por padrão para melhor desempenho.',
     settings_busy_input_mode_queue: 'Enfileirar follow-up',
     settings_busy_input_mode_interrupt: 'Interromper turno atual',
     settings_busy_input_mode_steer: 'Steer (correção no meio do turno)',
@@ -8521,6 +8537,8 @@ const LOCALES = {
     busy_interrupt_confirm: 'Interrupted — sending new message',
     settings_label_busy_input_mode: '작업 중 입력 방식',
     settings_desc_busy_input_mode: '에이전트가 실행 중일 때 메시지를 보내면 어떻게 처리할지 제어합니다. 대기는 다음 차례까지 기다리고, 중단은 현재 작업을 취소하고 새로 시작하며, 조정은 현재 작업을 중단하지 않고 중간 수정 사항을 전달합니다(에이전트 또는 스트림을 사용할 수 없으면 대기로 전환).',
+    settings_label_fade_text_effect: '텍스트 페이드 효과',
+    settings_desc_fade_text_effect: '어시스턴트가 응답하는 동안 새로 스트리밍되는 단어를 부드럽게 표시합니다. OpenWebUI와 비슷하며, 최대 성능을 위해 기본값은 꺼짐입니다.',
     settings_busy_input_mode_queue: '후속 메시지 대기',
     settings_busy_input_mode_interrupt: '현재 작업 중단',
     settings_busy_input_mode_steer: '조정(중간 수정)',
diff --git a/static/index.html b/static/index.html
index 2fd68911..40e0d2bb 100644
--- a/static/index.html
+++ b/static/index.html
@@ -981,6 +981,13 @@
               </label>
               <div style="font-size:11px;color:var(--muted);margin-top:4px">Displays tokens per second in assistant message headers while streaming and after a response completes. Off by default.</div>
             </div>
+            <div class="settings-field">
+              <label style="display:flex;align-items:center;gap:8px;cursor:pointer">
+                <input type="checkbox" id="settingsFadeTextEffect" style="width:15px;height:15px;accent-color:var(--accent)">
+                <span data-i18n="settings_label_fade_text_effect">Fade text effect</span>
+              </label>
+              <div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_fade_text_effect">Fade newly streamed words in while the assistant is responding. Similar to OpenWebUI; off by default for maximum performance.</div>
+            </div>
             <div class="settings-field">
               <label style="display:flex;align-items:center;gap:8px;cursor:pointer">
                 <input type="checkbox" id="settingsSimplifiedToolCalling" style="width:15px;height:15px;accent-color:var(--accent)">
diff --git a/static/messages.js b/static/messages.js
index 560ff623..9cdd6834 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -510,6 +510,20 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   // the final answer or the response to render twice.
   let _streamFinalized=false;
   let _pendingRafHandle=null;
+  let _streamFadeWordCount=0; // words wrapped by the previous _wrapStreamingFadeWords pass
+  let _streamFadeVisibleText=''; // prefix of the target text released for display so far
+  let _streamFadeLastTickMs=0; // performance.now() of the last playout tick (0 = not started)
+  let _streamFadeWordCarry=0; // fractional word budget carried between ticks
+  let _streamFadeWordBornAt=[]; // per-word animation start time, indexed from 1
+  let _streamFadeStartedAt=0; // performance.now() of the first tick after a reset
+  let _streamFadeLastTargetWords=0; // target word count at the last arrival sample
+  let _streamFadeLastArrivalMs=0; // performance.now() of the last arrival sample
+  let _streamFadeArrivalWps=0; // smoothed (EWMA) incoming words per second
+  let _streamFadeLatestAnimationEndAt=0; // latest time any word's fade is due to finish
+  let _streamFadeLastRevealCount=0; // words released by the most recent tick
+  const _STREAM_FADE_MS=140; // fade duration when fewer than 4 words are revealed in a frame
+  const _STREAM_FADE_WAVE_MS=320; // fade duration when 8 or more words are revealed in a frame
+  const _STREAM_FADE_MAX_STAGGER_MS=520; // upper bound on a word's start delay within a wave
 
   // rAF-throttled rendering: buffer tokens, render at most once per frame
   let _renderPending=false;
@@ -667,12 +681,221 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       if(!_SMD_SAFE_URL_RE.test(v)){n.removeAttribute('src');n.setAttribute('data-blocked-scheme','1');}
     }
   }
+
+  function _resetStreamFadeState(){ // Return every fade playout variable to its initial value.
+    _streamFadeWordCount=0;
+    _streamFadeVisibleText='';
+    _streamFadeLastTickMs=0; // 0 makes the next tick re-stamp the tick and start times
+    _streamFadeWordCarry=0;
+    _streamFadeWordBornAt=[]; // dropping birth times means every word counts as new again
+    _streamFadeStartedAt=0;
+    _streamFadeLastTargetWords=0;
+    _streamFadeLastArrivalMs=0; // 0 makes the next tick take a fresh arrival baseline
+    _streamFadeArrivalWps=0;
+    _streamFadeLatestAnimationEndAt=0;
+    _streamFadeLastRevealCount=0;
+  }
+  function _cancelPendingStreamRender(){ // Drop the queued render callback, if there is one.
+    if(_pendingRafHandle===null) return;
+    cancelAnimationFrame(_pendingRafHandle); // presumably the handle may come from rAF or setTimeout, so both are cancelled — confirm in _scheduleRender
+    clearTimeout(_pendingRafHandle);
+    _pendingRafHandle=null;
+    _renderPending=false; // allow a later render to be scheduled again
+  }
+  function _shouldUseStreamFade(){ // True only when the opt-in preference global is exactly `true`.
+    return window._fadeTextEffect===true; // strict check: truthy non-boolean values do not enable fade mode
+  }
+  function _streamFadeSkipNode(node){ // True for elements whose text must not be wrapped in fade spans.
+    if(!node||node.nodeType!==1) return false; // nodeType 1 = element; anything else is never skipped
+    const tag=(node.tagName||'').toLowerCase(); // lower-cased so the comparison is case-insensitive
+    return tag==='pre'||tag==='code'||tag==='script'||tag==='style'||tag==='textarea'||tag==='svg'||tag==='math';
+  }
+  function _streamFadeWordCountOf(text){ // Count whitespace-separated tokens; falsy input gives 0.
+    const m=String(text||'').match(/\S+/g); // match() returns null when there are no non-space runs
+    return m?m.length:0;
+  }
+  function _streamFadeNextText(targetText){ // Advance the playout buffer one tick; returns {text, caughtUp} for what to show now.
+    targetText=String(targetText||'');
+    const now=performance.now();
+    if(!targetText){ // nothing to show: clear state and report caught up
+      _resetStreamFadeState();
+      return {text:'', caughtUp:true};
+    }
+    if(!_streamFadeVisibleText||!targetText.startsWith(_streamFadeVisibleText)){
+      // Markdown/tool stripping can rewrite the visible prefix. Reset safely rather than
+      // trying to animate across incompatible strings or stale word birth timestamps.
+      _resetStreamFadeState();
+    }
+    if(!_streamFadeLastTickMs){ // first tick after a reset: stamp both the tick and the stream start
+      _streamFadeLastTickMs=now;
+      _streamFadeStartedAt=now;
+    }
+    if(_streamFadeVisibleText===targetText) return {text:_streamFadeVisibleText,caughtUp:true};
+
+    const remaining=targetText.slice(_streamFadeVisibleText.length); // received but not yet shown
+    const backlogWords=_streamFadeWordCountOf(remaining);
+    const targetWords=_streamFadeWordCountOf(targetText);
+    const elapsedMs=Math.max(16,Math.min(120,now-_streamFadeLastTickMs)); // clamp tick length to 16–120ms so a stalled frame cannot release a huge batch
+    _streamFadeLastTickMs=now;
+
+    // OpenWebUI fades the actual arriving tokens, so long/fast responses naturally
+    // appear to accelerate. Hermes has a playout buffer, so track incoming word
+    // velocity and play out faster than it instead of using a metronomic cadence.
+    // LLM telemetry is usually tokens/sec, but the UI reveals words. A fixed word
+    // cadence can look stuck even when token throughput is high, so combine:
+    //   1) live target-word arrival velocity, 2) backlog pressure, 3) time ramp.
+    if(!_streamFadeLastArrivalMs){ // first sample: record a baseline only, no velocity yet
+      _streamFadeLastArrivalMs=now;
+      _streamFadeLastTargetWords=targetWords;
+    } else if(targetWords>_streamFadeLastTargetWords){
+      const arrivalElapsedMs=Math.max(16, now-_streamFadeLastArrivalMs); // floor avoids a near-zero divisor
+      const instantArrivalWps=(targetWords-_streamFadeLastTargetWords)*1000/arrivalElapsedMs;
+      // EWMA smooths bursty token chunks without hiding sustained fast output.
+      _streamFadeArrivalWps=_streamFadeArrivalWps
+        ? (_streamFadeArrivalWps*0.65 + instantArrivalWps*0.35)
+        : instantArrivalWps;
+      _streamFadeLastArrivalMs=now;
+      _streamFadeLastTargetWords=targetWords;
+    } else if(targetWords<_streamFadeLastTargetWords){ // target shrank: restart the velocity estimate
+      _streamFadeLastTargetWords=targetWords;
+      _streamFadeLastArrivalMs=now;
+      _streamFadeArrivalWps=0;
+    }
+
+    const streamAgeSeconds=Math.max(0, (now-(_streamFadeStartedAt||now))/1000);
+    const baseWps=30 + Math.min(streamAgeSeconds*4, 35); // 30 → 65 wps over long answers
+    const arrivalWps=_streamFadeArrivalWps ? Math.min(_streamFadeArrivalWps*2.4 + 20, 320) : 0; // play out faster than arrival, capped at 320
+    const backlogWps=backlogWords>0 ? Math.min(30 + backlogWords*8, 420) : 0; // more backlog means a faster rate, capped at 420
+    const wordsPerSecond=Math.min(420, Math.max(baseWps, arrivalWps, backlogWps)); // the strongest of the three signals wins
+
+    // High cap allows line/sentence-sized catch-up, while the rolling wave below
+    // keeps the visual effect animated instead of collapsing into block pops.
+    _streamFadeWordCarry+=elapsedMs*wordsPerSecond/1000;
+    if(!_streamFadeVisibleText) _streamFadeWordCarry=Math.max(_streamFadeWordCarry,1); // always release at least one word on the first tick
+    let wordsToReveal=Math.floor(_streamFadeWordCarry);
+    // At very high throughput, reveal in rolling phrase-sized waves instead of
+    // dumping whole paragraphs in one frame. The wrapper lengthens/staggers the
+    // fade for these waves so fast output stays visibly animated.
+    const burstFloor=backlogWords>=120?24:backlogWords>=60?18:backlogWords>=30?12:wordsPerSecond>=300?8:wordsPerSecond>=220?6:0;
+    if(burstFloor>0) wordsToReveal=Math.max(wordsToReveal, Math.min(burstFloor, backlogWords));
+    if(wordsToReveal<1){_streamFadeLastRevealCount=0;return {text:_streamFadeVisibleText,caughtUp:false};} // budget below one word: show nothing new this tick
+    _streamFadeLastRevealCount=Math.min(wordsToReveal, backlogWords);
+    _streamFadeWordCarry=Math.max(0,_streamFadeWordCarry-wordsToReveal); // keep only the unspent remainder for the next tick
+
+    let cut=0;
+    const wordRe=/(\s*\S+\s*)/g; // one word plus the whitespace around it
+    let match;
+    while(wordsToReveal>0&&(match=wordRe.exec(remaining))){
+      cut=wordRe.lastIndex;
+      wordsToReveal-=1;
+    }
+    if(cut<=0) cut=Math.min(remaining.length,4); // no word matched (whitespace-only remainder): release up to 4 characters
+    _streamFadeVisibleText+=remaining.slice(0,cut);
+    if(_streamFadeVisibleText.length>targetText.length) _streamFadeVisibleText=targetText; // defensive clamp: never exceed the target
+    return {text:_streamFadeVisibleText,caughtUp:_streamFadeVisibleText===targetText};
+  }
+  function _renderStreamingFadeMarkdown(displayText){ // Render the next playout slice with fade spans; returns true when caught up (or when there is no body).
+    if(!assistantBody) return true;
+    const next=_streamFadeNextText(displayText);
+    const html=renderMd ? renderMd(next.text||'') : esc(next.text||''); // fall back to escaped text when renderMd is falsy
+    assistantBody.innerHTML=html; // full re-render each call; the word spans are rebuilt below
+    assistantBody.classList.add('stream-fade-active');
+    _wrapStreamingFadeWords(assistantBody);
+    _sanitizeSmdLinks(assistantBody);
+    return next.caughtUp;
+  }
+  function _wrapStreamingFadeWords(root){ // Wrap each word under root in a span and animate the ones still inside their fade window.
+    if(!root||!document.createTreeWalker) return;
+    const textNodes=[];
+    const walker=document.createTreeWalker(root,NodeFilter.SHOW_TEXT,{
+      acceptNode(node){
+        if(!node||!node.nodeValue||!node.nodeValue.trim()) return NodeFilter.FILTER_REJECT; // skip empty or whitespace-only text
+        let parent=node.parentElement;
+        while(parent&&parent!==root){ // inspect every ancestor up to, but not including, root
+          if(parent.classList&&parent.classList.contains('stream-fade-word')) return NodeFilter.FILTER_REJECT; // already wrapped
+          if(_streamFadeSkipNode(parent)) return NodeFilter.FILTER_REJECT;
+          parent=parent.parentElement;
+        }
+        return NodeFilter.FILTER_ACCEPT;
+      }
+    });
+    let node;
+    while((node=walker.nextNode())) textNodes.push(node); // collect first; the loop below mutates the tree
+    let wordIndex=0;
+    const now=performance.now();
+    const wordRe=/(\S+)(\s*)/g; // group 1 = word, group 2 = trailing whitespace
+    const revealedThisFrame=Math.max(1,_streamFadeLastRevealCount||1);
+    const fadeMs=revealedThisFrame>=8?_STREAM_FADE_WAVE_MS:revealedThisFrame>=4?240:_STREAM_FADE_MS; // bigger waves get a longer fade
+    const waveStepMs=revealedThisFrame>=18?18:revealedThisFrame>=8?22:revealedThisFrame>=4?16:10; // per-word stagger step in ms
+    for(const textNode of textNodes){
+      const value=textNode.nodeValue||'';
+      wordRe.lastIndex=0; // the /g regex is shared across text nodes, so rewind it
+      const frag=document.createDocumentFragment();
+      let last=0, match, changed=false;
+      while((match=wordRe.exec(value))){
+        if(match.index>last) frag.appendChild(document.createTextNode(value.slice(last,match.index))); // keep leading whitespace as plain text
+        wordIndex+=1;
+        const span=document.createElement('span');
+        if(!_streamFadeWordBornAt[wordIndex]){ // first time this word index is seen
+          const newWordOffset=Math.max(wordIndex-_streamFadeWordCount-1,0); // position among the words added since the previous pass
+          // High-speed output should feel like a continuous animated sweep, not
+          // a block pop. Keep bursts smaller, but stretch/stagger each wave so
+          // multiple sentences can fade across several hundred milliseconds.
+          const staggerMs=Math.min(newWordOffset*waveStepMs,_STREAM_FADE_MAX_STAGGER_MS);
+          _streamFadeWordBornAt[wordIndex]=now+staggerMs;
+        }
+        _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,_streamFadeWordBornAt[wordIndex]+fadeMs);
+        const ageMs=now-_streamFadeWordBornAt[wordIndex]; // negative while the word's staggered start is still in the future
+        const isAnimating=ageMs<fadeMs;
+        span.className=isAnimating?'stream-fade-word is-new':'stream-fade-word';
+        if(isAnimating){
+          const delayMs=Math.max(-fadeMs, -ageMs); // negative delay resumes a fade in progress; positive delay staggers a future one
+          span.style.animationDelay=delayMs+'ms';
+          span.style.setProperty('--stream-fade-ms',fadeMs+'ms');
+        }
+        span.textContent=match[1];
+        frag.appendChild(span);
+        if(match[2]) frag.appendChild(document.createTextNode(match[2]));
+        last=match.index+match[0].length;
+        changed=true;
+      }
+      if(!changed) continue;
+      if(last<value.length) frag.appendChild(document.createTextNode(value.slice(last)));
+      textNode.replaceWith(frag);
+    }
+    _streamFadeWordBornAt.length=wordIndex+1; // drop timestamps for word indexes that are no longer rendered
+    _streamFadeWordCount=wordIndex;
+  }
+  function _streamFadeCurrentDisplayText(){ // Display text for the current segment, chosen the same way the live render path chooses it.
+    const parsed=_parseStreamState();
+    return segmentStart===0
+      ? parsed.displayText // first segment: use the parsed display text
+      : _stripXmlToolCalls(assistantText.slice(segmentStart)); // later segments: only the text after segmentStart
+  }
+  function _drainStreamFadeBeforeDone(onDone){
+    const step=()=>{
+      if(!assistantBody){onDone();return;}
+      const target=_streamFadeCurrentDisplayText();
+      const caughtUp=_renderStreamingFadeMarkdown(target);
+      scrollIfPinned();
+      if(caughtUp){
+        // Let the last released words visibly finish their stagger + fade before
+        // the final renderMessages() DOM replacement removes the live spans.
+        const remainingAnimationMs=Math.max(_STREAM_FADE_MS, _streamFadeLatestAnimationEndAt-performance.now());
+        setTimeout(onDone, Math.min(remainingAnimationMs, _STREAM_FADE_WAVE_MS+_STREAM_FADE_MAX_STAGGER_MS));
+        return;
+      }
+      setTimeout(()=>requestAnimationFrame(step), 16);
+    };
+    step();
+  }
   function _resetAssistantSegment(){
     assistantRow=null;
     assistantBody=null;
     segmentStart=assistantText.length;
     _freshSegment=true;
     _smdEndParser();
+    _resetStreamFadeState(); // a new segment starts with a clean fade playout buffer and no word birth times
   }
 
   let _lastRenderMs=0;
@@ -701,30 +924,41 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         const displayText = segmentStart===0
           ? parsed.displayText                          // first segment: uses think-tag stripping
           : _stripXmlToolCalls(assistantText.slice(segmentStart));
-        if(!_smdParser&&window.smd){
-          // On reconnect: prior content in assistantBody came from a different smd parser run.
-          // Clear it and start fresh — renderMessages() on done will restore the full content.
-          if(_smdReconnect){assistantBody.innerHTML='';_smdReconnect=false;}
-          _smdNewParser(assistantBody);
-        }
-        if(_smdParser){
-          _smdWrite(displayText);
+        if(_shouldUseStreamFade()){
+          _smdEndParser();
+          const caughtUp=_renderStreamingFadeMarkdown(displayText);
+          if(!caughtUp&&!_streamFinalized){
+            setTimeout(()=>_scheduleRender(), 24);
+          }
         } else {
-          // Fallback: smd not loaded yet, reconnect session, or smd unavailable — use renderMd
-          // for every live segment. Without this, the first segment inserts raw
-          // parsed.displayText and users see unformatted markdown until done.
-          const fallbackText = segmentStart===0
-            ? parsed.displayText
-            : _stripXmlToolCalls(assistantText.slice(segmentStart));
-          assistantBody.innerHTML = renderMd ? renderMd(fallbackText) : esc(fallbackText);
+          assistantBody.classList.remove('stream-fade-active');
+          _resetStreamFadeState();
+          if(!_smdParser&&window.smd){
+            // On reconnect: prior content in assistantBody came from a different smd parser run.
+            // Clear it and start fresh — renderMessages() on done will restore the full content.
+            if(_smdReconnect){assistantBody.innerHTML='';_smdReconnect=false;}
+            _smdNewParser(assistantBody);
+          }
+          if(_smdParser){
+            _smdWrite(displayText);
+          } else {
+            // Fallback: smd not loaded yet, reconnect session, or smd unavailable — use renderMd
+            // for every live segment. Without this, the first segment inserts raw
+            // parsed.displayText and users see unformatted markdown until done.
+            const fallbackText = segmentStart===0
+              ? parsed.displayText
+              : _stripXmlToolCalls(assistantText.slice(segmentStart));
+            assistantBody.innerHTML = renderMd ? renderMd(fallbackText) : esc(fallbackText);
+          }
         }
       }
       scrollIfPinned();
     };
-    if(sinceLastMs>=66){
+    const frameIntervalMs=_shouldUseStreamFade()?16:66;
+    if(sinceLastMs>=frameIntervalMs){
       _pendingRafHandle=requestAnimationFrame(_doRender);
     } else {
-      _pendingRafHandle=setTimeout(()=>requestAnimationFrame(_doRender), 66-sinceLastMs);
+      _pendingRafHandle=setTimeout(()=>requestAnimationFrame(_doRender), frameIntervalMs-sinceLastMs);
     }
   }
 
@@ -745,6 +979,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     // terminal handlers) address it without needing a reset here.
 
     source.addEventListener('token',e=>{
+      if(_terminalStateReached||_streamFinalized) return;
       if(!S.session||S.session.session_id!==activeSid) return;
       const d=JSON.parse(e.data);
       assistantText+=d.text;
@@ -756,6 +991,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     });
 
     source.addEventListener('interim_assistant',e=>{
+      if(_terminalStateReached||_streamFinalized) return;
       if(!S.session||S.session.session_id!==activeSid) return;
       const d=JSON.parse(e.data);
       const visible=String(d&&d.text?d.text:'').trim();
@@ -776,6 +1012,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     });
 
     source.addEventListener('reasoning',e=>{
+      if(_terminalStateReached||_streamFinalized) return;
       const d=JSON.parse(e.data);
       reasoningText += d.text || '';
       liveReasoningText += d.text || '';
@@ -954,153 +1191,162 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     source.addEventListener('done',e=>{
       _terminalStateReached=true;
       if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
-      // Bug A fix: cancel any pending rAF and mark stream finalized before
-      // the DOM is settled by renderMessages, so no trailing token/reasoning rAF
-      // can reintroduce a stale thinking card or duplicate content.
-      _streamFinalized=true;
-      if(_pendingRafHandle!==null){cancelAnimationFrame(_pendingRafHandle);clearTimeout(_pendingRafHandle);_pendingRafHandle=null;_renderPending=false;}
-      if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
-      // Finalize smd parser — flushes any remaining buffered markdown state
-      // and runs Prism + copy buttons on the live segment before the DOM is replaced
-      if(assistantBody){
-        const _finBody=assistantBody;
-        _smdEndParser();
-        requestAnimationFrame(()=>{
-          if(typeof highlightCode==='function') highlightCode(_finBody);
-          if(typeof addCopyButtons==='function') addCopyButtons(_finBody);
-          if(typeof renderKatexBlocks==='function') renderKatexBlocks();
-        });
-      } else {
-        _smdEndParser();
-      }
-      const d=JSON.parse(e.data);
-      const isActiveSession=_isSessionCurrentPane(activeSid);
-      const isSessionViewed=_isSessionActivelyViewed(activeSid);
-      const completedSession=d.session||{session_id:activeSid};
-      const completedSid=completedSession.session_id||activeSid;
-      if(!isSessionViewed && typeof _markSessionCompletionUnread==='function'){
-        _markSessionCompletionUnread(completedSid, completedSession.message_count);
-      }
-      _clearOwnerInflightState();
-      if(typeof _markSessionCompletedInList==='function'){
-        _markSessionCompletedInList(completedSession, activeSid);
-      }
-      _clearApprovalForOwner();
-      _clearClarifyForOwner('terminal');
-      const shouldFollowOnDone=isActiveSession&&((typeof _shouldFollowMessagesOnDomReplace==='function')
-        ? _shouldFollowMessagesOnDomReplace()
-        : (typeof _isMessagePaneNearBottom==='function'&&_isMessagePaneNearBottom(1200)));
-      if(isActiveSession){
-        S.activeStreamId=null;
-      }
-      if(isActiveSession){
-        // Capture previous session totals BEFORE overwriting S.session with the new
-        // cumulative values from the done event. prevIn/prevOut are the totals as of
-        // the start of this turn; curIn/curOut are the full post-turn totals — the
-        // delta is the per-turn usage for #1159.
-        const _prevIn=(S.session&&S.session.input_tokens)||0;
-        const _prevOut=(S.session&&S.session.output_tokens)||0;
-        const _prevCost=(S.session&&S.session.estimated_cost)||0;
-        S.session=d.session;S.messages=d.session.messages||[];if(typeof _messagesTruncated!=='undefined')_messagesTruncated=!!d.session._messages_truncated;
-        if(S.session&&S.session.session_id){
-          localStorage.setItem('hermes-webui-session',S.session.session_id);
-          if(typeof _setActiveSessionUrl==='function') _setActiveSessionUrl(S.session.session_id);
+      const _doneData=JSON.parse(e.data);
+      const _finishDone=()=>{
+        // Bug A fix: cancel any pending rAF and mark stream finalized before
+        // the DOM is settled by renderMessages, so no trailing token/reasoning rAF
+        // can reintroduce a stale thinking card or duplicate content.
+        _streamFinalized=true;
+        _cancelPendingStreamRender();
+        if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
+        // Finalize smd parser — flushes any remaining buffered markdown state
+        // and runs Prism + copy buttons on the live segment before the DOM is replaced
+        if(assistantBody){
+          const _finBody=assistantBody;
+          _smdEndParser();
+          requestAnimationFrame(()=>{
+            if(typeof highlightCode==='function') highlightCode(_finBody);
+            if(typeof addCopyButtons==='function') addCopyButtons(_finBody);
+            if(typeof renderKatexBlocks==='function') renderKatexBlocks();
+          });
+        } else {
+          _smdEndParser();
         }
-        if(
-          window._compressionUi&&window._compressionUi.automatic&&
-          window._compressionUi.sessionId===activeSid&&
-          d.session&&d.session.session_id
-        ){
-          window._compressionUi={...window._compressionUi, sessionId:d.session.session_id};
+        const d=_doneData;
+        const isActiveSession=_isSessionCurrentPane(activeSid);
+        const isSessionViewed=_isSessionActivelyViewed(activeSid);
+        const completedSession=d.session||{session_id:activeSid};
+        const completedSid=completedSession.session_id||activeSid;
+        if(!isSessionViewed && typeof _markSessionCompletionUnread==='function'){
+          _markSessionCompletionUnread(completedSid, completedSession.message_count);
         }
-        // Find the last assistant message once for both reasoning persistence and timestamp
-        const lastAsst=[...S.messages].reverse().find(m=>m.role==='assistant');
-        // Persist reasoning trace so thinking card survives page reload
-        if(reasoningText&&lastAsst&&!lastAsst.reasoning) lastAsst.reasoning=reasoningText;
-        // Stamp _ts on the last assistant message if it has no timestamp
-        if(lastAsst&&!lastAsst._ts&&!lastAsst.timestamp) lastAsst._ts=Date.now()/1000;
-        if(d.usage){
-          S.lastUsage=d.usage;_syncCtxIndicator(d.usage);
-          // #503 — compute per-turn cost delta and attach to last assistant message
-          if(lastAsst){
-            const prevIn=_prevIn;
-            const prevOut=_prevOut;
-            const prevCost=_prevCost;
-            const curIn=d.usage.input_tokens||0;
-            const curOut=d.usage.output_tokens||0;
-            const curCost=d.usage.estimated_cost||0;
-            // Only set delta if values actually increased (skip no-op turns)
-            if(curIn>prevIn||curOut>prevOut){
-              lastAsst._turnUsage={
-                input_tokens:Math.max(0,curIn-prevIn),
-                output_tokens:Math.max(0,curOut-prevOut),
-                estimated_cost:Math.max(0,curCost-prevCost),
-              };
-            }
-            if(typeof d.usage.duration_seconds==='number'){
-              lastAsst._turnDuration=d.usage.duration_seconds;
-            }
-            if(typeof d.usage.tps==='number'&&d.usage.tps>0){
-              lastAsst._turnTps=d.usage.tps;
-            }
-            if(d.usage.gateway_routing){
-              lastAsst._gatewayRouting=d.usage.gateway_routing;
-              if(S.session)S.session.gateway_routing=d.usage.gateway_routing;
-              if(S.session&&Array.isArray(S.session.gateway_routing_history))S.session.gateway_routing_history.push(d.usage.gateway_routing);
-              else if(S.session)S.session.gateway_routing_history=[d.usage.gateway_routing];
+        _clearOwnerInflightState();
+        if(typeof _markSessionCompletedInList==='function'){
+          _markSessionCompletedInList(completedSession, activeSid);
+        }
+        _clearApprovalForOwner();
+        _clearClarifyForOwner('terminal');
+        const shouldFollowOnDone=isActiveSession&&((typeof _shouldFollowMessagesOnDomReplace==='function')
+          ? _shouldFollowMessagesOnDomReplace()
+          : (typeof _isMessagePaneNearBottom==='function'&&_isMessagePaneNearBottom(1200)));
+        if(isActiveSession){
+          S.activeStreamId=null;
+        }
+        if(isActiveSession){
+          // Capture previous session totals BEFORE overwriting S.session with the new
+          // cumulative values from the done event. prevIn/prevOut are the totals as of
+          // the start of this turn; curIn/curOut are the full post-turn totals — the
+          // delta is the per-turn usage for #1159.
+          const _prevIn=(S.session&&S.session.input_tokens)||0;
+          const _prevOut=(S.session&&S.session.output_tokens)||0;
+          const _prevCost=(S.session&&S.session.estimated_cost)||0;
+          S.session=d.session;S.messages=d.session.messages||[];if(typeof _messagesTruncated!=='undefined')_messagesTruncated=!!d.session._messages_truncated;
+          if(S.session&&S.session.session_id){
+            localStorage.setItem('hermes-webui-session',S.session.session_id);
+            if(typeof _setActiveSessionUrl==='function') _setActiveSessionUrl(S.session.session_id);
+          }
+          if(
+            window._compressionUi&&window._compressionUi.automatic&&
+            window._compressionUi.sessionId===activeSid&&
+            d.session&&d.session.session_id
+          ){
+            window._compressionUi={...window._compressionUi, sessionId:d.session.session_id};
+          }
+          // Find the last assistant message once for both reasoning persistence and timestamp
+          const lastAsst=[...S.messages].reverse().find(m=>m.role==='assistant');
+          // Persist reasoning trace so thinking card survives page reload
+          if(reasoningText&&lastAsst&&!lastAsst.reasoning) lastAsst.reasoning=reasoningText;
+          // Stamp _ts on the last assistant message if it has no timestamp
+          if(lastAsst&&!lastAsst._ts&&!lastAsst.timestamp) lastAsst._ts=Date.now()/1000;
+          if(d.usage){
+            S.lastUsage=d.usage;_syncCtxIndicator(d.usage);
+            // #503 — compute per-turn cost delta and attach to last assistant message
+            if(lastAsst){
+              const prevIn=_prevIn;
+              const prevOut=_prevOut;
+              const prevCost=_prevCost;
+              const curIn=d.usage.input_tokens||0;
+              const curOut=d.usage.output_tokens||0;
+              const curCost=d.usage.estimated_cost||0;
+              // Only set delta if values actually increased (skip no-op turns)
+              if(curIn>prevIn||curOut>prevOut){
+                lastAsst._turnUsage={
+                  input_tokens:Math.max(0,curIn-prevIn),
+                  output_tokens:Math.max(0,curOut-prevOut),
+                  estimated_cost:Math.max(0,curCost-prevCost),
+                };
+              }
+              if(typeof d.usage.duration_seconds==='number'){
+                lastAsst._turnDuration=d.usage.duration_seconds;
+              }
+              if(typeof d.usage.tps==='number'&&d.usage.tps>0){
+                lastAsst._turnTps=d.usage.tps;
+              }
+              if(d.usage.gateway_routing){
+                lastAsst._gatewayRouting=d.usage.gateway_routing;
+                if(S.session)S.session.gateway_routing=d.usage.gateway_routing;
+                if(S.session&&Array.isArray(S.session.gateway_routing_history))S.session.gateway_routing_history.push(d.usage.gateway_routing);
+                else if(S.session)S.session.gateway_routing_history=[d.usage.gateway_routing];
+              }
             }
           }
+          if(d.session.tool_calls&&d.session.tool_calls.length){
+            S.toolCalls=d.session.tool_calls.map(tc=>({...tc,done:true}));
+          } else {
+            S.toolCalls=S.toolCalls.map(tc=>({...tc,done:true}));
+          }
+          if(typeof _copyActivityDisclosureState==='function'&&lastAsst){
+            const assistantIdx=S.messages.indexOf(lastAsst);
+            if(assistantIdx>=0) _copyActivityDisclosureState('live:'+streamId, 'assistant:'+assistantIdx);
+          }
+          if(uploaded.length){
+            const lastUser=[...S.messages].reverse().find(m=>m.role==='user');
+            if(lastUser)lastUser.attachments=uploaded;
+          }
+          if(_latestGoalStatus&&_latestGoalStatus.message){
+            S.messages.push({
+              role:'assistant',
+              content:String(_latestGoalStatus.message),
+              _ts:Date.now()/1000,
+              _goalStatus:true,
+              _transient:true,
+            });
+          }
+          clearLiveToolCards();
+          S.busy=false;
+          // No-reply guard (#373): if agent returned nothing, show inline error
+          if(!S.messages.some(m=>m.role==='assistant'&&String(m.content||'').trim())&&!assistantText){removeThinking();S.messages.push({role:'assistant',content:'**No response received.** Check your API key and model selection.'});}
+          if(isSessionViewed) _markSessionViewed(completedSid, completedSession.message_count ?? S.messages.length);
+          syncTopbar();renderMessages({preserveScroll:true});
+          if(shouldFollowOnDone&&typeof scrollToBottom==='function') scrollToBottom();
+          loadDir('.');
+          // TTS auto-read: speak the last assistant response if enabled (#499)
+          if(typeof autoReadLastAssistant==='function') setTimeout(()=>autoReadLastAssistant(), 300);
         }
-        if(d.session.tool_calls&&d.session.tool_calls.length){
-          S.toolCalls=d.session.tool_calls.map(tc=>({...tc,done:true}));
-        } else {
-          S.toolCalls=S.toolCalls.map(tc=>({...tc,done:true}));
-        }
-        if(typeof _copyActivityDisclosureState==='function'&&lastAsst){
-          const assistantIdx=S.messages.indexOf(lastAsst);
-          if(assistantIdx>=0) _copyActivityDisclosureState('live:'+streamId, 'assistant:'+assistantIdx);
-        }
-        if(uploaded.length){
-          const lastUser=[...S.messages].reverse().find(m=>m.role==='user');
-          if(lastUser)lastUser.attachments=uploaded;
-        }
-        if(_latestGoalStatus&&_latestGoalStatus.message){
-          S.messages.push({
-            role:'assistant',
-            content:String(_latestGoalStatus.message),
-            _ts:Date.now()/1000,
-            _goalStatus:true,
-            _transient:true,
+        if(isActiveSession&&_pendingGoalContinuation&&typeof queueSessionMessage==='function'){
+          const _goalNext=_pendingGoalContinuation;
+          _pendingGoalContinuation=null;
+          queueSessionMessage(_goalNext.sid,{
+            text:_goalNext.text,
+            files:[],
+            model:_goalNext.model,
+            model_provider:_goalNext.model_provider,
+            profile:_goalNext.profile,
           });
+          if(typeof updateQueueBadge==='function')updateQueueBadge(_goalNext.sid);
         }
-        clearLiveToolCards();
-        S.busy=false;
-        // No-reply guard (#373): if agent returned nothing, show inline error
-        if(!S.messages.some(m=>m.role==='assistant'&&String(m.content||'').trim())&&!assistantText){removeThinking();S.messages.push({role:'assistant',content:'**No response received.** Check your API key and model selection.'});}
-        if(isSessionViewed) _markSessionViewed(completedSid, completedSession.message_count ?? S.messages.length);
-        syncTopbar();renderMessages({preserveScroll:true});
-        if(shouldFollowOnDone&&typeof scrollToBottom==='function') scrollToBottom();
-        loadDir('.');
-        // TTS auto-read: speak the last assistant response if enabled (#499)
-        if(typeof autoReadLastAssistant==='function') setTimeout(()=>autoReadLastAssistant(), 300);
+        if(isActiveSession) _queueDrainSid=activeSid;
+        renderSessionList();
+        _setActivePaneIdleIfOwner();
+        playNotificationSound();
+        sendBrowserNotification('Response complete',assistantText?assistantText.slice(0,100):'Task finished');
+      };
+      if(_shouldUseStreamFade()&&assistantBody){
+        _cancelPendingStreamRender();
+        _drainStreamFadeBeforeDone(_finishDone);
+        return;
       }
-      if(isActiveSession&&_pendingGoalContinuation&&typeof queueSessionMessage==='function'){
-        const _goalNext=_pendingGoalContinuation;
-        _pendingGoalContinuation=null;
-        queueSessionMessage(_goalNext.sid,{
-          text:_goalNext.text,
-          files:[],
-          model:_goalNext.model,
-          model_provider:_goalNext.model_provider,
-          profile:_goalNext.profile,
-        });
-        if(typeof updateQueueBadge==='function')updateQueueBadge(_goalNext.sid);
-      }
-      if(isActiveSession) _queueDrainSid=activeSid;
-      renderSessionList();
-      _setActivePaneIdleIfOwner();
-      playNotificationSound();
-      sendBrowserNotification('Response complete',assistantText?assistantText.slice(0,100):'Task finished');
+      _finishDone();
     });
 
     source.addEventListener('stream_end',e=>{
@@ -1195,7 +1441,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       _terminalStateReached=true;
       if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
       _streamFinalized=true;
-      if(_pendingRafHandle!==null){cancelAnimationFrame(_pendingRafHandle);clearTimeout(_pendingRafHandle);_pendingRafHandle=null;_renderPending=false;}
+      _cancelPendingStreamRender();
       _smdEndParser();
       if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
       // Application-level error sent explicitly by the server (rate limit, crash, etc.)
@@ -1281,7 +1527,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       _terminalStateReached=true;
       if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
       _streamFinalized=true;
-      if(_pendingRafHandle!==null){cancelAnimationFrame(_pendingRafHandle);clearTimeout(_pendingRafHandle);_pendingRafHandle=null;_renderPending=false;}
+      _cancelPendingStreamRender();
       _smdEndParser();
       if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
       source.close();
@@ -1375,7 +1621,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     // cannot fire after renderMessages() has settled the DOM with the error message.
     if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
     _streamFinalized=true;
-    if(_pendingRafHandle!==null){cancelAnimationFrame(_pendingRafHandle);clearTimeout(_pendingRafHandle);_pendingRafHandle=null;_renderPending=false;}
+    _cancelPendingStreamRender();
     if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
     _clearOwnerInflightState();
     _closeSource();
diff --git a/static/panels.js b/static/panels.js
index d2517582..105d2f3e 100644
--- a/static/panels.js
+++ b/static/panels.js
@@ -4950,6 +4950,8 @@ function _preferencesPayloadFromUi(){
   if(showUsageCb) payload.show_token_usage=showUsageCb.checked;
   const showTpsCb=$('settingsShowTps');
   if(showTpsCb) payload.show_tps=showTpsCb.checked;
+  const fadeTextCb=$('settingsFadeTextEffect');
+  if(fadeTextCb) payload.fade_text_effect=fadeTextCb.checked;
   const simplifiedToolCb=$('settingsSimplifiedToolCalling');
   if(simplifiedToolCb) payload.simplified_tool_calling=simplifiedToolCb.checked;
   const apiRedactCb=$('settingsApiRedact');
@@ -5016,6 +5018,7 @@ async function _autosavePreferencesSettings(payload){
       if(typeof clearMessageRenderCache==='function') clearMessageRenderCache();
       if(typeof renderMessages==='function') renderMessages();
     }
+    if(payload&&Object.prototype.hasOwnProperty.call(payload,'fade_text_effect')) window._fadeTextEffect=!!payload.fade_text_effect;
     if(payload&&payload.show_tps!==undefined){
       window._showTps=!!(saved&&saved.show_tps);
       if(typeof clearMessageRenderCache==='function') clearMessageRenderCache();
@@ -5183,6 +5186,8 @@ async function loadSettingsPanel(){
     if(showUsageCb){showUsageCb.checked=!!settings.show_token_usage;showUsageCb.addEventListener('change',_schedulePreferencesAutosave,{once:false});}
     const showTpsCb=$('settingsShowTps');
     if(showTpsCb){showTpsCb.checked=!!settings.show_tps;showTpsCb.addEventListener('change',_schedulePreferencesAutosave,{once:false});}
+    const fadeTextCb=$('settingsFadeTextEffect');
+    if(fadeTextCb){fadeTextCb.checked=!!settings.fade_text_effect;window._fadeTextEffect=fadeTextCb.checked;fadeTextCb.addEventListener('change',_schedulePreferencesAutosave,{once:false});}
     const simplifiedToolCb=$('settingsSimplifiedToolCalling');
     if(simplifiedToolCb){simplifiedToolCb.checked=settings.simplified_tool_calling!==false;simplifiedToolCb.addEventListener('change',_schedulePreferencesAutosave,{once:false});}
     const apiRedactCb=$('settingsApiRedact');
@@ -5762,10 +5767,11 @@ function _setSettingsAuthButtonsVisible(active){
 }
 
 function _applySavedSettingsUi(saved, body, opts){
-  const {sendKey,showTokenUsage,showTps,showCliSessions,theme,skin,language,sidebarDensity,fontSize}=opts;
+  const {sendKey,showTokenUsage,showTps,fadeTextEffect,showCliSessions,theme,skin,language,sidebarDensity,fontSize}=opts;
   window._sendKey=sendKey||'enter';
   window._showTokenUsage=showTokenUsage;
   window._showTps=showTps;
+  window._fadeTextEffect=!!fadeTextEffect;
   window._showCliSessions=showCliSessions;
   window._soundEnabled=body.sound_enabled;
   window._notificationsEnabled=body.notifications_enabled;
@@ -5857,6 +5863,7 @@ async function saveSettings(andClose){
   const sendKey=($('settingsSendKey')||{}).value;
   const showTokenUsage=!!($('settingsShowTokenUsage')||{}).checked;
   const showTps=!!($('settingsShowTps')||{}).checked;
+  const fadeTextEffect=!!($('settingsFadeTextEffect')||{}).checked;
   const showCliSessions=!!($('settingsShowCliSessions')||{}).checked;
   const pw=($('settingsPassword')||{}).value;
   const theme=($('settingsTheme')||{}).value||'dark';
@@ -5876,6 +5883,7 @@ async function saveSettings(andClose){
   body.language=language;
   body.show_token_usage=showTokenUsage;
   body.show_tps=showTps;
+  body.fade_text_effect=fadeTextEffect;
   body.simplified_tool_calling=!!($('settingsSimplifiedToolCalling')||{}).checked;
   body.api_redact_enabled=!!($('settingsApiRedact')||{}).checked;
   body.show_cli_sessions=showCliSessions;
@@ -5901,7 +5909,7 @@ async function saveSettings(andClose){
           if(typeof showToast==='function') showToast('Failed to update default model — settings saved');
         }
       }
-      _applySavedSettingsUi(saved, body, {sendKey,showTokenUsage,showTps,showCliSessions,theme,skin,language,sidebarDensity,fontSize});
+      _applySavedSettingsUi(saved, body, {sendKey,showTokenUsage,showTps,fadeTextEffect,showCliSessions,theme,skin,language,sidebarDensity,fontSize});
       showToast(t(saved.auth_just_enabled?'settings_saved_pw':'settings_saved_pw_updated'));
       _settingsDirty=false;
       _resetSettingsPanelState();
@@ -5920,7 +5928,7 @@ async function saveSettings(andClose){
         if(typeof showToast==='function') showToast('Failed to update default model — settings saved');
       }
     }
-    _applySavedSettingsUi(saved, body, {sendKey,showTokenUsage,showTps,showCliSessions,theme,skin,language,sidebarDensity,fontSize});
+    _applySavedSettingsUi(saved, body, {sendKey,showTokenUsage,showTps,fadeTextEffect,showCliSessions,theme,skin,language,sidebarDensity,fontSize});
     showToast(t('settings_saved'));
     _settingsDirty=false;
     _resetSettingsPanelState();
diff --git a/static/style.css b/static/style.css
index 7e87920f..9e949ef3 100644
--- a/static/style.css
+++ b/static/style.css
@@ -3742,3 +3742,11 @@ main.main.showing-logs > #mainLogs{display:flex;}
 .log-line-debug{color:var(--muted);opacity:.75;}
 .logs-empty,.logs-hint{margin:8px 14px;padding:12px;border:1px solid var(--border);border-radius:8px;color:var(--muted);background:var(--surface);white-space:normal;font-family:var(--font-ui,system-ui,sans-serif);font-size:12px;}
 .logs-hint.warn{color:#f59e0b;border-color:rgba(245,158,11,.35);background:rgba(245,158,11,.08);}
+
+/* OpenWebUI-style streaming word fade (opt-in via Settings → Preferences).
+   Opacity-only fade; high-speed streams use a longer JS-driven wave duration. */
+.stream-fade-active .stream-fade-word{display:inline;}
+.stream-fade-word.is-new{will-change:opacity;animation:stream-fade-word-in var(--stream-fade-ms,140ms) ease-out both;} /* compositor hint only while a word is animating */
+@keyframes stream-fade-word-in{from{opacity:0;}to{opacity:1;}}
+[data-live-assistant="1"]:last-child .msg-body.stream-fade-active > :last-child::after{display:none;content:none;} /* separate rules: a browser without :has() drops only the next one */
+[data-live-assistant="1"]:last-child .msg-body.stream-fade-active:not(:has(> *))::after{display:none;content:none;}
diff --git a/tests/test_smooth_text_fade.py b/tests/test_smooth_text_fade.py
new file mode 100644
index 00000000..3ab4ffbe
--- /dev/null
+++ b/tests/test_smooth_text_fade.py
@@ -0,0 +1,290 @@
+import re
+import subprocess
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+CONFIG_PY = (REPO / "api" / "config.py").read_text(encoding="utf-8")
+INDEX_HTML = (REPO / "static" / "index.html").read_text(encoding="utf-8")
+PANELS_JS = (REPO / "static" / "panels.js").read_text(encoding="utf-8")
+MESSAGES_JS = (REPO / "static" / "messages.js").read_text(encoding="utf-8")
+BOOT_JS = (REPO / "static" / "boot.js").read_text(encoding="utf-8")
+STYLE_CSS = (REPO / "static" / "style.css").read_text(encoding="utf-8")
+I18N_JS = (REPO / "static" / "i18n.js").read_text(encoding="utf-8")
+
+FADE_SETTING = "fade_text_effect"
+FADE_CHECKBOX_ID = "settingsFadeTextEffect"
+FADE_RUNTIME_FLAG = "window._fadeTextEffect"
+FADE_LABEL_KEY = "settings_label_fade_text_effect"
+FADE_DESC_KEY = "settings_desc_fade_text_effect"
+
+
+def function_block(src: str, name: str) -> str:
+    marker = re.search(rf"(^|\n)\s*(?:async\s+)?function\s+{re.escape(name)}\(", src)
+    assert marker is not None, f"{name}() not found"
+    start = marker.start()
+    brace = src.find("{", marker.end())
+    assert brace != -1, f"{name}() opening brace not found"
+
+    depth = 0
+    in_string = None
+    escape = False
+    for i in range(brace, len(src)):
+        ch = src[i]
+        if in_string:
+            if escape:
+                escape = False
+            elif ch == "\\":
+                escape = True
+            elif ch == in_string:
+                in_string = None
+            continue
+        if ch in "'`\"":
+            in_string = ch
+            continue
+        if ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0:
+                return src[start : i + 1]
+    raise AssertionError(f"{name}() closing brace not found")
+
+
+def event_listener_block(src: str, event_name: str) -> str:  # text from this listener's registration up to the next one
+    start = src.index(f"source.addEventListener('{event_name}'")  # ValueError here means the listener is absent
+    end = src.find("source.addEventListener(", start + 1)  # -1 when no later registration follows this one
+    return src[start:] if end == -1 else src[start:end]  # last listener: fall back to the rest of the source
+
+
+def compact(src: str) -> str:
+    return re.sub(r"\s+", "", src)
+
+
+def assert_contains_all(src: str, snippets: list[str]) -> None:  # every snippet must occur verbatim in src
+    for snippet in snippets:
+        assert snippet in src, f"expected snippet not found: {snippet!r}"  # name the missing snippet on failure
+
+
+def fade_helper_script(performance_stub: str = "{_t:0,now(){return this._t;}}") -> str:
+    helpers = "\n".join(
+        function_block(MESSAGES_JS, name)
+        for name in [
+            "_streamFadeWordCountOf",
+            "_resetStreamFadeState",
+            "_streamFadeNextText",
+        ]
+    )
+    return f"""
+let _streamFadeVisibleText='';
+let _streamFadeLastTickMs=0;
+let _streamFadeWordCarry=0;
+let _streamFadeStartedAt=0;
+let _streamFadeLastTargetWords=0;
+let _streamFadeLastArrivalMs=0;
+let _streamFadeArrivalWps=0;
+let _streamFadeLatestAnimationEndAt=0;
+let _streamFadeLastRevealCount=0;
+const _STREAM_FADE_MS=140;
+const _STREAM_FADE_WAVE_MS=320;
+const _STREAM_FADE_MAX_STAGGER_MS=520;
+const performance={performance_stub};
+{helpers}
+"""
+
+
+def run_node(script: str) -> subprocess.CompletedProcess[str]:  # run `script` via `node -e`; require exit code 0
+    result = subprocess.run(
+        ["node", "-e", script],
+        cwd=REPO,
+        text=True,
+        capture_output=True,
+        check=False, timeout=60,  # bounded: a non-terminating script raises TimeoutExpired instead of hanging pytest
+    )
+    assert result.returncode == 0, result.stderr  # surface Node's stderr as the failure message
+    return result
+
+
+def test_fade_text_effect_setting_is_wired_through_backend_and_startup():
+    bool_keys = CONFIG_PY[CONFIG_PY.index("_SETTINGS_BOOL_KEYS") : CONFIG_PY.index("# Language codes")]
+    assert f'"{FADE_SETTING}": False' in CONFIG_PY
+    assert f'"{FADE_SETTING}"' in bool_keys
+    assert f"{FADE_RUNTIME_FLAG}=!!s.{FADE_SETTING}" in BOOT_JS
+    assert f"{FADE_RUNTIME_FLAG}=false" in BOOT_JS
+
+
+def test_preferences_ui_exposes_and_saves_fade_text_effect():
+    assert f'id="{FADE_CHECKBOX_ID}"' in INDEX_HTML
+    assert f'data-i18n="{FADE_LABEL_KEY}"' in INDEX_HTML
+    assert f'data-i18n="{FADE_DESC_KEY}"' in INDEX_HTML
+    assert FADE_LABEL_KEY in I18N_JS
+    assert FADE_DESC_KEY in I18N_JS
+
+    payload_block = function_block(PANELS_JS, "_preferencesPayloadFromUi")
+    assert_contains_all(payload_block, [f"$('{FADE_CHECKBOX_ID}')", f"payload.{FADE_SETTING}="])
+
+    load_block = function_block(PANELS_JS, "loadSettingsPanel")
+    fade_load = load_block[load_block.index(f"$('{FADE_CHECKBOX_ID}')") :]
+    assert_contains_all(
+        fade_load[:700],
+        [f"settings.{FADE_SETTING}", FADE_RUNTIME_FLAG, "addEventListener('change',_schedulePreferencesAutosave"],
+    )
+
+    autosave_block = function_block(PANELS_JS, "_autosavePreferencesSettings")
+    assert_contains_all(autosave_block, [FADE_SETTING, f"{FADE_RUNTIME_FLAG}=!!payload.{FADE_SETTING}"])
+
+    save_block = function_block(PANELS_JS, "saveSettings")
+    assert_contains_all(save_block, [FADE_CHECKBOX_ID, f"body.{FADE_SETTING}", "fadeTextEffect"])
+
+    apply_block = function_block(PANELS_JS, "_applySavedSettingsUi")
+    assert_contains_all(apply_block, ["fadeTextEffect", f"{FADE_RUNTIME_FLAG}=!!fadeTextEffect"])
+
+
+def test_fade_helpers_and_constants_exist():
+    for name in [
+        "_resetStreamFadeState",
+        "_shouldUseStreamFade",
+        "_streamFadeNextText",
+        "_streamFadeWordCountOf",
+        "_renderStreamingFadeMarkdown",
+        "_wrapStreamingFadeWords",
+        "_streamFadeSkipNode",
+        "_drainStreamFadeBeforeDone",
+    ]:
+        assert f"function {name}" in MESSAGES_JS
+
+    assert_contains_all(
+        MESSAGES_JS,
+        [
+            "const _STREAM_FADE_MS=140",
+            "const _STREAM_FADE_WAVE_MS=320",
+            "const _STREAM_FADE_MAX_STAGGER_MS=520",
+            "_streamFadeVisibleText",
+            "_streamFadeWordBornAt",
+            "_streamFadeArrivalWps",
+        ],
+    )
+
+
+def test_schedule_render_keeps_default_smd_path_when_fade_is_off():
+    block = function_block(MESSAGES_JS, "_scheduleRender")
+    assert "_shouldUseStreamFade()" in block
+    assert "_renderStreamingFadeMarkdown(displayText)" in block
+    assert "_smdWrite(displayText)" in block
+    assert "_smdNewParser(assistantBody)" in block
+    assert "?16:66" in compact(block)
+
+
+def test_fade_renderer_uses_playout_buffer_and_markdown_rerender():
+    next_block = function_block(MESSAGES_JS, "_streamFadeNextText")
+    render_block = function_block(MESSAGES_JS, "_renderStreamingFadeMarkdown")
+
+    assert_contains_all(
+        next_block,
+        [
+            "targetText.startsWith(_streamFadeVisibleText)",
+            "wordsPerSecond",
+            "instantArrivalWps",
+            "backlogWords",
+            "streamAgeSeconds",
+            "caughtUp",
+        ],
+    )
+    assert_contains_all(
+        render_block,
+        [
+            "_streamFadeNextText(displayText)",
+            "renderMd ? renderMd(next.text||'')",
+            "stream-fade-active",
+            "_wrapStreamingFadeWords(assistantBody)",
+            "_sanitizeSmdLinks(assistantBody)",
+        ],
+    )
+
+
+def test_fade_animation_state_survives_markdown_rerenders():
+    block = function_block(MESSAGES_JS, "_wrapStreamingFadeWords")
+    assert_contains_all(
+        block,
+        [
+            "_streamFadeWordBornAt[wordIndex]",
+            "ageMs",
+            "animationDelay",
+            "--stream-fade-ms",
+            "_streamFadeLatestAnimationEndAt",
+            "_streamFadeWordBornAt.length=wordIndex+1",
+        ],
+    )
+    assert "filter:" not in STYLE_CSS[STYLE_CSS.index("OpenWebUI-style streaming word fade") :].split(
+        "[data-live-assistant", 1
+    )[0]
+    assert "translateY" not in STYLE_CSS[STYLE_CSS.index("OpenWebUI-style streaming word fade") :].split(
+        "[data-live-assistant", 1
+    )[0]
+
+
+def test_done_drain_finishes_fade_before_final_dom_replacement_and_blocks_late_mutations():
+    done_block = event_listener_block(MESSAGES_JS, "done")
+    drain_block = function_block(MESSAGES_JS, "_drainStreamFadeBeforeDone")
+
+    assert_contains_all(done_block, ["_terminalStateReached=true", "_drainStreamFadeBeforeDone(_finishDone)"])
+    assert_contains_all(drain_block, ["remainingAnimationMs", "_STREAM_FADE_MAX_STAGGER_MS", "requestAnimationFrame(step)"])
+
+    for event_name in ["token", "interim_assistant", "reasoning"]:
+        assert "if(_terminalStateReached||_streamFinalized) return;" in event_listener_block(MESSAGES_JS, event_name)
+
+
+def test_new_segments_reset_fade_state():
+    assert "_resetStreamFadeState()" in function_block(MESSAGES_JS, "_resetAssistantSegment")
+
+
+def test_fade_css_animates_words_and_hides_live_cursor():
+    fade_css = STYLE_CSS[STYLE_CSS.index("OpenWebUI-style streaming word fade") :]
+    assert_contains_all(
+        fade_css,
+        [
+            "@keyframes stream-fade-word-in",
+            ".stream-fade-word.is-new",
+            "var(--stream-fade-ms,140ms) ease-out",
+            ".msg-body.stream-fade-active > :last-child::after",
+            "display:none",
+            "content:none",
+        ],
+    )
+    assert "prefers-reduced-motion: reduce" not in fade_css
+
+
+def test_stream_fade_next_text_executes_and_advances_playout():
+    script = (
+        fade_helper_script("{_t:0,now(){this._t+=33;return this._t;}}")
+        + r"""
+const target='one two three four five six seven eight nine ten eleven twelve';
+const first=_streamFadeNextText(target);
+const second=_streamFadeNextText(target);
+if (!first.text || !second.text) throw new Error('no text revealed');
+if (second.text.length < first.text.length) throw new Error('playout regressed');
+"""
+    )
+    result = run_node(script)
+    assert "ReferenceError" not in result.stderr
+
+
+def test_stream_fade_ramps_above_steady_arrival_rate():
+    script = (
+        fade_helper_script()
+        + r"""
+const words=Array.from({length:240},(_,i)=>'w'+i);
+let shown=0;
+let targetCount=0;
+for(let frame=0;frame<240;frame++){
+  performance._t += 16;
+  // Simulate sustained fast generation: ~40 words/sec arriving.
+  targetCount = Math.min(words.length, Math.floor(performance._t/1000*40));
+  const out=_streamFadeNextText(words.slice(0,targetCount).join(' '));
+  shown=(out.text.match(/\S+/g)||[]).length;
+}
+const backlog=targetCount-shown;
+if(shown < 150) throw new Error(`too slow: shown=${shown} target=${targetCount} backlog=${backlog} arrivalWps=${_streamFadeArrivalWps}`);
+if(backlog > 10) throw new Error(`did not catch up: shown=${shown} target=${targetCount} backlog=${backlog} arrivalWps=${_streamFadeArrivalWps}`);
+"""
+    )
+    run_node(script)

From e9c985a4878adc427b327e69b178bbe4928d2daa Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Mon, 11 May 2026 15:15:58 -0600
Subject: [PATCH 02/12] fix: make stream rAF cancellation explicit

---
 STREAMING_FADE_HANDOFF.md |  2 +-
 static/messages.js        | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/STREAMING_FADE_HANDOFF.md b/STREAMING_FADE_HANDOFF.md
index ea3c8aeb..9596cf69 100644
--- a/STREAMING_FADE_HANDOFF.md
+++ b/STREAMING_FADE_HANDOFF.md
@@ -38,7 +38,7 @@ Added local fade state:
 Key helpers:
 
 - `_resetStreamFadeState()`
-- `_cancelPendingStreamRender()`
+- `_cancelAnimationFramePendingStreamRender()`
 - `_shouldUseStreamFade()`
 - `_streamFadeWordCountOf(text)`
 - `_streamFadeNextText(targetText)`
diff --git a/static/messages.js b/static/messages.js
index 9cdd6834..de00c253 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -695,7 +695,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     _streamFadeLatestAnimationEndAt=0;
     _streamFadeLastRevealCount=0;
   }
-  function _cancelPendingStreamRender(){
+  function _cancelAnimationFramePendingStreamRender(){
     if(_pendingRafHandle===null) return;
     cancelAnimationFrame(_pendingRafHandle);
     clearTimeout(_pendingRafHandle);
@@ -1197,7 +1197,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         // the DOM is settled by renderMessages, so no trailing token/reasoning rAF
         // can reintroduce a stale thinking card or duplicate content.
         _streamFinalized=true;
-        _cancelPendingStreamRender();
+        _cancelAnimationFramePendingStreamRender();
         if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
         // Finalize smd parser — flushes any remaining buffered markdown state
         // and runs Prism + copy buttons on the live segment before the DOM is replaced
@@ -1342,7 +1342,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         sendBrowserNotification('Response complete',assistantText?assistantText.slice(0,100):'Task finished');
       };
       if(_shouldUseStreamFade()&&assistantBody){
-        _cancelPendingStreamRender();
+        _cancelAnimationFramePendingStreamRender();
         _drainStreamFadeBeforeDone(_finishDone);
         return;
       }
@@ -1441,7 +1441,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       _terminalStateReached=true;
       if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
       _streamFinalized=true;
-      _cancelPendingStreamRender();
+      _cancelAnimationFramePendingStreamRender();
       _smdEndParser();
       if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
       // Application-level error sent explicitly by the server (rate limit, crash, etc.)
@@ -1527,7 +1527,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       _terminalStateReached=true;
       if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
       _streamFinalized=true;
-      _cancelPendingStreamRender();
+      _cancelAnimationFramePendingStreamRender();
       _smdEndParser();
       if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
       source.close();
@@ -1621,7 +1621,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     // cannot fire after renderMessages() has settled the DOM with the error message.
     if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
     _streamFinalized=true;
-    _cancelPendingStreamRender();
+    _cancelAnimationFramePendingStreamRender();
     if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
     _clearOwnerInflightState();
     _closeSource();

From 8ff368fd809312bd567011fe6aac0d92362f56ff Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Mon, 11 May 2026 17:38:12 -0600
Subject: [PATCH 03/12] clean up implementation

---
 static/messages.js             | 23 ++++++++++++++---------
 static/style.css               |  5 +++--
 tests/test_smooth_text_fade.py | 10 ++++++++--
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/static/messages.js b/static/messages.js
index de00c253..3e758359 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -524,6 +524,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   const _STREAM_FADE_MS=140;
   const _STREAM_FADE_WAVE_MS=320;
   const _STREAM_FADE_MAX_STAGGER_MS=520;
+  const _streamFadeEnabledForStream=window._fadeTextEffect===true;
 
   // rAF-throttled rendering: buffer tokens, render at most once per frame
   let _renderPending=false;
@@ -703,7 +704,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     _renderPending=false;
   }
   function _shouldUseStreamFade(){
-    return window._fadeTextEffect===true;
+    return _streamFadeEnabledForStream;
   }
   function _streamFadeSkipNode(node){
     if(!node||node.nodeType!==1) return false;
@@ -718,8 +719,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     targetText=String(targetText||'');
     const now=performance.now();
     if(!targetText){
+      const hadVisible=!!_streamFadeVisibleText;
       _resetStreamFadeState();
-      return {text:'', caughtUp:true};
+      return {text:'', caughtUp:true, changed:hadVisible};
     }
     if(!_streamFadeVisibleText||!targetText.startsWith(_streamFadeVisibleText)){
       // Markdown/tool stripping can rewrite the visible prefix. Reset safely rather than
@@ -730,7 +732,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       _streamFadeLastTickMs=now;
       _streamFadeStartedAt=now;
     }
-    if(_streamFadeVisibleText===targetText) return {text:_streamFadeVisibleText,caughtUp:true};
+    if(_streamFadeVisibleText===targetText) return {text:_streamFadeVisibleText,caughtUp:true,changed:false};
 
     const remaining=targetText.slice(_streamFadeVisibleText.length);
     const backlogWords=_streamFadeWordCountOf(remaining);
@@ -778,7 +780,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     // fade for these waves so fast output stays visibly animated.
     const burstFloor=backlogWords>=120?24:backlogWords>=60?18:backlogWords>=30?12:wordsPerSecond>=300?8:wordsPerSecond>=220?6:0;
     if(burstFloor>0) wordsToReveal=Math.max(wordsToReveal, Math.min(burstFloor, backlogWords));
-    if(wordsToReveal<1){_streamFadeLastRevealCount=0;return {text:_streamFadeVisibleText,caughtUp:false};}
+    if(wordsToReveal<1){_streamFadeLastRevealCount=0;return {text:_streamFadeVisibleText,caughtUp:false,changed:false};}
     _streamFadeLastRevealCount=Math.min(wordsToReveal, backlogWords);
     _streamFadeWordCarry=Math.max(0,_streamFadeWordCarry-wordsToReveal);
 
@@ -792,11 +794,12 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     if(cut<=0) cut=Math.min(remaining.length,4);
     _streamFadeVisibleText+=remaining.slice(0,cut);
     if(_streamFadeVisibleText.length>targetText.length) _streamFadeVisibleText=targetText;
-    return {text:_streamFadeVisibleText,caughtUp:_streamFadeVisibleText===targetText};
+    return {text:_streamFadeVisibleText,caughtUp:_streamFadeVisibleText===targetText,changed:true};
   }
   function _renderStreamingFadeMarkdown(displayText){
     if(!assistantBody) return true;
     const next=_streamFadeNextText(displayText);
+    if(!next.changed) return next.caughtUp;
     const html=renderMd ? renderMd(next.text||'') : esc(next.text||'');
     assistantBody.innerHTML=html;
     assistantBody.classList.add('stream-fade-active');
@@ -835,7 +838,6 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       while((match=wordRe.exec(value))){
         if(match.index>last) frag.appendChild(document.createTextNode(value.slice(last,match.index)));
         wordIndex+=1;
-        const span=document.createElement('span');
         if(!_streamFadeWordBornAt[wordIndex]){
           const newWordOffset=Math.max(wordIndex-_streamFadeWordCount-1,0);
           // High-speed output should feel like a continuous animated sweep, not
@@ -847,14 +849,17 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,_streamFadeWordBornAt[wordIndex]+fadeMs);
         const ageMs=now-_streamFadeWordBornAt[wordIndex];
         const isAnimating=ageMs<fadeMs;
-        span.className=isAnimating?'stream-fade-word is-new':'stream-fade-word';
         if(isAnimating){
+          const span=document.createElement('span');
+          span.className='stream-fade-word is-new';
           const delayMs=Math.max(-fadeMs, -ageMs);
           span.style.animationDelay=delayMs+'ms';
           span.style.setProperty('--stream-fade-ms',fadeMs+'ms');
+          span.textContent=match[1];
+          frag.appendChild(span);
+        }else{
+          frag.appendChild(document.createTextNode(match[1]));
         }
-        span.textContent=match[1];
-        frag.appendChild(span);
         if(match[2]) frag.appendChild(document.createTextNode(match[2]));
         last=match.index+match[0].length;
         changed=true;
diff --git a/static/style.css b/static/style.css
index 9e949ef3..4d2bd92a 100644
--- a/static/style.css
+++ b/static/style.css
@@ -3745,8 +3745,9 @@ main.main.showing-logs > #mainLogs{display:flex;}
 
 /* OpenWebUI-style streaming word fade (opt-in via Settings → Preferences).
    Opacity-only fade; high-speed streams use a longer JS-driven wave duration. */
-.stream-fade-active .stream-fade-word{display:inline;will-change:opacity;}
-.stream-fade-word.is-new{animation:stream-fade-word-in var(--stream-fade-ms,140ms) ease-out both;}
+.stream-fade-active .stream-fade-word{display:inline;}
+.stream-fade-word.is-new{animation:stream-fade-word-in var(--stream-fade-ms,140ms) cubic-bezier(.2,.7,.2,1) both;will-change:opacity;}
 @keyframes stream-fade-word-in{from{opacity:0;}to{opacity:1;}}
+@media (prefers-reduced-motion: reduce){.stream-fade-word.is-new{animation:none;will-change:auto;}}
 [data-live-assistant="1"]:last-child .msg-body.stream-fade-active > :last-child::after,
 [data-live-assistant="1"]:last-child .msg-body.stream-fade-active:not(:has(> *))::after{display:none;content:none;}
diff --git a/tests/test_smooth_text_fade.py b/tests/test_smooth_text_fade.py
index 3ab4ffbe..5a8e0bc8 100644
--- a/tests/test_smooth_text_fade.py
+++ b/tests/test_smooth_text_fade.py
@@ -84,6 +84,8 @@ let _streamFadeLastArrivalMs=0;
 let _streamFadeArrivalWps=0;
 let _streamFadeLatestAnimationEndAt=0;
 let _streamFadeLastRevealCount=0;
+let _streamFadeWordCount=0;
+let _streamFadeWordBornAt=[];
 const _STREAM_FADE_MS=140;
 const _STREAM_FADE_WAVE_MS=320;
 const _STREAM_FADE_MAX_STAGGER_MS=520;
@@ -193,6 +195,7 @@ def test_fade_renderer_uses_playout_buffer_and_markdown_rerender():
         render_block,
         [
             "_streamFadeNextText(displayText)",
+            "if(!next.changed) return next.caughtUp",
             "renderMd ? renderMd(next.text||'')",
             "stream-fade-active",
             "_wrapStreamingFadeWords(assistantBody)",
@@ -210,6 +213,7 @@ def test_fade_animation_state_survives_markdown_rerenders():
             "ageMs",
             "animationDelay",
             "--stream-fade-ms",
+            "span.className='stream-fade-word is-new'",
             "_streamFadeLatestAnimationEndAt",
             "_streamFadeWordBornAt.length=wordIndex+1",
         ],
@@ -244,13 +248,15 @@ def test_fade_css_animates_words_and_hides_live_cursor():
         [
             "@keyframes stream-fade-word-in",
             ".stream-fade-word.is-new",
-            "var(--stream-fade-ms,140ms) ease-out",
+            "var(--stream-fade-ms,140ms) cubic-bezier(.2,.7,.2,1)",
+            "prefers-reduced-motion: reduce",
             ".msg-body.stream-fade-active > :last-child::after",
             "display:none",
             "content:none",
         ],
     )
-    assert "prefers-reduced-motion: reduce" not in fade_css
+    assert ".stream-fade-active .stream-fade-word{display:inline;}" in fade_css
+    assert ".stream-fade-active .stream-fade-word{display:inline;will-change:opacity;}" not in fade_css
 
 
 def test_stream_fade_next_text_executes_and_advances_playout():

From 7fa2f7031478b9d2680b038b084da8f19cc3ca60 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Mon, 11 May 2026 18:02:42 -0600
Subject: [PATCH 04/12] optimize performance & tune readability

---
 STREAMING_FADE_HANDOFF.md      | 289 ---------------------------------
 static/messages.js             | 184 ++++++++++-----------
 tests/test_smooth_text_fade.py |  69 ++++++--
 3 files changed, 141 insertions(+), 401 deletions(-)
 delete mode 100644 STREAMING_FADE_HANDOFF.md

diff --git a/STREAMING_FADE_HANDOFF.md b/STREAMING_FADE_HANDOFF.md
deleted file mode 100644
index 9596cf69..00000000
--- a/STREAMING_FADE_HANDOFF.md
+++ /dev/null
@@ -1,289 +0,0 @@
-# Streaming Fade Text Effect Handoff
-
-## Summary
-
-This branch adds an opt-in **Fade text effect** preference for HermesWebUI streaming assistant responses.
-
-When enabled, newly streamed assistant words fade in instead of appearing via the default incremental markdown renderer. The goal is a ChatGPT/OpenWebUI-like animated streaming feel while still catching up to high-throughput model output.
-
-The feature is **off by default** for performance.
-
-## User-facing behavior
-
-- New setting: **Settings → Preferences → Fade text effect**
-- Runtime global: `window._fadeTextEffect`
-- Default: `false`
-- When enabled:
-  - assistant stream uses a playout buffer rather than immediately rendering the full incoming chunk
-  - visible text advances at adaptive speed based on live incoming word velocity, backlog, and stream age
-  - new words are wrapped in spans and animated with opacity-only fade
-  - high-speed output uses rolling phrase-sized waves instead of giant block pops
-  - Hermes' bright live cursor is hidden during fade mode
-
-## Main files changed
-
-### `static/messages.js`
-
-Core streaming implementation inside `attachLiveStream(...)`.
-
-Added local fade state:
-
-- `_streamFadeVisibleText`
-- `_streamFadeWordCarry`
-- `_streamFadeWordBornAt`
-- `_streamFadeArrivalWps`
-- `_streamFadeLastRevealCount`
-- `_streamFadeLatestAnimationEndAt`
-
-Key helpers:
-
-- `_resetStreamFadeState()`
-- `_cancelAnimationFramePendingStreamRender()`
-- `_shouldUseStreamFade()`
-- `_streamFadeWordCountOf(text)`
-- `_streamFadeNextText(targetText)`
-- `_renderStreamingFadeMarkdown(displayText)`
-- `_wrapStreamingFadeWords(root)`
-- `_drainStreamFadeBeforeDone(onDone)`
-
-Important behavior:
-
-- Fade mode renders at ~60fps (`16ms`) while default streaming remains ~15fps (`66ms`).
-- Default SMD streaming path remains intact when fade mode is off.
-- On `done`, fade mode drains remaining buffered text and waits for the final stagger/fade window before the final `renderMessages()` replacement.
-- Prefix resets now call `_resetStreamFadeState()` so stale birth timestamps do not leak across markdown/tool-call rewrites.
-
-### `static/style.css`
-
-Adds opacity-only streaming fade CSS:
-
-```css
-.stream-fade-word.is-new {
-  animation: stream-fade-word-in var(--stream-fade-ms,140ms) ease-out both;
-}
-@keyframes stream-fade-word-in { from { opacity:0; } to { opacity:1; } }
-```
-
-Also hides the live cursor during fade mode:
-
-```css
-[data-live-assistant="1"]:last-child .msg-body.stream-fade-active > :last-child::after,
-[data-live-assistant="1"]:last-child .msg-body.stream-fade-active:not(:has(> *))::after {
-  display:none;
-  content:none;
-}
-```
-
-### Settings plumbing
-
-- `api/config.py`
-  - adds `fade_text_effect` default and bool key
-- `static/boot.js`
-  - initializes `window._fadeTextEffect`
-- `static/index.html`
-  - adds Preferences checkbox
-- `static/panels.js`
-  - loads, autosaves, and saves the setting
-- `static/i18n.js`
-  - adds locale strings for all supported locales
-
-### Tests
-
-New file:
-
-- `tests/test_smooth_text_fade.py`
-
-Coverage includes:
-
-- setting persistence/config plumbing
-- Preferences UI plumbing
-- i18n key presence
-- fade helper presence
-- executable Node regressions that invoke `_streamFadeNextText(...)`
-- speed-ramp behavior
-- high-speed rolling-wave behavior
-- done-drain behavior
-- CSS expectations
-- cursor hiding
-
-## Tunable constants
-
-Defined near the top of `attachLiveStream(...)` in `static/messages.js`:
-
-```js
-const _STREAM_FADE_MS=140;
-const _STREAM_FADE_WAVE_MS=320;
-const _STREAM_FADE_MAX_STAGGER_MS=520;
-```
-
-Meaning:
-
-- `_STREAM_FADE_MS`: base fade duration for normal streaming
-- `_STREAM_FADE_WAVE_MS`: longer duration for high-speed multi-word waves
-- `_STREAM_FADE_MAX_STAGGER_MS`: max stagger spread across newly inserted words
-
-Adaptive playout speed currently uses:
-
-```js
-const baseWps = 30 + Math.min(streamAgeSeconds * 4, 35); // 30 → 65 wps
-const arrivalWps = _streamFadeArrivalWps ? Math.min(_streamFadeArrivalWps * 2.4 + 20, 320) : 0;
-const backlogWps = backlogWords > 0 ? Math.min(30 + backlogWords * 8, 420) : 0;
-const wordsPerSecond = Math.min(420, Math.max(baseWps, arrivalWps, backlogWps));
-```
-
-Rolling burst floor:
-
-```js
-const burstFloor = backlogWords >= 120 ? 24
-  : backlogWords >= 60 ? 18
-  : backlogWords >= 30 ? 12
-  : wordsPerSecond >= 300 ? 8
-  : wordsPerSecond >= 220 ? 6
-  : 0;
-```
-
-High-speed waves then use:
-
-```js
-const fadeMs = revealedThisFrame >= 8 ? _STREAM_FADE_WAVE_MS
-  : revealedThisFrame >= 4 ? 240
-  : _STREAM_FADE_MS;
-
-const waveStepMs = revealedThisFrame >= 18 ? 18
-  : revealedThisFrame >= 8 ? 22
-  : revealedThisFrame >= 4 ? 16
-  : 10;
-```
-
-## Design decisions and why
-
-### Why not use only OpenWebUI's renderer?
-
-A wholesale renderer transplant was avoided. Hermes keeps its existing streaming markdown path as default, and fade mode is a selective cosmetic layer.
-
-### Why a playout buffer?
-
-Hermes receives backend stream chunks that can arrive faster or more bursty than desired visually. Rendering each chunk immediately can pop large text blocks into the DOM. The playout buffer separates:
-
-- text received from backend (`assistantText`)
-- text currently visible (`_streamFadeVisibleText`)
-
-### Why adaptive speed?
-
-A fixed reveal rate felt robotic and lagged behind faster models. Earlier attempts using session-wide average arrival rate failed when the model spent time reasoning before writing because the denominator inflated and the ramp never triggered.
-
-Current approach tracks **live target-word arrival velocity** using deltas:
-
-```js
-const instantArrivalWps = (targetWords - _streamFadeLastTargetWords) * 1000 / arrivalElapsedMs;
-_streamFadeArrivalWps = _streamFadeArrivalWps
-  ? (_streamFadeArrivalWps * 0.65 + instantArrivalWps * 0.35)
-  : instantArrivalWps;
-```
-
-Then playout deliberately exceeds arrival velocity so it catches up.
-
-### Why rolling waves?
-
-At very high throughput, revealing too many words in one frame felt chunky and made the fade almost disappear. The current implementation reduces one-frame burst size and stretches/staggers high-speed waves across several hundred milliseconds.
-
-This makes fast output feel more like animated text sweeping in rather than paragraph blocks appearing.
-
-## Performance notes
-
-Fade mode is more expensive than the default streaming path because it re-renders markdown and wraps visible text nodes during active streaming.
-
-Mitigations:
-
-- feature is opt-in and off by default
-- default streaming-markdown path remains unchanged when disabled
-- fade render cadence is capped at ~60fps
-- skip wrapping inside `pre`, `code`, `script`, `style`, `textarea`, `svg`, and `math`
-- animation is opacity-only, compositor-friendly
-
-Expected impact:
-
-- fine on modern desktop/Apple Silicon hardware
-- higher CPU/battery use during long/high-speed responses
-- users can disable it instantly from Preferences
-
-## Verification performed
-
-Commands run successfully:
-
-```bash
-cd /Users/agent/HermesWebUI
-PY=/Users/agent/.hermes/hermes-agent/venv/bin/python
-$PY -m pytest tests/test_smooth_text_fade.py tests/test_1003_preferences_autosave.py tests/test_streaming_markdown.py tests/test_chinese_locale.py tests/test_japanese_locale.py tests/test_korean_locale.py tests/test_russian_locale.py tests/test_spanish_locale.py -q
-node --check static/messages.js static/panels.js static/boot.js static/i18n.js
-$PY -m py_compile api/config.py
-git diff --check
-```
-
-Latest result before writing this handoff:
-
-```text
-99 passed
-```
-
-Also performed:
-
-- dead/debug scan over diff for `TODO`, `FIXME`, `console.log`, `debugger`, stale `100ms`, stale `220ms`, stale `48` burst constants
-- review cleanup: blocked late `token` / `reasoning` / `interim_assistant` mutations during fade done-drain, moved fade wave calculations out of the per-word hot path, and made manual Settings save refresh `window._fadeTextEffect`
-- HermesWebUI restart via launchctl
-- live asset verification via `curl http://127.0.0.1:8787/static/messages.js`
-- real chat/SSE smoke test: temp session, prompt `Reply with exactly: OK`, received `OK`, got `done`, deleted temp session
-
-## Current service state when last verified
-
-- HermesWebUI runs on port `8787`
-- Restarted during validation
-- Health endpoint returned OK
-
-Useful checks:
-
-```bash
-curl -fsS http://127.0.0.1:8787/health
-curl -fsS http://127.0.0.1:8787/static/messages.js | grep -E "_STREAM_FADE_WAVE_MS=320|_STREAM_FADE_MAX_STAGGER_MS=520|burstFloor=backlogWords>=120\?24"
-curl -fsS http://127.0.0.1:8787/static/style.css | grep -E "var\(--stream-fade-ms,140ms\)|stream-fade-word-in"
-```
-
-## Known caveats
-
-- LLM telemetry often reports **tokens/sec**, while the UI reveals visible words. These are not equivalent.
-- The renderer cannot reveal text before complete visible text exists.
-- If backend chunks arrive as very large bursts, the rolling-wave logic smooths them but may still require subjective tuning.
-- The current visual is close, but final merge review should include manual browser testing with:
-  - normal-speed model
-  - high-throughput model (~100+ tok/s)
-  - long markdown responses
-  - code blocks
-  - lists/tables
-  - tool-call-heavy responses
-
-## Suggested next review steps
-
-1. Manually test in browser after hard refresh (`Cmd+Shift+R`).
-2. Try a high-throughput long essay and tune only these constants if needed:
-   - `_STREAM_FADE_WAVE_MS`
-   - `_STREAM_FADE_MAX_STAGGER_MS`
-   - burst floor thresholds
-   - `waveStepMs`
-3. Check the diff for whether the `done` handler reindent is acceptable for the PR. It is intentional because the original done body is now wrapped in `_finishDone` so fade mode can drain before final DOM replacement.
-4. If submitting PR, mention the feature is opt-in/off-by-default and the default streaming markdown path remains unchanged.
-
-## Files to include in PR
-
-Expected modified/new files:
-
-```text
-api/config.py
-static/boot.js
-static/i18n.js
-static/index.html
-static/messages.js
-static/panels.js
-static/style.css
-tests/test_smooth_text_fade.py
-STREAMING_FADE_HANDOFF.md
-```
diff --git a/static/messages.js b/static/messages.js
index 3e758359..865445e3 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -510,17 +510,16 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   // the final answer or the response to render twice.
   let _streamFinalized=false;
   let _pendingRafHandle=null;
-  let _streamFadeWordCount=0;
   let _streamFadeVisibleText='';
   let _streamFadeLastTickMs=0;
   let _streamFadeWordCarry=0;
-  let _streamFadeWordBornAt=[];
   let _streamFadeStartedAt=0;
   let _streamFadeLastTargetWords=0;
   let _streamFadeLastArrivalMs=0;
   let _streamFadeArrivalWps=0;
   let _streamFadeLatestAnimationEndAt=0;
   let _streamFadeLastRevealCount=0;
+  let _streamFadeAppendOffset=0;
   const _STREAM_FADE_MS=140;
   const _STREAM_FADE_WAVE_MS=320;
   const _STREAM_FADE_MAX_STAGGER_MS=520;
@@ -616,11 +615,11 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   }
   // Helper: create (or recreate) the smd parser bound to a given DOM element.
   // Called when assistantBody is first created and after each tool-call segment reset.
-  function _smdNewParser(el){
+  function _smdNewParser(el, fade=false){
     _smdWrittenLen=0;
     _smdWrittenText='';
     if(!window.smd){_smdParser=null;return;}
-    const renderer=window.smd.default_renderer(el);
+    const renderer=fade ? _streamFadeRenderer(el) : window.smd.default_renderer(el);
     _smdParser=window.smd.parser(renderer);
   }
   // Helper: end the current smd parser (flushes remaining state) and null it out.
@@ -637,7 +636,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   }
   // Helper: feed new displayText delta to the smd parser.
   // Only feeds chars beyond what has already been written (_smdWrittenLen).
-  function _smdWrite(displayText){
+  function _smdWrite(displayText, fade=false){
     if(!_smdParser||!window.smd) return;
     displayText=String(displayText||'');
     // Self-heal desyncs: if displayText no longer starts with what we've already
@@ -648,7 +647,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       _smdWrittenLen=0;
       _smdWrittenText='';
       if(assistantBody) assistantBody.innerHTML='';
-      _smdNewParser(assistantBody);
+      _smdNewParser(assistantBody,fade);
       if(!_smdParser) return;
     }
     const delta=displayText.slice(_smdWrittenText.length);
@@ -656,15 +655,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     try{window.smd.parser_write(_smdParser,delta);}catch(_){}
     _smdWrittenLen=displayText.length;
     _smdWrittenText=displayText;
-    // streaming-markdown does NOT sanitize URL schemes — `[click](javascript:...)`
-    // and `![alt](javascript:...)` survive as href/src.  Strip any unsafe schemes
-    // from anchors/images that were just added to the live DOM.  The existing
-    // renderMd() path filters these via its http(s)-only regex; we need a matching
-    // guard here so the live-stream path isn't an XSS vector for agent-echoed
-    // prompt-injection content.  The final renderMessages() call at `done` uses
-    // renderMd which is already safe, but during streaming the user could click
-    // a malicious link before that replacement happens.
-    if(assistantBody){_sanitizeSmdLinks(assistantBody);}
+    // streaming-markdown does NOT sanitize URL schemes. The default live path
+    // scans after writes; fade mode blocks unsafe href/src in its renderer.set_attr.
+    if(assistantBody&&!fade){_sanitizeSmdLinks(assistantBody);}
   }
   // Allowed URL schemes for anchors and images rendered from agent-streamed markdown.
   // Matches the effective allowlist of renderMd() (http/https via regex + relative).
@@ -684,17 +677,16 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   }
 
   function _resetStreamFadeState(){
-    _streamFadeWordCount=0;
     _streamFadeVisibleText='';
     _streamFadeLastTickMs=0;
     _streamFadeWordCarry=0;
-    _streamFadeWordBornAt=[];
     _streamFadeStartedAt=0;
     _streamFadeLastTargetWords=0;
     _streamFadeLastArrivalMs=0;
     _streamFadeArrivalWps=0;
     _streamFadeLatestAnimationEndAt=0;
     _streamFadeLastRevealCount=0;
+    _streamFadeAppendOffset=0;
   }
   function _cancelAnimationFramePendingStreamRender(){
     if(_pendingRafHandle===null) return;
@@ -711,6 +703,60 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     const tag=(node.tagName||'').toLowerCase();
     return tag==='pre'||tag==='code'||tag==='script'||tag==='style'||tag==='textarea'||tag==='svg'||tag==='math';
   }
+  function _streamFadeRenderer(el){
+    const renderer=window.smd.default_renderer(el);
+    const baseAddText=renderer.add_text;
+    const baseSetAttr=renderer.set_attr;
+    renderer.add_text=(data,text)=>{
+      const parent=data&&data.nodes&&data.nodes[data.index];
+      if(!parent||_streamFadeSkipNode(parent)){baseAddText(data,text);return;}
+      const frag=document.createDocumentFragment();
+      const wordRe=/(\S+)(\s*)/g;
+      const value=String(text||'');
+      const revealedThisFrame=Math.max(1,_streamFadeLastRevealCount||1);
+      const fadeMs=revealedThisFrame>=8?_STREAM_FADE_WAVE_MS:revealedThisFrame>=4?240:_STREAM_FADE_MS;
+      const waveStepMs=revealedThisFrame>=18?18:revealedThisFrame>=8?22:revealedThisFrame>=4?16:12;
+      const reduceMotion=window.matchMedia&&window.matchMedia('(prefers-reduced-motion: reduce)').matches;
+      let last=0, match, changed=false;
+      while((match=wordRe.exec(value))){
+        if(match.index>last) frag.appendChild(document.createTextNode(value.slice(last,match.index)));
+        if(reduceMotion){
+          frag.appendChild(document.createTextNode(match[1]));
+          if(match[2]) frag.appendChild(document.createTextNode(match[2]));
+          last=match.index+match[0].length;
+          changed=true;
+          continue;
+        }
+        const span=document.createElement('span');
+        span.className='stream-fade-word is-new';
+        const delayMs=Math.min(_streamFadeAppendOffset*waveStepMs,_STREAM_FADE_MAX_STAGGER_MS);
+        span.style.animationDelay=delayMs+'ms';
+        span.style.setProperty('--stream-fade-ms',fadeMs+'ms');
+        span.textContent=match[1];
+        span.addEventListener('animationend',()=>span.replaceWith(document.createTextNode(span.textContent||'')),{once:true});
+        frag.appendChild(span);
+        _streamFadeAppendOffset+=1;
+        _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,performance.now()+delayMs+fadeMs);
+        if(match[2]) frag.appendChild(document.createTextNode(match[2]));
+        last=match.index+match[0].length;
+        changed=true;
+      }
+      if(!changed){baseAddText(data,text);return;}
+      if(last<value.length) frag.appendChild(document.createTextNode(value.slice(last)));
+      parent.appendChild(frag);
+    };
+    renderer.set_attr=(data,attr,value)=>{
+      const isHref=window.smd&&attr===window.smd.HREF;
+      const isSrc=window.smd&&attr===window.smd.SRC;
+      if((isHref||isSrc)&&!_SMD_SAFE_URL_RE.test(String(value||''))){
+        const node=data&&data.nodes&&data.nodes[data.index];
+        if(node&&node.setAttribute) node.setAttribute('data-blocked-scheme','1');
+        return;
+      }
+      baseSetAttr(data,attr,value);
+    };
+    return renderer;
+  }
   function _streamFadeWordCountOf(text){
     const m=String(text||'').match(/\S+/g);
     return m?m.length:0;
@@ -765,21 +811,18 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     }
 
     const streamAgeSeconds=Math.max(0, (now-(_streamFadeStartedAt||now))/1000);
-    const baseWps=30 + Math.min(streamAgeSeconds*4, 35); // 30 → 65 wps over long answers
-    const arrivalWps=_streamFadeArrivalWps ? Math.min(_streamFadeArrivalWps*2.4 + 20, 320) : 0;
-    const backlogWps=backlogWords>0 ? Math.min(30 + backlogWords*8, 420) : 0;
-    const wordsPerSecond=Math.min(420, Math.max(baseWps, arrivalWps, backlogWps));
+    const baseWps=22 + Math.min(streamAgeSeconds*2.5, 28); // 22 → 50 wps over long answers
+    const arrivalWps=_streamFadeArrivalWps ? Math.min(_streamFadeArrivalWps*1.05 + 8, 90) : 0;
+    const backlogWps=backlogWords>0 ? Math.min(22 + backlogWords*1.1, 100) : 0;
+    const wordsPerSecond=Math.min(100, Math.max(baseWps, arrivalWps, backlogWps));
 
-    // High cap allows line/sentence-sized catch-up, while the rolling wave below
-    // keeps the visual effect animated instead of collapsing into block pops.
     _streamFadeWordCarry+=elapsedMs*wordsPerSecond/1000;
     if(!_streamFadeVisibleText) _streamFadeWordCarry=Math.max(_streamFadeWordCarry,1);
     let wordsToReveal=Math.floor(_streamFadeWordCarry);
-    // At very high throughput, reveal in rolling phrase-sized waves instead of
-    // dumping whole paragraphs in one frame. The wrapper lengthens/staggers the
-    // fade for these waves so fast output stays visibly animated.
-    const burstFloor=backlogWords>=120?24:backlogWords>=60?18:backlogWords>=30?12:wordsPerSecond>=300?8:wordsPerSecond>=220?6:0;
-    if(burstFloor>0) wordsToReveal=Math.max(wordsToReveal, Math.min(burstFloor, backlogWords));
+    // At very high throughput, cap each frame to a small readable wave. Sustained
+    // playback still catches up, but whole paragraphs no longer pop in at once.
+    const waveCap=backlogWords>=160?3:2;
+    wordsToReveal=Math.min(wordsToReveal,waveCap,backlogWords);
     if(wordsToReveal<1){_streamFadeLastRevealCount=0;return {text:_streamFadeVisibleText,caughtUp:false,changed:false};}
     _streamFadeLastRevealCount=Math.min(wordsToReveal, backlogWords);
     _streamFadeWordCarry=Math.max(0,_streamFadeWordCarry-wordsToReveal);
@@ -792,6 +835,8 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       wordsToReveal-=1;
     }
     if(cut<=0) cut=Math.min(remaining.length,4);
+    const paragraphBreak=remaining.slice(0,cut).search(/\n\s*\n/);
+    if(paragraphBreak>0) cut=paragraphBreak+2;
     _streamFadeVisibleText+=remaining.slice(0,cut);
     if(_streamFadeVisibleText.length>targetText.length) _streamFadeVisibleText=targetText;
     return {text:_streamFadeVisibleText,caughtUp:_streamFadeVisibleText===targetText,changed:true};
@@ -800,76 +845,19 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     if(!assistantBody) return true;
     const next=_streamFadeNextText(displayText);
     if(!next.changed) return next.caughtUp;
-    const html=renderMd ? renderMd(next.text||'') : esc(next.text||'');
-    assistantBody.innerHTML=html;
     assistantBody.classList.add('stream-fade-active');
-    _wrapStreamingFadeWords(assistantBody);
-    _sanitizeSmdLinks(assistantBody);
-    return next.caughtUp;
-  }
-  function _wrapStreamingFadeWords(root){
-    if(!root||!document.createTreeWalker) return;
-    const textNodes=[];
-    const walker=document.createTreeWalker(root,NodeFilter.SHOW_TEXT,{
-      acceptNode(node){
-        if(!node||!node.nodeValue||!node.nodeValue.trim()) return NodeFilter.FILTER_REJECT;
-        let parent=node.parentElement;
-        while(parent&&parent!==root){
-          if(parent.classList&&parent.classList.contains('stream-fade-word')) return NodeFilter.FILTER_REJECT;
-          if(_streamFadeSkipNode(parent)) return NodeFilter.FILTER_REJECT;
-          parent=parent.parentElement;
-        }
-        return NodeFilter.FILTER_ACCEPT;
-      }
-    });
-    let node;
-    while((node=walker.nextNode())) textNodes.push(node);
-    let wordIndex=0;
-    const now=performance.now();
-    const wordRe=/(\S+)(\s*)/g;
-    const revealedThisFrame=Math.max(1,_streamFadeLastRevealCount||1);
-    const fadeMs=revealedThisFrame>=8?_STREAM_FADE_WAVE_MS:revealedThisFrame>=4?240:_STREAM_FADE_MS;
-    const waveStepMs=revealedThisFrame>=18?18:revealedThisFrame>=8?22:revealedThisFrame>=4?16:10;
-    for(const textNode of textNodes){
-      const value=textNode.nodeValue||'';
-      wordRe.lastIndex=0;
-      const frag=document.createDocumentFragment();
-      let last=0, match, changed=false;
-      while((match=wordRe.exec(value))){
-        if(match.index>last) frag.appendChild(document.createTextNode(value.slice(last,match.index)));
-        wordIndex+=1;
-        if(!_streamFadeWordBornAt[wordIndex]){
-          const newWordOffset=Math.max(wordIndex-_streamFadeWordCount-1,0);
-          // High-speed output should feel like a continuous animated sweep, not
-          // a block pop. Keep bursts smaller, but stretch/stagger each wave so
-          // multiple sentences can fade across several hundred milliseconds.
-          const staggerMs=Math.min(newWordOffset*waveStepMs,_STREAM_FADE_MAX_STAGGER_MS);
-          _streamFadeWordBornAt[wordIndex]=now+staggerMs;
-        }
-        _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,_streamFadeWordBornAt[wordIndex]+fadeMs);
-        const ageMs=now-_streamFadeWordBornAt[wordIndex];
-        const isAnimating=ageMs<fadeMs;
-        if(isAnimating){
-          const span=document.createElement('span');
-          span.className='stream-fade-word is-new';
-          const delayMs=Math.max(-fadeMs, -ageMs);
-          span.style.animationDelay=delayMs+'ms';
-          span.style.setProperty('--stream-fade-ms',fadeMs+'ms');
-          span.textContent=match[1];
-          frag.appendChild(span);
-        }else{
-          frag.appendChild(document.createTextNode(match[1]));
-        }
-        if(match[2]) frag.appendChild(document.createTextNode(match[2]));
-        last=match.index+match[0].length;
-        changed=true;
-      }
-      if(!changed) continue;
-      if(last<value.length) frag.appendChild(document.createTextNode(value.slice(last)));
-      textNode.replaceWith(frag);
+    if(!_smdParser&&window.smd){
+      if(_smdReconnect){assistantBody.innerHTML='';_smdReconnect=false;}
+      _smdNewParser(assistantBody,true);
     }
-    _streamFadeWordBornAt.length=wordIndex+1;
-    _streamFadeWordCount=wordIndex;
+    if(_smdParser){
+      _streamFadeAppendOffset=0;
+      _smdWrite(next.text,true);
+    }else{
+      assistantBody.innerHTML=renderMd ? renderMd(next.text||'') : esc(next.text||'');
+      _sanitizeSmdLinks(assistantBody);
+    }
+    return next.caughtUp;
   }
   function _streamFadeCurrentDisplayText(){
     const parsed=_parseStreamState();
@@ -884,6 +872,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       const caughtUp=_renderStreamingFadeMarkdown(target);
       scrollIfPinned();
       if(caughtUp){
+        // parser_end can flush pending markdown text; include that final text in
+        // the fade wait instead of replacing it immediately in renderMessages().
+        if(_smdParser) _smdEndParser();
         // Let the last released words visibly finish their stagger + fade before
         // the final renderMessages() DOM replacement removes the live spans.
         const remainingAnimationMs=Math.max(_STREAM_FADE_MS, _streamFadeLatestAnimationEndAt-performance.now());
@@ -930,7 +921,6 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
           ? parsed.displayText                          // first segment: uses think-tag stripping
           : _stripXmlToolCalls(assistantText.slice(segmentStart));
         if(_shouldUseStreamFade()){
-          _smdEndParser();
           const caughtUp=_renderStreamingFadeMarkdown(displayText);
           if(!caughtUp&&!_streamFinalized){
             setTimeout(()=>_scheduleRender(), 24);
diff --git a/tests/test_smooth_text_fade.py b/tests/test_smooth_text_fade.py
index 5a8e0bc8..ef3e116a 100644
--- a/tests/test_smooth_text_fade.py
+++ b/tests/test_smooth_text_fade.py
@@ -84,8 +84,7 @@ let _streamFadeLastArrivalMs=0;
 let _streamFadeArrivalWps=0;
 let _streamFadeLatestAnimationEndAt=0;
 let _streamFadeLastRevealCount=0;
-let _streamFadeWordCount=0;
-let _streamFadeWordBornAt=[];
+let _streamFadeAppendOffset=0;
 const _STREAM_FADE_MS=140;
 const _STREAM_FADE_WAVE_MS=320;
 const _STREAM_FADE_MAX_STAGGER_MS=520;
@@ -148,7 +147,7 @@ def test_fade_helpers_and_constants_exist():
         "_streamFadeNextText",
         "_streamFadeWordCountOf",
         "_renderStreamingFadeMarkdown",
-        "_wrapStreamingFadeWords",
+        "_streamFadeRenderer",
         "_streamFadeSkipNode",
         "_drainStreamFadeBeforeDone",
     ]:
@@ -161,7 +160,7 @@ def test_fade_helpers_and_constants_exist():
             "const _STREAM_FADE_WAVE_MS=320",
             "const _STREAM_FADE_MAX_STAGGER_MS=520",
             "_streamFadeVisibleText",
-            "_streamFadeWordBornAt",
+            "_streamFadeAppendOffset",
             "_streamFadeArrivalWps",
         ],
     )
@@ -176,7 +175,7 @@ def test_schedule_render_keeps_default_smd_path_when_fade_is_off():
     assert "?16:66" in compact(block)
 
 
-def test_fade_renderer_uses_playout_buffer_and_markdown_rerender():
+def test_fade_renderer_uses_playout_buffer_and_incremental_markdown():
     next_block = function_block(MESSAGES_JS, "_streamFadeNextText")
     render_block = function_block(MESSAGES_JS, "_renderStreamingFadeMarkdown")
 
@@ -196,26 +195,31 @@ def test_fade_renderer_uses_playout_buffer_and_markdown_rerender():
         [
             "_streamFadeNextText(displayText)",
             "if(!next.changed) return next.caughtUp",
-            "renderMd ? renderMd(next.text||'')",
+            "_smdNewParser(assistantBody,true)",
+            "_smdWrite(next.text,true)",
             "stream-fade-active",
-            "_wrapStreamingFadeWords(assistantBody)",
-            "_sanitizeSmdLinks(assistantBody)",
         ],
     )
+    assert "renderMd ? renderMd(next.text||'')" in render_block
+    assert "_wrapStreamingFadeWords" not in MESSAGES_JS
 
 
-def test_fade_animation_state_survives_markdown_rerenders():
-    block = function_block(MESSAGES_JS, "_wrapStreamingFadeWords")
+def test_fade_renderer_animates_new_text_and_cleans_up_spans():
+    block = function_block(MESSAGES_JS, "_streamFadeRenderer")
     assert_contains_all(
         block,
         [
-            "_streamFadeWordBornAt[wordIndex]",
-            "ageMs",
+            "renderer.add_text",
+            "waveStepMs",
             "animationDelay",
             "--stream-fade-ms",
             "span.className='stream-fade-word is-new'",
+            "animationend",
+            "span.replaceWith(document.createTextNode",
+            "prefers-reduced-motion: reduce",
+            "renderer.set_attr",
+            "data-blocked-scheme",
             "_streamFadeLatestAnimationEndAt",
-            "_streamFadeWordBornAt.length=wordIndex+1",
         ],
     )
     assert "filter:" not in STYLE_CSS[STYLE_CSS.index("OpenWebUI-style streaming word fade") :].split(
@@ -289,8 +293,43 @@ for(let frame=0;frame<240;frame++){
   shown=(out.text.match(/\S+/g)||[]).length;
 }
 const backlog=targetCount-shown;
-if(shown < 150) throw new Error(`too slow: shown=${shown} target=${targetCount} backlog=${backlog} arrivalWps=${_streamFadeArrivalWps}`);
-if(backlog > 10) throw new Error(`did not catch up: shown=${shown} target=${targetCount} backlog=${backlog} arrivalWps=${_streamFadeArrivalWps}`);
+if(shown < 145) throw new Error(`too slow: shown=${shown} target=${targetCount} backlog=${backlog} arrivalWps=${_streamFadeArrivalWps}`);
+if(backlog > 15) throw new Error(`did not catch up: shown=${shown} target=${targetCount} backlog=${backlog} arrivalWps=${_streamFadeArrivalWps}`);
+"""
+    )
+    run_node(script)
+
+
+def test_stream_fade_caps_large_backlog_to_readable_waves():
+    script = (
+        fade_helper_script()
+        + r"""
+const words=Array.from({length:500},(_,i)=>'w'+i);
+const target=words.join(' ');
+let previous=0;
+for(let frame=0;frame<40;frame++){
+  performance._t += 16;
+  const out=_streamFadeNextText(target);
+  const shown=(out.text.match(/\S+/g)||[]).length;
+  const revealed=shown-previous;
+  previous=shown;
+  if(revealed>3) throw new Error(`revealed too much in one frame: ${revealed}`);
+}
+if(previous<50) throw new Error(`too slow under large backlog: ${previous}`);
+"""
+    )
+    run_node(script)
+
+
+def test_stream_fade_does_not_reveal_across_multiple_paragraphs_in_one_frame():
+    script = (
+        fade_helper_script()
+        + r"""
+const target='alpha beta gamma\n\nsecond paragraph starts here\n\nthird paragraph starts here';
+performance._t += 200;
+const out=_streamFadeNextText(target);
+const breaks=(out.text.match(/\n\s*\n/g)||[]).length;
+if(breaks>1) throw new Error(`revealed multiple paragraph breaks: ${JSON.stringify(out.text)}`);
 """
     )
     run_node(script)

From 9e9ff30c78f90a84a29098e7912d448a69176b3e Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Mon, 11 May 2026 21:13:31 -0600
Subject: [PATCH 05/12] adjust stream params

---
 static/messages.js             | 6 +++---
 static/style.css               | 2 +-
 tests/test_smooth_text_fade.py | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/static/messages.js b/static/messages.js
index 606ff4b7..c9682759 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -535,7 +535,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   let _streamFadeLatestAnimationEndAt=0;
   let _streamFadeLastRevealCount=0;
   let _streamFadeAppendOffset=0;
-  const _STREAM_FADE_MS=140;
+  const _STREAM_FADE_MS=160;
   const _STREAM_FADE_WAVE_MS=320;
   const _STREAM_FADE_MAX_STAGGER_MS=520;
   const _streamFadeEnabledForStream=window._fadeTextEffect===true;
@@ -938,7 +938,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         if(_shouldUseStreamFade()){
           const caughtUp=_renderStreamingFadeMarkdown(displayText);
           if(!caughtUp&&!_streamFinalized){
-            setTimeout(()=>_scheduleRender(), 24);
+            setTimeout(()=>_scheduleRender(), 33);
           }
         } else {
           assistantBody.classList.remove('stream-fade-active');
@@ -964,7 +964,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       }
       scrollIfPinned();
     };
-    const frameIntervalMs=_shouldUseStreamFade()?16:66;
+    const frameIntervalMs=_shouldUseStreamFade()?33:66;
     if(sinceLastMs>=frameIntervalMs){
       _pendingRafHandle=requestAnimationFrame(_doRender);
     } else {
diff --git a/static/style.css b/static/style.css
index 4d2bd92a..02429e76 100644
--- a/static/style.css
+++ b/static/style.css
@@ -3746,7 +3746,7 @@ main.main.showing-logs > #mainLogs{display:flex;}
 /* OpenWebUI-style streaming word fade (opt-in via Settings → Preferences).
    Opacity-only fade; high-speed streams use a longer JS-driven wave duration. */
 .stream-fade-active .stream-fade-word{display:inline;}
-.stream-fade-word.is-new{animation:stream-fade-word-in var(--stream-fade-ms,140ms) cubic-bezier(.2,.7,.2,1) both;will-change:opacity;}
+.stream-fade-word.is-new{animation:stream-fade-word-in var(--stream-fade-ms,160ms) cubic-bezier(.2,.7,.2,1) both;will-change:opacity;}
 @keyframes stream-fade-word-in{from{opacity:0;}to{opacity:1;}}
 @media (prefers-reduced-motion: reduce){.stream-fade-word.is-new{animation:none;will-change:auto;}}
 [data-live-assistant="1"]:last-child .msg-body.stream-fade-active > :last-child::after,
diff --git a/tests/test_smooth_text_fade.py b/tests/test_smooth_text_fade.py
index ef3e116a..e4d4aad1 100644
--- a/tests/test_smooth_text_fade.py
+++ b/tests/test_smooth_text_fade.py
@@ -85,7 +85,7 @@ let _streamFadeArrivalWps=0;
 let _streamFadeLatestAnimationEndAt=0;
 let _streamFadeLastRevealCount=0;
 let _streamFadeAppendOffset=0;
-const _STREAM_FADE_MS=140;
+const _STREAM_FADE_MS=160;
 const _STREAM_FADE_WAVE_MS=320;
 const _STREAM_FADE_MAX_STAGGER_MS=520;
 const performance={performance_stub};
@@ -156,7 +156,7 @@ def test_fade_helpers_and_constants_exist():
     assert_contains_all(
         MESSAGES_JS,
         [
-            "const _STREAM_FADE_MS=140",
+            "const _STREAM_FADE_MS=160",
             "const _STREAM_FADE_WAVE_MS=320",
             "const _STREAM_FADE_MAX_STAGGER_MS=520",
             "_streamFadeVisibleText",
@@ -172,7 +172,7 @@ def test_schedule_render_keeps_default_smd_path_when_fade_is_off():
     assert "_renderStreamingFadeMarkdown(displayText)" in block
     assert "_smdWrite(displayText)" in block
     assert "_smdNewParser(assistantBody)" in block
-    assert "?16:66" in compact(block)
+    assert "?33:66" in compact(block)
 
 
 def test_fade_renderer_uses_playout_buffer_and_incremental_markdown():
@@ -252,7 +252,7 @@ def test_fade_css_animates_words_and_hides_live_cursor():
         [
             "@keyframes stream-fade-word-in",
             ".stream-fade-word.is-new",
-            "var(--stream-fade-ms,140ms) cubic-bezier(.2,.7,.2,1)",
+            "var(--stream-fade-ms,160ms) cubic-bezier(.2,.7,.2,1)",
             "prefers-reduced-motion: reduce",
             ".msg-body.stream-fade-active > :last-child::after",
             "display:none",

From c9c70fc1c5626d53614cb99471a15a65636edc87 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Tue, 12 May 2026 09:06:08 -0600
Subject: [PATCH 06/12] improve smoothness with punctuation rules & performance
 optimizations

---
 static/messages.js             |  77 +++++++++++----
 static/style.css               |   2 +-
 tests/test_smooth_text_fade.py | 165 +++++++++------------------------
 3 files changed, 104 insertions(+), 140 deletions(-)

diff --git a/static/messages.js b/static/messages.js
index c9682759..31520ec7 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -533,11 +533,14 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   let _streamFadeLastArrivalMs=0;
   let _streamFadeArrivalWps=0;
   let _streamFadeLatestAnimationEndAt=0;
-  let _streamFadeLastRevealCount=0;
   let _streamFadeAppendOffset=0;
+  let _streamFadeVisibleWords=0;
+  let _streamFadeHoldUntilMs=0;
+  let _streamFadeReduceMotionMql=null;
+  let _streamFadeReduceMotion=false;
   const _STREAM_FADE_MS=160;
-  const _STREAM_FADE_WAVE_MS=320;
-  const _STREAM_FADE_MAX_STAGGER_MS=520;
+  const _STREAM_FADE_STAGGER_MS=12;
+  const _STREAM_FADE_DONE_MAX_MS=220;
   const _streamFadeEnabledForStream=window._fadeTextEffect===true;
 
   // rAF-throttled rendering: buffer tokens, render at most once per frame
@@ -700,8 +703,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     _streamFadeLastArrivalMs=0;
     _streamFadeArrivalWps=0;
     _streamFadeLatestAnimationEndAt=0;
-    _streamFadeLastRevealCount=0;
     _streamFadeAppendOffset=0;
+    _streamFadeVisibleWords=0;
+    _streamFadeHoldUntilMs=0;
   }
   function _cancelAnimationFramePendingStreamRender(){
     if(_pendingRafHandle===null) return;
@@ -718,7 +722,28 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     const tag=(node.tagName||'').toLowerCase();
     return tag==='pre'||tag==='code'||tag==='script'||tag==='style'||tag==='textarea'||tag==='svg'||tag==='math';
   }
+  function _streamFadeReduceMotionEnabled(){
+    if(!window.matchMedia) return false;
+    if(!_streamFadeReduceMotionMql){
+      _streamFadeReduceMotionMql=window.matchMedia('(prefers-reduced-motion: reduce)');
+      _streamFadeReduceMotion=!!_streamFadeReduceMotionMql.matches;
+      const onChange=e=>{_streamFadeReduceMotion=!!e.matches;};
+      try{_streamFadeReduceMotionMql.addEventListener('change',onChange);}
+      catch(_){try{_streamFadeReduceMotionMql.addListener(onChange);}catch(_){}}
+    }
+    return _streamFadeReduceMotion;
+  }
+  function _streamFadeBindCleanup(el){
+    if(!el||el._streamFadeCleanupBound) return;
+    el._streamFadeCleanupBound=true;
+    el.addEventListener('animationend',e=>{
+      const span=e.target;
+      if(!span||!span.classList||!span.classList.contains('stream-fade-word')) return;
+      span.replaceWith(document.createTextNode(span.textContent||''));
+    });
+  }
   function _streamFadeRenderer(el){
+    _streamFadeBindCleanup(el);
     const renderer=window.smd.default_renderer(el);
     const baseAddText=renderer.add_text;
     const baseSetAttr=renderer.set_attr;
@@ -728,10 +753,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       const frag=document.createDocumentFragment();
       const wordRe=/(\S+)(\s*)/g;
       const value=String(text||'');
-      const revealedThisFrame=Math.max(1,_streamFadeLastRevealCount||1);
-      const fadeMs=revealedThisFrame>=8?_STREAM_FADE_WAVE_MS:revealedThisFrame>=4?240:_STREAM_FADE_MS;
-      const waveStepMs=revealedThisFrame>=18?18:revealedThisFrame>=8?22:revealedThisFrame>=4?16:12;
-      const reduceMotion=window.matchMedia&&window.matchMedia('(prefers-reduced-motion: reduce)').matches;
+      const reduceMotion=_streamFadeReduceMotionEnabled();
       let last=0, match, changed=false;
       while((match=wordRe.exec(value))){
         if(match.index>last) frag.appendChild(document.createTextNode(value.slice(last,match.index)));
@@ -744,14 +766,13 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         }
         const span=document.createElement('span');
         span.className='stream-fade-word is-new';
-        const delayMs=Math.min(_streamFadeAppendOffset*waveStepMs,_STREAM_FADE_MAX_STAGGER_MS);
+        const delayMs=_streamFadeAppendOffset*_STREAM_FADE_STAGGER_MS;
         span.style.animationDelay=delayMs+'ms';
-        span.style.setProperty('--stream-fade-ms',fadeMs+'ms');
+        span.style.setProperty('--stream-fade-ms',_STREAM_FADE_MS+'ms');
         span.textContent=match[1];
-        span.addEventListener('animationend',()=>span.replaceWith(document.createTextNode(span.textContent||'')),{once:true});
         frag.appendChild(span);
         _streamFadeAppendOffset+=1;
-        _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,performance.now()+delayMs+fadeMs);
+        _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,performance.now()+delayMs+_STREAM_FADE_MS);
         if(match[2]) frag.appendChild(document.createTextNode(match[2]));
         last=match.index+match[0].length;
         changed=true;
@@ -776,6 +797,13 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     const m=String(text||'').match(/\S+/g);
     return m?m.length:0;
   }
+  function _streamFadePauseAfter(text, paragraphBreakIndex){
+    if(paragraphBreakIndex>=0) return 90;
+    const trimmed=String(text||'').trimEnd();
+    if(/[.!?]["')\]]*$/.test(trimmed)) return 45;
+    if(/[:;]["')\]]*$/.test(trimmed)) return 30;
+    return 0;
+  }
   function _streamFadeNextText(targetText){
     targetText=String(targetText||'');
     const now=performance.now();
@@ -797,7 +825,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
 
     const remaining=targetText.slice(_streamFadeVisibleText.length);
     const backlogWords=_streamFadeWordCountOf(remaining);
-    const targetWords=_streamFadeWordCountOf(targetText);
+    const targetWords=_streamFadeVisibleWords+backlogWords;
     const elapsedMs=Math.max(16,Math.min(120,now-_streamFadeLastTickMs));
     _streamFadeLastTickMs=now;
 
@@ -825,6 +853,10 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       _streamFadeArrivalWps=0;
     }
 
+    if(now<_streamFadeHoldUntilMs){
+      return {text:_streamFadeVisibleText,caughtUp:false,changed:false};
+    }
+
     const streamAgeSeconds=Math.max(0, (now-(_streamFadeStartedAt||now))/1000);
     const baseWps=22 + Math.min(streamAgeSeconds*2.5, 28); // 22 → 50 wps over long answers
     const arrivalWps=_streamFadeArrivalWps ? Math.min(_streamFadeArrivalWps*1.05 + 8, 90) : 0;
@@ -838,8 +870,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     // playback still catches up, but whole paragraphs no longer pop in at once.
     const waveCap=backlogWords>=160?3:2;
     wordsToReveal=Math.min(wordsToReveal,waveCap,backlogWords);
-    if(wordsToReveal<1){_streamFadeLastRevealCount=0;return {text:_streamFadeVisibleText,caughtUp:false,changed:false};}
-    _streamFadeLastRevealCount=Math.min(wordsToReveal, backlogWords);
+    if(wordsToReveal<1) return {text:_streamFadeVisibleText,caughtUp:false,changed:false};
     _streamFadeWordCarry=Math.max(0,_streamFadeWordCarry-wordsToReveal);
 
     let cut=0;
@@ -850,9 +881,15 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       wordsToReveal-=1;
     }
     if(cut<=0) cut=Math.min(remaining.length,4);
-    const paragraphBreak=remaining.slice(0,cut).search(/\n\s*\n/);
-    if(paragraphBreak>0) cut=paragraphBreak+2;
-    _streamFadeVisibleText+=remaining.slice(0,cut);
+    const chunk=remaining.slice(0,cut);
+    const paragraphMatch=chunk.match(/\n\s*\n/);
+    const paragraphBreak=paragraphMatch ? paragraphMatch.index : -1;
+    if(paragraphMatch) cut=paragraphBreak+paragraphMatch[0].length;
+    const revealed=remaining.slice(0,cut);
+    _streamFadeVisibleText+=revealed;
+    _streamFadeVisibleWords+=_streamFadeWordCountOf(revealed);
+    const pauseMs=_streamFadePauseAfter(revealed,paragraphBreak);
+    if(pauseMs) _streamFadeHoldUntilMs=now+pauseMs;
     if(_streamFadeVisibleText.length>targetText.length) _streamFadeVisibleText=targetText;
     return {text:_streamFadeVisibleText,caughtUp:_streamFadeVisibleText===targetText,changed:true};
   }
@@ -893,10 +930,10 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         // Let the last released words visibly finish their stagger + fade before
         // the final renderMessages() DOM replacement removes the live spans.
         const remainingAnimationMs=Math.max(_STREAM_FADE_MS, _streamFadeLatestAnimationEndAt-performance.now());
-        setTimeout(onDone, Math.min(remainingAnimationMs, _STREAM_FADE_WAVE_MS+_STREAM_FADE_MAX_STAGGER_MS));
+        setTimeout(onDone, Math.min(remainingAnimationMs, _STREAM_FADE_DONE_MAX_MS));
         return;
       }
-      setTimeout(()=>requestAnimationFrame(step), 16);
+      setTimeout(()=>requestAnimationFrame(step), 33);
     };
     step();
   }
diff --git a/static/style.css b/static/style.css
index 02429e76..11519fde 100644
--- a/static/style.css
+++ b/static/style.css
@@ -3744,7 +3744,7 @@ main.main.showing-logs > #mainLogs{display:flex;}
 .logs-hint.warn{color:#f59e0b;border-color:rgba(245,158,11,.35);background:rgba(245,158,11,.08);}
 
 /* OpenWebUI-style streaming word fade (opt-in via Settings → Preferences).
-   Opacity-only fade; high-speed streams use a longer JS-driven wave duration. */
+   Opacity-only fade with JS-paced word/paragraph reveal. */
 .stream-fade-active .stream-fade-word{display:inline;}
 .stream-fade-word.is-new{animation:stream-fade-word-in var(--stream-fade-ms,160ms) cubic-bezier(.2,.7,.2,1) both;will-change:opacity;}
 @keyframes stream-fade-word-in{from{opacity:0;}to{opacity:1;}}
diff --git a/tests/test_smooth_text_fade.py b/tests/test_smooth_text_fade.py
index e4d4aad1..8aadea62 100644
--- a/tests/test_smooth_text_fade.py
+++ b/tests/test_smooth_text_fade.py
@@ -50,16 +50,6 @@ def function_block(src: str, name: str) -> str:
     raise AssertionError(f"{name}() closing brace not found")
 
 
-def event_listener_block(src: str, event_name: str) -> str:
-    start = src.index(f"source.addEventListener('{event_name}'")
-    end = src.index("source.addEventListener(", start + 1)
-    return src[start:end]
-
-
-def compact(src: str) -> str:
-    return re.sub(r"\s+", "", src)
-
-
 def assert_contains_all(src: str, snippets: list[str]) -> None:
     for snippet in snippets:
         assert snippet in src
@@ -70,6 +60,7 @@ def fade_helper_script(performance_stub: str = "{_t:0,now(){return this._t;}}")
         function_block(MESSAGES_JS, name)
         for name in [
             "_streamFadeWordCountOf",
+            "_streamFadePauseAfter",
             "_resetStreamFadeState",
             "_streamFadeNextText",
         ]
@@ -83,11 +74,12 @@ let _streamFadeLastTargetWords=0;
 let _streamFadeLastArrivalMs=0;
 let _streamFadeArrivalWps=0;
 let _streamFadeLatestAnimationEndAt=0;
-let _streamFadeLastRevealCount=0;
 let _streamFadeAppendOffset=0;
+let _streamFadeVisibleWords=0;
+let _streamFadeHoldUntilMs=0;
 const _STREAM_FADE_MS=160;
-const _STREAM_FADE_WAVE_MS=320;
-const _STREAM_FADE_MAX_STAGGER_MS=520;
+const _STREAM_FADE_STAGGER_MS=12;
+const _STREAM_FADE_DONE_MAX_MS=220;
 const performance={performance_stub};
 {helpers}
 """
@@ -140,54 +132,18 @@ def test_preferences_ui_exposes_and_saves_fade_text_effect():
     assert_contains_all(apply_block, ["fadeTextEffect", f"{FADE_RUNTIME_FLAG}=!!fadeTextEffect"])
 
 
-def test_fade_helpers_and_constants_exist():
-    for name in [
-        "_resetStreamFadeState",
-        "_shouldUseStreamFade",
-        "_streamFadeNextText",
-        "_streamFadeWordCountOf",
-        "_renderStreamingFadeMarkdown",
-        "_streamFadeRenderer",
-        "_streamFadeSkipNode",
-        "_drainStreamFadeBeforeDone",
-    ]:
-        assert f"function {name}" in MESSAGES_JS
-
-    assert_contains_all(
-        MESSAGES_JS,
-        [
-            "const _STREAM_FADE_MS=160",
-            "const _STREAM_FADE_WAVE_MS=320",
-            "const _STREAM_FADE_MAX_STAGGER_MS=520",
-            "_streamFadeVisibleText",
-            "_streamFadeAppendOffset",
-            "_streamFadeArrivalWps",
-        ],
-    )
-
-
-def test_schedule_render_keeps_default_smd_path_when_fade_is_off():
+def test_stream_fade_uses_incremental_renderer_without_changing_default_path():
     block = function_block(MESSAGES_JS, "_scheduleRender")
-    assert "_shouldUseStreamFade()" in block
-    assert "_renderStreamingFadeMarkdown(displayText)" in block
-    assert "_smdWrite(displayText)" in block
-    assert "_smdNewParser(assistantBody)" in block
-    assert "?33:66" in compact(block)
-
-
-def test_fade_renderer_uses_playout_buffer_and_incremental_markdown():
-    next_block = function_block(MESSAGES_JS, "_streamFadeNextText")
     render_block = function_block(MESSAGES_JS, "_renderStreamingFadeMarkdown")
+    renderer_block = function_block(MESSAGES_JS, "_streamFadeRenderer")
+    cleanup_block = function_block(MESSAGES_JS, "_streamFadeBindCleanup")
 
     assert_contains_all(
-        next_block,
+        block,
         [
-            "targetText.startsWith(_streamFadeVisibleText)",
-            "wordsPerSecond",
-            "instantArrivalWps",
-            "backlogWords",
-            "streamAgeSeconds",
-            "caughtUp",
+            "_renderStreamingFadeMarkdown(displayText)",
+            "_smdWrite(displayText)",
+            "?33:66",
         ],
     )
     assert_contains_all(
@@ -201,52 +157,31 @@ def test_fade_renderer_uses_playout_buffer_and_incremental_markdown():
         ],
     )
     assert "renderMd ? renderMd(next.text||'')" in render_block
-    assert "_wrapStreamingFadeWords" not in MESSAGES_JS
-
-
-def test_fade_renderer_animates_new_text_and_cleans_up_spans():
-    block = function_block(MESSAGES_JS, "_streamFadeRenderer")
     assert_contains_all(
-        block,
+        renderer_block,
         [
-            "renderer.add_text",
-            "waveStepMs",
-            "animationDelay",
-            "--stream-fade-ms",
             "span.className='stream-fade-word is-new'",
-            "animationend",
-            "span.replaceWith(document.createTextNode",
-            "prefers-reduced-motion: reduce",
+            "_streamFadeReduceMotionEnabled()",
             "renderer.set_attr",
             "data-blocked-scheme",
             "_streamFadeLatestAnimationEndAt",
         ],
     )
+    assert_contains_all(
+        cleanup_block,
+        ["animationend", "span.replaceWith(document.createTextNode"],
+    )
+    assert "_wrapStreamingFadeWords" not in MESSAGES_JS
+
+
+def test_stream_fade_css_is_opacity_only_and_hides_live_cursor():
+    fade_css = STYLE_CSS[STYLE_CSS.index("OpenWebUI-style streaming word fade") :]
     assert "filter:" not in STYLE_CSS[STYLE_CSS.index("OpenWebUI-style streaming word fade") :].split(
         "[data-live-assistant", 1
     )[0]
     assert "translateY" not in STYLE_CSS[STYLE_CSS.index("OpenWebUI-style streaming word fade") :].split(
         "[data-live-assistant", 1
     )[0]
-
-
-def test_done_drain_finishes_fade_before_final_dom_replacement_and_blocks_late_mutations():
-    done_block = event_listener_block(MESSAGES_JS, "done")
-    drain_block = function_block(MESSAGES_JS, "_drainStreamFadeBeforeDone")
-
-    assert_contains_all(done_block, ["_terminalStateReached=true", "_drainStreamFadeBeforeDone(_finishDone)"])
-    assert_contains_all(drain_block, ["remainingAnimationMs", "_STREAM_FADE_MAX_STAGGER_MS", "requestAnimationFrame(step)"])
-
-    for event_name in ["token", "interim_assistant", "reasoning"]:
-        assert "if(_terminalStateReached||_streamFinalized) return;" in event_listener_block(MESSAGES_JS, event_name)
-
-
-def test_new_segments_reset_fade_state():
-    assert "_resetStreamFadeState()" in function_block(MESSAGES_JS, "_resetAssistantSegment")
-
-
-def test_fade_css_animates_words_and_hides_live_cursor():
-    fade_css = STYLE_CSS[STYLE_CSS.index("OpenWebUI-style streaming word fade") :]
     assert_contains_all(
         fade_css,
         [
@@ -260,25 +195,9 @@ def test_fade_css_animates_words_and_hides_live_cursor():
         ],
     )
     assert ".stream-fade-active .stream-fade-word{display:inline;}" in fade_css
-    assert ".stream-fade-active .stream-fade-word{display:inline;will-change:opacity;}" not in fade_css
 
 
-def test_stream_fade_next_text_executes_and_advances_playout():
-    script = (
-        fade_helper_script("{_t:0,now(){this._t+=33;return this._t;}}")
-        + r"""
-const target='one two three four five six seven eight nine ten eleven twelve';
-const first=_streamFadeNextText(target);
-const second=_streamFadeNextText(target);
-if (!first.text || !second.text) throw new Error('no text revealed');
-if (second.text.length < first.text.length) throw new Error('playout regressed');
-"""
-    )
-    result = run_node(script)
-    assert "ReferenceError" not in result.stderr
-
-
-def test_stream_fade_ramps_above_steady_arrival_rate():
+def test_stream_fade_playout_handles_fast_models_without_paragraph_pops():
     script = (
         fade_helper_script()
         + r"""
@@ -295,21 +214,11 @@ for(let frame=0;frame<240;frame++){
 const backlog=targetCount-shown;
 if(shown < 145) throw new Error(`too slow: shown=${shown} target=${targetCount} backlog=${backlog} arrivalWps=${_streamFadeArrivalWps}`);
 if(backlog > 15) throw new Error(`did not catch up: shown=${shown} target=${targetCount} backlog=${backlog} arrivalWps=${_streamFadeArrivalWps}`);
-"""
-    )
-    run_node(script)
-
-
-def test_stream_fade_caps_large_backlog_to_readable_waves():
-    script = (
-        fade_helper_script()
-        + r"""
-const words=Array.from({length:500},(_,i)=>'w'+i);
-const target=words.join(' ');
+const huge=Array.from({length:500},(_,i)=>'b'+i).join(' ');
 let previous=0;
 for(let frame=0;frame<40;frame++){
   performance._t += 16;
-  const out=_streamFadeNextText(target);
+  const out=_streamFadeNextText(huge);
   const shown=(out.text.match(/\S+/g)||[]).length;
   const revealed=shown-previous;
   previous=shown;
@@ -321,15 +230,33 @@ if(previous<50) throw new Error(`too slow under large backlog: ${previous}`);
     run_node(script)
 
 
-def test_stream_fade_does_not_reveal_across_multiple_paragraphs_in_one_frame():
+def test_stream_fade_respects_sentence_and_paragraph_boundaries():
     script = (
         fade_helper_script()
         + r"""
 const target='alpha beta gamma\n\nsecond paragraph starts here\n\nthird paragraph starts here';
 performance._t += 200;
-const out=_streamFadeNextText(target);
+let out=_streamFadeNextText(target);
 const breaks=(out.text.match(/\n\s*\n/g)||[]).length;
 if(breaks>1) throw new Error(`revealed multiple paragraph breaks: ${JSON.stringify(out.text)}`);
+_resetStreamFadeState();
+const pausedTarget='alpha beta.\n\nsecond paragraph starts here';
+out={text:''};
+for(let frame=0;frame<8&&!out.text.includes('.');frame++){
+  performance._t += 33;
+  out=_streamFadeNextText(pausedTarget);
+}
+if(!out.text.includes('.')) throw new Error(`expected first sentence: ${JSON.stringify(out.text)}`);
+const held=_streamFadeNextText(pausedTarget);
+if(held.changed) throw new Error('expected sentence pause to hold next reveal');
+performance._t += 50;
+for(let frame=0;frame<8&&!out.text.includes('\n\n');frame++){
+  performance._t += 33;
+  out=_streamFadeNextText(pausedTarget);
+}
+if(!out.text.includes('\n\n')) throw new Error(`expected paragraph break: ${JSON.stringify(out.text)}`);
+const afterBreak=_streamFadeNextText(pausedTarget);
+if(afterBreak.changed) throw new Error('expected paragraph pause to hold next reveal');
 """
     )
     run_node(script)

From d039270c236651bd4de781c7c89b33df2d6bd41b Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Tue, 12 May 2026 14:46:17 -0600
Subject: [PATCH 07/12] WIP: retune smooth-text-fade timing and add PR notes

---
 PR_NOTES.md        | 60 ++++++++++++++++++++++++++++++++++++++++++++++
 static/messages.js |  6 ++---
 static/style.css   |  4 ++--
 3 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 PR_NOTES.md

diff --git a/PR_NOTES.md b/PR_NOTES.md
new file mode 100644
index 00000000..1f5699a8
--- /dev/null
+++ b/PR_NOTES.md
@@ -0,0 +1,60 @@
+# PR Notes: Opt-in Smooth Streaming Text Fade
+
+## Summary
+
+Adds an opt-in `Fade text effect` preference for live assistant responses. When enabled, newly revealed words fade in during streaming for a smoother ChatGPT/Codex-like feel while preserving the existing default streaming path when disabled.
+
+## User-facing behavior
+
+- New setting: `Settings -> Preferences -> Fade text effect`
+- Default: off
+- Runtime flag: `window._fadeTextEffect`
+- Fade mode uses a playout buffer so fast backend chunks do not land as large paragraph pops.
+- Visual reveal rate is intentionally capped for readability, especially with very fast models.
+- Live cursor is hidden while fade mode is active.
+- Reduced-motion users get non-animated text.
+
+## Implementation notes
+
+- Fade mode is locked per stream to avoid mid-stream preference toggle rewind/duplication.
+- Uses Hermes' existing incremental `streaming-markdown` parser with a custom renderer instead of full markdown re-renders.
+- Only newly appended words are wrapped and animated.
+- Animated spans are replaced with plain text by a delegated `animationend` handler, avoiding long-lived wrapper buildup without per-word listeners.
+- Reduced-motion preference is cached with a media-query listener instead of checked for every appended text node.
+- Unsafe streamed `href`/`src` values are blocked in the fade renderer `set_attr` path.
+- On `done`, fade mode drains buffered text, ends the parser to flush pending markdown, and waits for the final fade/stagger window before the final `renderMessages()` replacement.
+
+## Performance/readability tuning
+
+- Normal fade duration: `160ms`
+- Word stagger: `12ms`
+- Done drain wait cap: `220ms`
+- Visual playback is capped at `100 wps`.
+- Reveals at most `2 words/frame`, or `3 words/frame` only with very large backlog.
+- Reveal steps pause briefly after sentence punctuation and paragraph breaks so very fast models feel less overwhelming.
+- Target word counts reuse the already-visible word count plus backlog count, avoiding repeated full-response word scans on each tick.
+
+## Verification
+
+```bash
+node --check static/messages.js static/panels.js static/boot.js static/i18n.js
+git diff --check
+/Users/agent/.hermes/hermes-agent/venv/bin/python -m pytest tests/test_smooth_text_fade.py tests/test_1003_preferences_autosave.py tests/test_streaming_markdown.py -q
+/Users/agent/.hermes/hermes-agent/venv/bin/python -m py_compile api/config.py
+```
+
+Latest focused result:
+
+```text
+68 passed
+```
+
+## Manual QA suggested
+
+- Hard refresh after deployment.
+- Enable `Settings -> Preferences -> Fade text effect`.
+- Test short normal response.
+- Test long markdown response with headings, lists, links, code blocks, and tables.
+- Test very fast model output around 200-400 tok/s.
+- Test tool-call-heavy response.
+- Test OS/browser reduced-motion mode if available.
diff --git a/static/messages.js b/static/messages.js
index 31520ec7..5d641df0 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -538,9 +538,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   let _streamFadeHoldUntilMs=0;
   let _streamFadeReduceMotionMql=null;
   let _streamFadeReduceMotion=false;
-  const _STREAM_FADE_MS=160;
-  const _STREAM_FADE_STAGGER_MS=12;
-  const _STREAM_FADE_DONE_MAX_MS=220;
+  const _STREAM_FADE_MS=240;
+  const _STREAM_FADE_STAGGER_MS=16;
+  const _STREAM_FADE_DONE_MAX_MS=320;
   const _streamFadeEnabledForStream=window._fadeTextEffect===true;
 
   // rAF-throttled rendering: buffer tokens, render at most once per frame
diff --git a/static/style.css b/static/style.css
index 11519fde..f94cf0b6 100644
--- a/static/style.css
+++ b/static/style.css
@@ -3746,8 +3746,8 @@ main.main.showing-logs > #mainLogs{display:flex;}
 /* OpenWebUI-style streaming word fade (opt-in via Settings → Preferences).
    Opacity-only fade with JS-paced word/paragraph reveal. */
 .stream-fade-active .stream-fade-word{display:inline;}
-.stream-fade-word.is-new{animation:stream-fade-word-in var(--stream-fade-ms,160ms) cubic-bezier(.2,.7,.2,1) both;will-change:opacity;}
-@keyframes stream-fade-word-in{from{opacity:0;}to{opacity:1;}}
+.stream-fade-word.is-new{animation:stream-fade-word-in var(--stream-fade-ms,240ms) cubic-bezier(.2,.7,.2,1) both;will-change:opacity;}
+@keyframes stream-fade-word-in{0%{opacity:0;}45%{opacity:.45;}100%{opacity:1;}}
 @media (prefers-reduced-motion: reduce){.stream-fade-word.is-new{animation:none;will-change:auto;}}
 [data-live-assistant="1"]:last-child .msg-body.stream-fade-active > :last-child::after,
 [data-live-assistant="1"]:last-child .msg-body.stream-fade-active:not(:has(> *))::after{display:none;content:none;}

From afc089c93b5dee7434e7ffa5f2fc90ddc44a780b Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Tue, 12 May 2026 18:06:11 -0600
Subject: [PATCH 08/12] Tune smooth text fade playout

---
 static/messages.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/static/messages.js b/static/messages.js
index 5d641df0..c1198cf0 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -538,7 +538,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   let _streamFadeHoldUntilMs=0;
   let _streamFadeReduceMotionMql=null;
   let _streamFadeReduceMotion=false;
-  const _STREAM_FADE_MS=240;
+  const _STREAM_FADE_MS=200;
   const _STREAM_FADE_STAGGER_MS=16;
   const _STREAM_FADE_DONE_MAX_MS=320;
   const _streamFadeEnabledForStream=window._fadeTextEffect===true;
@@ -861,7 +861,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     const baseWps=22 + Math.min(streamAgeSeconds*2.5, 28); // 22 → 50 wps over long answers
     const arrivalWps=_streamFadeArrivalWps ? Math.min(_streamFadeArrivalWps*1.05 + 8, 90) : 0;
     const backlogWps=backlogWords>0 ? Math.min(22 + backlogWords*1.1, 100) : 0;
-    const wordsPerSecond=Math.min(100, Math.max(baseWps, arrivalWps, backlogWps));
+    const wordsPerSecond=Math.min(160, Math.max(baseWps, arrivalWps, backlogWps));
 
     _streamFadeWordCarry+=elapsedMs*wordsPerSecond/1000;
     if(!_streamFadeVisibleText) _streamFadeWordCarry=Math.max(_streamFadeWordCarry,1);

From 5e2350e128b9b516f934547346a5850b4bed4e98 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Tue, 12 May 2026 18:18:07 -0600
Subject: [PATCH 09/12] Harden smooth text fade before merge

---
 PR_NOTES.md                    | 12 ++++++------
 static/messages.js             | 18 +++++++++++++++---
 tests/test_smooth_text_fade.py | 16 ++++++++++++----
 3 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/PR_NOTES.md b/PR_NOTES.md
index 1f5699a8..72cbee2a 100644
--- a/PR_NOTES.md
+++ b/PR_NOTES.md
@@ -20,16 +20,16 @@ Adds an opt-in `Fade text effect` preference for live assistant responses. When
 - Uses Hermes' existing incremental `streaming-markdown` parser with a custom renderer instead of full markdown re-renders.
 - Only newly appended words are wrapped and animated.
 - Animated spans are replaced with plain text by a delegated `animationend` handler, avoiding long-lived wrapper buildup without per-word listeners.
-- Reduced-motion preference is cached with a media-query listener instead of checked for every appended text node.
+- Reduced-motion preference is cached with a media-query listener instead of checked for every appended text node, and terminal stream paths remove the listener.
 - Unsafe streamed `href`/`src` values are blocked in the fade renderer `set_attr` path.
 - On `done`, fade mode drains buffered text, ends the parser to flush pending markdown, and waits for the final fade/stagger window before the final `renderMessages()` replacement.
 
 ## Performance/readability tuning
 
-- Normal fade duration: `160ms`
-- Word stagger: `12ms`
-- Done drain wait cap: `220ms`
-- Visual playback is capped at `100 wps`.
+- Normal fade duration: `200ms`
+- Word stagger: `16ms`
+- Done drain wait cap: `320ms`
+- Visual playback is capped at `160 wps`.
 - Reveals at most `2 words/frame`, or `3 words/frame` only with very large backlog.
 - Reveal steps pause briefly after sentence punctuation and paragraph breaks so very fast models feel less overwhelming.
 - Target word counts reuse the already-visible word count plus backlog count, avoiding repeated full-response word scans on each tick.
@@ -46,7 +46,7 @@ git diff --check
 Latest focused result:
 
 ```text
-68 passed
+69 passed
 ```
 
 ## Manual QA suggested
diff --git a/static/messages.js b/static/messages.js
index c1198cf0..12d1525c 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -538,6 +538,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   let _streamFadeHoldUntilMs=0;
   let _streamFadeReduceMotionMql=null;
   let _streamFadeReduceMotion=false;
+  let _streamFadeReduceMotionOnChange=null;
   const _STREAM_FADE_MS=200;
   const _STREAM_FADE_STAGGER_MS=16;
   const _STREAM_FADE_DONE_MAX_MS=320;
@@ -727,12 +728,19 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     if(!_streamFadeReduceMotionMql){
       _streamFadeReduceMotionMql=window.matchMedia('(prefers-reduced-motion: reduce)');
       _streamFadeReduceMotion=!!_streamFadeReduceMotionMql.matches;
-      const onChange=e=>{_streamFadeReduceMotion=!!e.matches;};
-      try{_streamFadeReduceMotionMql.addEventListener('change',onChange);}
-      catch(_){try{_streamFadeReduceMotionMql.addListener(onChange);}catch(_){}}
+      _streamFadeReduceMotionOnChange=e=>{_streamFadeReduceMotion=!!e.matches;};
+      try{_streamFadeReduceMotionMql.addEventListener('change',_streamFadeReduceMotionOnChange);}
+      catch(_){try{_streamFadeReduceMotionMql.addListener(_streamFadeReduceMotionOnChange);}catch(_){}}
     }
     return _streamFadeReduceMotion;
   }
+  function _streamFadeCleanupReduceMotionListener(){ // detaches the reduced-motion media-query change handler and drops the cached references
+    if(!_streamFadeReduceMotionMql||!_streamFadeReduceMotionOnChange) return; // nothing registered, or already cleaned up
+    try{_streamFadeReduceMotionMql.removeEventListener('change',_streamFadeReduceMotionOnChange);}
+    catch(_){try{_streamFadeReduceMotionMql.removeListener(_streamFadeReduceMotionOnChange);}catch(_){}} // fall back to the deprecated removeListener API; a failure there is deliberately ignored
+    _streamFadeReduceMotionMql=null; // cleared so a repeat call returns at the guard; the cached boolean _streamFadeReduceMotion is left as-is
+    _streamFadeReduceMotionOnChange=null;
+  }
   function _streamFadeBindCleanup(el){
     if(!el||el._streamFadeCleanupBound) return;
     el._streamFadeCleanupBound=true;
@@ -1245,6 +1253,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         // can reintroduce a stale thinking card or duplicate content.
         _streamFinalized=true;
         _cancelAnimationFramePendingStreamRender();
+        _streamFadeCleanupReduceMotionListener();
         if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
         // Finalize smd parser — flushes any remaining buffered markdown state
         // and runs Prism + copy buttons on the live segment before the DOM is replaced
@@ -1489,6 +1498,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
       _streamFinalized=true;
       _cancelAnimationFramePendingStreamRender();
+      _streamFadeCleanupReduceMotionListener();
       _smdEndParser();
       if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
       // Application-level error sent explicitly by the server (rate limit, crash, etc.)
@@ -1575,6 +1585,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
       _streamFinalized=true;
       _cancelAnimationFramePendingStreamRender();
+      _streamFadeCleanupReduceMotionListener();
       _smdEndParser();
       if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
       source.close();
@@ -1669,6 +1680,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     if(_persistTimer){clearTimeout(_persistTimer);_persistTimer=null;}
     _streamFinalized=true;
     _cancelAnimationFramePendingStreamRender();
+    _streamFadeCleanupReduceMotionListener();
     if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
     _clearOwnerInflightState();
     _closeSource();
diff --git a/tests/test_smooth_text_fade.py b/tests/test_smooth_text_fade.py
index 8aadea62..c984d1e7 100644
--- a/tests/test_smooth_text_fade.py
+++ b/tests/test_smooth_text_fade.py
@@ -77,9 +77,9 @@ let _streamFadeLatestAnimationEndAt=0;
 let _streamFadeAppendOffset=0;
 let _streamFadeVisibleWords=0;
 let _streamFadeHoldUntilMs=0;
-const _STREAM_FADE_MS=160;
-const _STREAM_FADE_STAGGER_MS=12;
-const _STREAM_FADE_DONE_MAX_MS=220;
+const _STREAM_FADE_MS=200;
+const _STREAM_FADE_STAGGER_MS=16;
+const _STREAM_FADE_DONE_MAX_MS=320;
 const performance={performance_stub};
 {helpers}
 """
@@ -187,7 +187,7 @@ def test_stream_fade_css_is_opacity_only_and_hides_live_cursor():
         [
             "@keyframes stream-fade-word-in",
             ".stream-fade-word.is-new",
-            "var(--stream-fade-ms,160ms) cubic-bezier(.2,.7,.2,1)",
+            "var(--stream-fade-ms,240ms) cubic-bezier(.2,.7,.2,1)",
             "prefers-reduced-motion: reduce",
             ".msg-body.stream-fade-active > :last-child::after",
             "display:none",
@@ -197,6 +197,14 @@ def test_stream_fade_css_is_opacity_only_and_hides_live_cursor():
     assert ".stream-fade-active .stream-fade-word{display:inline;}" in fade_css
 
 
+def test_stream_fade_reduced_motion_listener_is_cleaned_up_on_terminal_paths():  # substring checks on MESSAGES_JS only; no JS is executed here
+    assert "_streamFadeReduceMotionOnChange" in MESSAGES_JS  # handler must be held in a named variable so it can be removed later
+    assert "function _streamFadeCleanupReduceMotionListener()" in MESSAGES_JS
+    assert "removeEventListener('change',_streamFadeReduceMotionOnChange)" in MESSAGES_JS
+    assert "removeListener(_streamFadeReduceMotionOnChange)" in MESSAGES_JS  # fallback removal call must also be present
+    assert MESSAGES_JS.count("_streamFadeCleanupReduceMotionListener();") >= 4  # this patch adds four call sites, one per stream-terminating path
+
+
 def test_stream_fade_playout_handles_fast_models_without_paragraph_pops():
     script = (
         fade_helper_script()

From 0b51245bd308d4195920eab9275fb8ded9b05d0b Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Tue, 12 May 2026 18:20:08 -0600
Subject: [PATCH 10/12] Optimize smooth fade word append path

---
 static/messages.js             | 4 ++--
 tests/test_smooth_text_fade.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/static/messages.js b/static/messages.js
index 12d1525c..21faa2c7 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -762,6 +762,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       const wordRe=/(\S+)(\s*)/g;
       const value=String(text||'');
       const reduceMotion=_streamFadeReduceMotionEnabled();
+      const appendStartedAt=performance.now();
       let last=0, match, changed=false;
       while((match=wordRe.exec(value))){
         if(match.index>last) frag.appendChild(document.createTextNode(value.slice(last,match.index)));
@@ -776,11 +777,10 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         span.className='stream-fade-word is-new';
         const delayMs=_streamFadeAppendOffset*_STREAM_FADE_STAGGER_MS;
         span.style.animationDelay=delayMs+'ms';
-        span.style.setProperty('--stream-fade-ms',_STREAM_FADE_MS+'ms');
         span.textContent=match[1];
         frag.appendChild(span);
         _streamFadeAppendOffset+=1;
-        _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,performance.now()+delayMs+_STREAM_FADE_MS);
+        _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,appendStartedAt+delayMs+_STREAM_FADE_MS);
         if(match[2]) frag.appendChild(document.createTextNode(match[2]));
         last=match.index+match[0].length;
         changed=true;
diff --git a/tests/test_smooth_text_fade.py b/tests/test_smooth_text_fade.py
index c984d1e7..642c2b08 100644
--- a/tests/test_smooth_text_fade.py
+++ b/tests/test_smooth_text_fade.py
@@ -162,6 +162,7 @@ def test_stream_fade_uses_incremental_renderer_without_changing_default_path():
         [
             "span.className='stream-fade-word is-new'",
             "_streamFadeReduceMotionEnabled()",
+            "const appendStartedAt=performance.now()",
             "renderer.set_attr",
             "data-blocked-scheme",
             "_streamFadeLatestAnimationEndAt",

From c7ac4ba2d34f8a90ec912bff4d4c40c0a78e365f Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Tue, 12 May 2026 18:24:52 -0600
Subject: [PATCH 11/12] Remove PR_NOTES.md

---
 PR_NOTES.md | 60 -----------------------------------------------------
 1 file changed, 60 deletions(-)
 delete mode 100644 PR_NOTES.md

diff --git a/PR_NOTES.md b/PR_NOTES.md
deleted file mode 100644
index 72cbee2a..00000000
--- a/PR_NOTES.md
+++ /dev/null
@@ -1,60 +0,0 @@
-# PR Notes: Opt-in Smooth Streaming Text Fade
-
-## Summary
-
-Adds an opt-in `Fade text effect` preference for live assistant responses. When enabled, newly revealed words fade in during streaming for a smoother ChatGPT/Codex-like feel while preserving the existing default streaming path when disabled.
-
-## User-facing behavior
-
-- New setting: `Settings -> Preferences -> Fade text effect`
-- Default: off
-- Runtime flag: `window._fadeTextEffect`
-- Fade mode uses a playout buffer so fast backend chunks do not land as large paragraph pops.
-- Visual reveal rate is intentionally capped for readability, especially with very fast models.
-- Live cursor is hidden while fade mode is active.
-- Reduced-motion users get non-animated text.
-
-## Implementation notes
-
-- Fade mode is locked per stream to avoid mid-stream preference toggle rewind/duplication.
-- Uses Hermes' existing incremental `streaming-markdown` parser with a custom renderer instead of full markdown re-renders.
-- Only newly appended words are wrapped and animated.
-- Animated spans are replaced with plain text by a delegated `animationend` handler, avoiding long-lived wrapper buildup without per-word listeners.
-- Reduced-motion preference is cached with a media-query listener instead of checked for every appended text node, and terminal stream paths remove the listener.
-- Unsafe streamed `href`/`src` values are blocked in the fade renderer `set_attr` path.
-- On `done`, fade mode drains buffered text, ends the parser to flush pending markdown, and waits for the final fade/stagger window before the final `renderMessages()` replacement.
-
-## Performance/readability tuning
-
-- Normal fade duration: `200ms`
-- Word stagger: `16ms`
-- Done drain wait cap: `320ms`
-- Visual playback is capped at `160 wps`.
-- Reveals at most `2 words/frame`, or `3 words/frame` only with very large backlog.
-- Reveal steps pause briefly after sentence punctuation and paragraph breaks so very fast models feel less overwhelming.
-- Target word counts reuse the already-visible word count plus backlog count, avoiding repeated full-response word scans on each tick.
-
-## Verification
-
-```bash
-node --check static/messages.js static/panels.js static/boot.js static/i18n.js
-git diff --check
-/Users/agent/.hermes/hermes-agent/venv/bin/python -m pytest tests/test_smooth_text_fade.py tests/test_1003_preferences_autosave.py tests/test_streaming_markdown.py -q
-/Users/agent/.hermes/hermes-agent/venv/bin/python -m py_compile api/config.py
-```
-
-Latest focused result:
-
-```text
-69 passed
-```
-
-## Manual QA suggested
-
-- Hard refresh after deployment.
-- Enable `Settings -> Preferences -> Fade text effect`.
-- Test short normal response.
-- Test long markdown response with headings, lists, links, code blocks, and tables.
-- Test very fast model output around 200-400 tok/s.
-- Test tool-call-heavy response.
-- Test OS/browser reduced-motion mode if available.

From f23ee79938f9a4731d26441558a983b3c4f9abc3 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Tue, 12 May 2026 20:50:38 -0600
Subject: [PATCH 12/12] Scale fade duration with playback speed

---
 static/messages.js             | 13 ++++++++++---
 tests/test_smooth_text_fade.py | 22 ++++++++++++++++++++++
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/static/messages.js b/static/messages.js
index 21faa2c7..a561cf6e 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -536,10 +536,12 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
   let _streamFadeAppendOffset=0;
   let _streamFadeVisibleWords=0;
   let _streamFadeHoldUntilMs=0;
+  let _streamFadeCurrentMs=200;
   let _streamFadeReduceMotionMql=null;
   let _streamFadeReduceMotion=false;
   let _streamFadeReduceMotionOnChange=null;
   const _STREAM_FADE_MS=200;
+  const _STREAM_FADE_MAX_MS=350;
   const _STREAM_FADE_STAGGER_MS=16;
   const _STREAM_FADE_DONE_MAX_MS=320;
   const _streamFadeEnabledForStream=window._fadeTextEffect===true;
@@ -707,6 +709,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     _streamFadeAppendOffset=0;
     _streamFadeVisibleWords=0;
     _streamFadeHoldUntilMs=0;
+    _streamFadeCurrentMs=_STREAM_FADE_MS;
   }
   function _cancelAnimationFramePendingStreamRender(){
     if(_pendingRafHandle===null) return;
@@ -775,12 +778,14 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         }
         const span=document.createElement('span');
         span.className='stream-fade-word is-new';
+        const fadeMs=_streamFadeCurrentMs||_STREAM_FADE_MS;
         const delayMs=_streamFadeAppendOffset*_STREAM_FADE_STAGGER_MS;
         span.style.animationDelay=delayMs+'ms';
+        if(fadeMs!==_STREAM_FADE_MS) span.style.setProperty('--stream-fade-ms',fadeMs+'ms');
         span.textContent=match[1];
         frag.appendChild(span);
         _streamFadeAppendOffset+=1;
-        _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,appendStartedAt+delayMs+_STREAM_FADE_MS);
+        _streamFadeLatestAnimationEndAt=Math.max(_streamFadeLatestAnimationEndAt,appendStartedAt+delayMs+fadeMs);
         if(match[2]) frag.appendChild(document.createTextNode(match[2]));
         last=match.index+match[0].length;
         changed=true;
@@ -867,9 +872,11 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
 
     const streamAgeSeconds=Math.max(0, (now-(_streamFadeStartedAt||now))/1000);
     const baseWps=22 + Math.min(streamAgeSeconds*2.5, 28); // 22 → 50 wps over long answers
-    const arrivalWps=_streamFadeArrivalWps ? Math.min(_streamFadeArrivalWps*1.05 + 8, 90) : 0;
-    const backlogWps=backlogWords>0 ? Math.min(22 + backlogWords*1.1, 100) : 0;
+    const arrivalWps=_streamFadeArrivalWps ? Math.min(_streamFadeArrivalWps*1.05 + 8, 160) : 0;
+    const backlogWps=backlogWords>0 ? Math.min(22 + backlogWords*1.1, 160) : 0;
     const wordsPerSecond=Math.min(160, Math.max(baseWps, arrivalWps, backlogWps));
+    const speedFadeRatio=Math.max(0,Math.min(1,(wordsPerSecond-50)/(160-50)));
+    _streamFadeCurrentMs=Math.round(_STREAM_FADE_MS+(_STREAM_FADE_MAX_MS-_STREAM_FADE_MS)*speedFadeRatio);
 
     _streamFadeWordCarry+=elapsedMs*wordsPerSecond/1000;
     if(!_streamFadeVisibleText) _streamFadeWordCarry=Math.max(_streamFadeWordCarry,1);
diff --git a/tests/test_smooth_text_fade.py b/tests/test_smooth_text_fade.py
index 642c2b08..ee5f8ff7 100644
--- a/tests/test_smooth_text_fade.py
+++ b/tests/test_smooth_text_fade.py
@@ -77,7 +77,9 @@ let _streamFadeLatestAnimationEndAt=0;
 let _streamFadeAppendOffset=0;
 let _streamFadeVisibleWords=0;
 let _streamFadeHoldUntilMs=0;
+let _streamFadeCurrentMs=200;
 const _STREAM_FADE_MS=200;
+const _STREAM_FADE_MAX_MS=350;
 const _STREAM_FADE_STAGGER_MS=16;
 const _STREAM_FADE_DONE_MAX_MS=320;
 const performance={performance_stub};
@@ -163,6 +165,7 @@ def test_stream_fade_uses_incremental_renderer_without_changing_default_path():
             "span.className='stream-fade-word is-new'",
             "_streamFadeReduceMotionEnabled()",
             "const appendStartedAt=performance.now()",
+            "--stream-fade-ms",
             "renderer.set_attr",
             "data-blocked-scheme",
             "_streamFadeLatestAnimationEndAt",
@@ -206,6 +209,25 @@ def test_stream_fade_reduced_motion_listener_is_cleaned_up_on_terminal_paths():
     assert MESSAGES_JS.count("_streamFadeCleanupReduceMotionListener();") >= 4
 
 
+def test_stream_fade_duration_scales_up_with_playback_speed():
+    script = (
+        fade_helper_script()
+        + r"""
+const words=Array.from({length:260},(_,i)=>'w'+i).join(' ');
+performance._t += 33;
+let out=_streamFadeNextText('slow start');
+if(!out.changed) throw new Error('expected initial reveal');
+if(_streamFadeCurrentMs !== 200) throw new Error(`expected base fade 200ms, got ${_streamFadeCurrentMs}`);
+for(let frame=0;frame<20&&_streamFadeCurrentMs<350;frame++){
+  performance._t += 120;
+  out=_streamFadeNextText(words);
+}
+if(_streamFadeCurrentMs !== 350) throw new Error(`expected max fade 350ms, got ${_streamFadeCurrentMs}`);
+"""
+    )
+    run_node(script)
+
+
 def test_stream_fade_playout_handles_fast_models_without_paragraph_pops():
     script = (
         fade_helper_script()