Squash merge PR #1303: feat(ui): TTS playback of agent responses (#499)

Adds Web Speech API-based text-to-speech for agent responses. Per-message speaker button in the action row, auto-read toggle for last assistant response, voice/rate/pitch controls in Settings. All TTS preferences stored in localStorage. Opt-in: TTS hidden by default until enabled in Settings. Strips markdown, code blocks, and MEDIA: paths via _stripForTTS() before speaking. Pauses synthesis when composer is focused. 26 regression tests in tests/test_499_tts_playback.py. i18n keys added for all 8 locales (en, ru, es, de, zh, zh-Hant, pt, ko). Closes #499. Co-authored-by: Felipe Linhares <fecolinhares@users.noreply.github.com>
2026-05-26 11:40:26 +00:00 · 2026-04-30 04:08:14 +00:00
parent d5160bc866
commit 28a7b78ae4
8 changed files with 494 additions and 3 deletions
@@ -851,6 +851,8 @@ function applyBotName(){
      if(typeof applyLocaleToDOM==='function')applyLocaleToDOM();
    }
    applyBotName();
+    // TTS: apply enabled state on boot so buttons show/hide correctly (#499)
+    if(typeof _applyTtsEnabled==='function') _applyTtsEnabled(localStorage.getItem('hermes-tts-enabled')==='true');
  }catch(e){
    window._sendKey='enter';
    window._showTokenUsage=false;
@@ -871,6 +873,7 @@ function applyBotName(){
      if(typeof applyLocaleToDOM==='function')applyLocaleToDOM();
    }
    applyBotName();
+    if(typeof _applyTtsEnabled==='function') _applyTtsEnabled(localStorage.getItem('hermes-tts-enabled')==='true');
  }
  // Non-blocking update check (fire-and-forget, once per tab session)
  // ?test_updates=1 in URL forces banner display for testing (bypasses sessionStorage guards)
@@ -441,6 +441,17 @@ const LOCALES = {
    // Settings detail
    settings_label_sound: 'Notification sound',
    settings_desc_sound: 'Play a sound when the assistant finishes a response.',
+    // TTS (#499)
+    tts_listen: 'Listen',
+    tts_not_supported: 'Speech synthesis not supported in this browser.',
+    settings_label_tts: 'Text-to-Speech for responses',
+    settings_desc_tts: "Show a speaker button on each assistant message to read it aloud using your browser's speech synthesis.",
+    settings_label_tts_auto_read: 'Auto-read responses aloud',
+    settings_desc_tts_auto_read: 'Automatically speak each new assistant response when it finishes. Pauses when you start typing.',
+    settings_label_tts_voice: 'Voice',
+    settings_desc_tts_voice: "Preferred voice. Populated from your browser's available voices.",
+    settings_label_tts_rate: 'Speech rate',
+    settings_label_tts_pitch: 'Speech pitch',
    settings_label_notifications: 'Browser notifications',
    settings_desc_notifications: 'Show a system notification when a response completes while the app is in the background.',
    settings_desc_token_usage: 'Displays input/output token count below each assistant reply. Also toggled with /usage.',
@@ -1511,6 +1522,17 @@ const LOCALES = {
    html_error: 'Не удалось загрузить предпросмотр HTML',
    html_open_full: 'Открыть на всю страницу',
    html_sandbox_label: 'Предпросмотр HTML',
+    // TTS (#499)
+    tts_listen: 'Прослушать',
+    tts_not_supported: 'Синтез речи не поддерживается',
+    settings_label_tts: 'Синтез речи для ответов',
+    settings_desc_tts: 'Показать кнопку динамика на сообщениях ассистента',
+    settings_label_tts_auto_read: 'Авто-чтение ответов',
+    settings_desc_tts_auto_read: 'Автоматически озвучивать ответы ассистента',
+    settings_label_tts_voice: 'Голос',
+    settings_desc_tts_voice: 'Выберите голос для синтеза речи',
+    settings_label_tts_rate: 'Скорость речи',
+    settings_label_tts_pitch: 'Тон речи',
  },

  es: {
@@ -2231,6 +2253,17 @@ const LOCALES = {
    html_error: 'Error al cargar la vista previa de HTML',
    html_open_full: 'Abrir página completa',
    html_sandbox_label: 'Vista previa de HTML',
+    // TTS (#499)
+    tts_listen: 'Escuchar',
+    tts_not_supported: 'Síntesis de voz no disponible',
+    settings_label_tts: 'Texto a voz para respuestas',
+    settings_desc_tts: 'Mostrar botón de altavoz en mensajes del asistente',
+    settings_label_tts_auto_read: 'Leer respuestas automáticamente',
+    settings_desc_tts_auto_read: 'Leer en voz alta las respuestas del asistente automáticamente',
+    settings_label_tts_voice: 'Voz',
+    settings_desc_tts_voice: 'Seleccionar voz para síntesis de voz',
+    settings_label_tts_rate: 'Velocidad de voz',
+    settings_label_tts_pitch: 'Tono de voz',
  },

  de: {
@@ -2955,7 +2988,18 @@ const LOCALES = {
    html_error: 'HTML-Vorschau konnte nicht geladen werden',
    html_open_full: 'Vollständige Seite öffnen',
    html_sandbox_label: 'HTML-Vorschau',
-},
+    // TTS (#499)
+    tts_listen: 'Anhören',
+    tts_not_supported: 'Sprachsynthese nicht verfügbar',
+    settings_label_tts: 'Text-zu-Sprache für Antworten',
+    settings_desc_tts: 'Lautsprecher-Symbol auf Assistenten-Nachrichten anzeigen',
+    settings_label_tts_auto_read: 'Antworten automatisch vorlesen',
+    settings_desc_tts_auto_read: 'Assistenten-Antworten automatisch vorlesen',
+    settings_label_tts_voice: 'Stimme',
+    settings_desc_tts_voice: 'Stimme für Sprachsynthese auswählen',
+    settings_label_tts_rate: 'Sprechgeschwindigkeit',
+    settings_label_tts_pitch: 'Tonhöhe',
+  },

  zh: {
    _lang: 'zh',
@@ -3673,6 +3717,17 @@ const LOCALES = {
    excalidraw_empty: '空图表',
    excalidraw_render_error: '渲染图表失败',
    excalidraw_simplified: '简化 SVG 预览 — 与 Excalidraw 画布不完全相同',
+    // TTS (#499)
+    tts_listen: '收听',
+    tts_not_supported: '语音合成不可用',
+    settings_label_tts: '回复语音合成',
+    settings_desc_tts: '在助手消息上显示扬声器按钮',
+    settings_label_tts_auto_read: '自动朗读回复',
+    settings_desc_tts_auto_read: '自动朗读助手回复',
+    settings_label_tts_voice: '语音',
+    settings_desc_tts_voice: '选择语音合成声音',
+    settings_label_tts_rate: '语速',
+    settings_label_tts_pitch: '音调',
  },

  // Traditional Chinese (zh-Hant)
@@ -4499,6 +4554,17 @@ const LOCALES = {
    html_error: 'HTML 預覽載入失敗',
    html_open_full: '開啟完整頁面',
    html_sandbox_label: 'HTML 預覽',
+    // TTS (#499)
+    tts_listen: '收聽',
+    tts_not_supported: '語音合成無法使用',
+    settings_label_tts: '回覆語音合成',
+    settings_desc_tts: '在助手訊息上顯示喇叭按鈕',
+    settings_label_tts_auto_read: '自動朗讀回覆',
+    settings_desc_tts_auto_read: '自動朗讀助手回覆',
+    settings_label_tts_voice: '語音',
+    settings_desc_tts_voice: '選擇語音合成聲音',
+    settings_label_tts_rate: '語速',
+    settings_label_tts_pitch: '音調',
  },


@@ -5138,7 +5204,18 @@ const LOCALES = {
    approval_skip: 'Pular',
    approval_skip_title: 'Pular este prompt de aprovação',
    approval_skip_all: 'Pular todos',
-    approval_skip_all_title: 'Pular todos prompts de aprovação nesta sessão'
+    approval_skip_all_title: 'Pular todos prompts de aprovação nesta sessão',
+    // TTS (#499)
+    tts_listen: 'Ouvir',
+    tts_not_supported: 'Síntese de voz não disponível',
+    settings_label_tts: 'Texto para voz nas respostas',
+    settings_desc_tts: 'Mostrar botão de alto-falante nas mensagens do assistente',
+    settings_label_tts_auto_read: 'Ler respostas automaticamente',
+    settings_desc_tts_auto_read: 'Ler automaticamente as respostas do assistente',
+    settings_label_tts_voice: 'Voz',
+    settings_desc_tts_voice: 'Selecionar voz para síntese de voz',
+    settings_label_tts_rate: 'Velocidade da fala',
+    settings_label_tts_pitch: 'Tom da fala',
  },
  ko: {
    _lang: 'ko',
@@ -5911,6 +5988,17 @@ const LOCALES = {
    excalidraw_empty: '빈 다이어그램',
    excalidraw_render_error: '다이어그램 렌더링 실패',
    excalidraw_simplified: '단순화된 SVG 미리보기 — Excalidraw 캔버스와 픽셀 동일하지 않음',
+    // TTS (#499)
+    tts_listen: '듣기',
+    tts_not_supported: '음성 합성을 사용할 수 없습니다',
+    settings_label_tts: '답변 음성 합성',
+    settings_desc_tts: '도움말 메시지에 스피커 버튼 표시',
+    settings_label_tts_auto_read: '답변 자동 읽기',
+    settings_desc_tts_auto_read: '도움말 답변을 자동으로 읽어줌',
+    settings_label_tts_voice: '음성',
+    settings_desc_tts_voice: '음성 합성 음성 선택',
+    settings_label_tts_rate: '말 속도',
+    settings_label_tts_pitch: '말 톤',
  },
 };

@@ -683,6 +683,41 @@
              </label>
              <div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_sound">Play a sound when the assistant finishes a response.</div>
            </div>
+            <div class="settings-field">
+              <label style="display:flex;align-items:center;gap:8px;cursor:pointer">
+                <input type="checkbox" id="settingsTtsEnabled" style="width:15px;height:15px;accent-color:var(--accent)">
+                <span data-i18n="settings_label_tts">Text-to-Speech for responses</span>
+              </label>
+              <div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts">Show a speaker button on each assistant message to read it aloud using your browser's speech synthesis.</div>
+            </div>
+            <div class="settings-field">
+              <label style="display:flex;align-items:center;gap:8px;cursor:pointer">
+                <input type="checkbox" id="settingsTtsAutoRead" style="width:15px;height:15px;accent-color:var(--accent)">
+                <span data-i18n="settings_label_tts_auto_read">Auto-read responses aloud</span>
+              </label>
+              <div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts_auto_read">Automatically speak each new assistant response when it finishes. Pauses when you start typing.</div>
+            </div>
+            <div class="settings-field">
+              <label for="settingsTtsVoice" data-i18n="settings_label_tts_voice">Voice</label>
+              <select id="settingsTtsVoice" style="width:100%;padding:8px;background:var(--code-bg);color:var(--text);border:1px solid var(--border2);border-radius:6px">
+                <option value="">Default system voice</option>
+              </select>
+              <div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts_voice">Preferred voice. Populated from your browser's available voices.</div>
+            </div>
+            <div class="settings-field">
+              <label for="settingsTtsRate" data-i18n="settings_label_tts_rate">Speech rate</label>
+              <div style="display:flex;align-items:center;gap:12px;margin-top:4px">
+                <input type="range" id="settingsTtsRate" min="0.5" max="2" step="0.1" value="1" style="flex:1;accent-color:var(--accent)">
+                <span id="settingsTtsRateValue" style="font-size:12px;color:var(--muted);min-width:32px;text-align:right">1.0x</span>
+              </div>
+            </div>
+            <div class="settings-field">
+              <label for="settingsTtsPitch" data-i18n="settings_label_tts_pitch">Speech pitch</label>
+              <div style="display:flex;align-items:center;gap:12px;margin-top:4px">
+                <input type="range" id="settingsTtsPitch" min="0" max="2" step="0.1" value="1" style="flex:1;accent-color:var(--accent)">
+                <span id="settingsTtsPitchValue" style="font-size:12px;color:var(--muted);min-width:32px;text-align:right">1.0</span>
+              </div>
+            </div>
            <div class="settings-field">
              <label style="display:flex;align-items:center;gap:8px;cursor:pointer">
                <input type="checkbox" id="settingsNotificationsEnabled" style="width:15px;height:15px;accent-color:var(--accent)">
@@ -34,6 +34,10 @@ function _markActiveSessionViewedOnReturn() {

 document.addEventListener('visibilitychange', _markActiveSessionViewedOnReturn);
 window.addEventListener('focus', _markActiveSessionViewedOnReturn);
+// TTS (#499): while the composer has focus, hold any in-progress speech;
+// pick it back up once focus leaves the composer again.
+const _msgEl=document.getElementById('msg');
+if(_msgEl){
+  _msgEl.addEventListener('focus', function(){
+    if(!('speechSynthesis' in window)) return;
+    if(speechSynthesis.speaking) speechSynthesis.pause();
+  });
+  _msgEl.addEventListener('blur', function(){
+    if(!('speechSynthesis' in window)) return;
+    if(speechSynthesis.paused) speechSynthesis.resume();
+  });
+}

 async function send(){
  const text=$('msg').value.trim();
@@ -846,6 +850,8 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
        if(!S.messages.some(m=>m.role==='assistant'&&String(m.content||'').trim())&&!assistantText){removeThinking();S.messages.push({role:'assistant',content:'**No response received.** Check your API key and model selection.'});}
        if(isSessionViewed) _markSessionViewed(completedSid, completedSession.message_count ?? S.messages.length);
        syncTopbar();renderMessages();loadDir('.');
+        // TTS auto-read: speak the last assistant response if enabled (#499)
+        if(typeof autoReadLastAssistant==='function') setTimeout(()=>autoReadLastAssistant(), 300);
      }
      _queueDrainSid=activeSid;renderSessionList();setBusy(false);setStatus('');
      setComposerStatus('');
@@ -2603,6 +2603,13 @@ function _markSettingsDirty(){
  _settingsDirty = true;
 }

+// Apply TTS enabled state: show/hide the speaker buttons on assistant messages.
+// style.css hides .msg-tts-btn with display:none, so clearing the inline
+// display (style.display='') can never reveal a button — the stylesheet rule
+// simply applies again. Instead, toggle a small injected <style> override: it
+// out-ranks the stylesheet rule and also covers buttons rendered later.
+//   enabled: truthy to show the buttons, falsy to hide them.
+function _applyTtsEnabled(enabled){
+  const styleId='hermes-tts-enabled-style';
+  let styleEl=document.getElementById(styleId);
+  if(enabled){
+    if(!styleEl){
+      styleEl=document.createElement('style');
+      styleEl.id=styleId;
+      // button.msg-tts-btn is more specific than the stylesheet's .msg-tts-btn
+      styleEl.textContent='button.msg-tts-btn{display:inline-block;}';
+      (document.head||document.documentElement).appendChild(styleEl);
+    }
+  }else if(styleEl){
+    styleEl.remove();
+  }
+  // Drop inline display values so the CSS rules alone decide visibility.
+  document.querySelectorAll('.msg-tts-btn').forEach(btn=>{
+    btn.style.display='';
+  });
+}
+
 function _appearancePayloadFromUi(){
  return {
    theme: ($('settingsTheme')||{}).value || localStorage.getItem('hermes-theme') || 'dark',
@@ -2781,6 +2788,46 @@ async function loadSettingsPanel(){
    if(updateCb){updateCb.checked=settings.check_for_updates!==false;updateCb.addEventListener('change',_markSettingsDirty,{once:false});}
    const soundCb=$('settingsSoundEnabled');
    if(soundCb){soundCb.checked=!!settings.sound_enabled;soundCb.addEventListener('change',_markSettingsDirty,{once:false});}
+    // TTS settings (localStorage-only, no server round-trip needed)
+    const ttsEnabledCb=$('settingsTtsEnabled');
+    if(ttsEnabledCb){ttsEnabledCb.checked=localStorage.getItem('hermes-tts-enabled')==='true';ttsEnabledCb.onchange=function(){localStorage.setItem('hermes-tts-enabled',this.checked?'true':'false');_applyTtsEnabled(this.checked);};}
+    const ttsAutoReadCb=$('settingsTtsAutoRead');
+    if(ttsAutoReadCb){ttsAutoReadCb.checked=localStorage.getItem('hermes-tts-auto-read')==='true';ttsAutoReadCb.onchange=function(){localStorage.setItem('hermes-tts-auto-read',this.checked?'true':'false');};}
+    // Populate voice selector from speechSynthesis
+    const ttsVoiceSel=$('settingsTtsVoice');
+    if(ttsVoiceSel&&'speechSynthesis' in window){
+      const populateVoices=()=>{
+        const voices=speechSynthesis.getVoices();
+        const current=localStorage.getItem('hermes-tts-voice')||'';
+        ttsVoiceSel.innerHTML='<option value="">Default system voice</option>';
+        voices.forEach(v=>{
+          const opt=document.createElement('option');
+          opt.value=v.name;opt.textContent=v.name+(v.lang?' ('+v.lang+')':'');
+          if(v.name===current) opt.selected=true;
+          ttsVoiceSel.appendChild(opt);
+        });
+      };
+      populateVoices();
+      speechSynthesis.addEventListener('voiceschanged',populateVoices,{once:true});
+      ttsVoiceSel.onchange=function(){localStorage.setItem('hermes-tts-voice',this.value);};
+    }
+    // TTS rate/pitch sliders
+    const ttsRateSlider=$('settingsTtsRate');
+    const ttsRateValue=$('settingsTtsRateValue');
+    if(ttsRateSlider){
+      const savedRate=localStorage.getItem('hermes-tts-rate');
+      ttsRateSlider.value=savedRate||'1';
+      if(ttsRateValue) ttsRateValue.textContent=parseFloat(ttsRateSlider.value).toFixed(1)+'x';
+      ttsRateSlider.oninput=function(){if(ttsRateValue)ttsRateValue.textContent=parseFloat(this.value).toFixed(1)+'x';localStorage.setItem('hermes-tts-rate',this.value);};
+    }
+    const ttsPitchSlider=$('settingsTtsPitch');
+    const ttsPitchValue=$('settingsTtsPitchValue');
+    if(ttsPitchSlider){
+      const savedPitch=localStorage.getItem('hermes-tts-pitch');
+      ttsPitchSlider.value=savedPitch||'1';
+      if(ttsPitchValue) ttsPitchValue.textContent=parseFloat(ttsPitchSlider.value).toFixed(1);
+      ttsPitchSlider.oninput=function(){if(ttsPitchValue)ttsPitchValue.textContent=parseFloat(this.value).toFixed(1);localStorage.setItem('hermes-tts-pitch',this.value);};
+    }
    const notifCb=$('settingsNotificationsEnabled');
    if(notifCb){notifCb.checked=!!settings.notifications_enabled;notifCb.addEventListener('change',_markSettingsDirty,{once:false});}
    // show_thinking has no settings panel checkbox — controlled via /reasoning show|hide
@@ -1287,6 +1287,10 @@
 .msg-row:hover .msg-actions{opacity:1;}
 .msg-action-btn{background:none;border:none;color:var(--muted);cursor:pointer;font-size:13px;padding:2px 5px;border-radius:5px;transition:color .12s,background .12s;line-height:1;}
 .msg-action-btn:hover{color:var(--accent-text);background:var(--accent-bg);}
+/* TTS speaker button (#499): hidden by default, shown when TTS enabled.
+   NOTE: clearing an element's inline display does not override this rule —
+   revealing a button needs an explicit display value or a more specific rule. */
+.msg-tts-btn{display:none;}
+/* While flagged data-speaking="1" the button takes the accent colour and pulses. */
+.msg-tts-btn[data-speaking="1"]{color:var(--accent);animation:tts-pulse 1s ease-in-out infinite;}
+@keyframes tts-pulse{0%,100%{opacity:1}50%{opacity:.5}}

 /* ── Edit message inline ── */
 .msg-edit-area{width:100%;background:rgba(255,255,255,.05);border:1px solid var(--accent-bg);border-radius:8px;color:var(--text);padding:10px 12px;font-size:14px;font-family:inherit;line-height:1.6;resize:none;outline:none;min-height:60px;box-sizing:border-box;box-shadow:0 0 0 3px var(--accent-bg);margin-top:4px;}
@@ -2033,6 +2033,118 @@ function copyMsg(btn){
  }).catch(()=>showToast(t('copy_failed')));
 }

+// ── TTS: Text-to-Speech via Web Speech API (#499) ──
+// Turns a raw markdown message into plain text suitable for speech synthesis:
+// drops code (fenced and inline), unwraps emphasis/headings/links/images,
+// replaces MEDIA: paths with a short label, removes HTML tags and collapses
+// whitespace. Returns '' for null/undefined input.
+function _stripForTTS(text){
+  let out=String(text==null?'':text);
+  // Remove code blocks entirely (```)
+  out=out.replace(/```[\s\S]*?```/g,' ');
+  // Remove inline code
+  out=out.replace(/`[^`]+`/g,' ');
+  // Images first, keeping the alt text — otherwise the link rule below would
+  // leave the leading "!" behind to be read out.
+  out=out.replace(/!\[([^\]]*)\]\([^)]+\)/g,'$1');
+  // Strip links, keep text. Done before emphasis so "_" or "*" inside a URL
+  // is already gone when the emphasis rules run.
+  out=out.replace(/\[([^\]]+)\]\([^)]+\)/g,'$1');
+  // Strip bold/italic written with asterisks
+  out=out.replace(/\*\*(.+?)\*\*/g,'$1');
+  out=out.replace(/\*(.+?)\*/g,'$1');
+  // Underscore emphasis only counts at word boundaries, so identifiers such
+  // as my_var_name are left intact instead of being glued together.
+  out=out.replace(/(^|[^A-Za-z0-9_])__(.+?)__(?![A-Za-z0-9_])/g,'$1$2');
+  out=out.replace(/(^|[^A-Za-z0-9_])_(.+?)_(?![A-Za-z0-9_])/g,'$1$2');
+  // Strip headings
+  out=out.replace(/^#{1,6}\s+/gm,'');
+  // Replace MEDIA: paths with a simple label
+  out=out.replace(/MEDIA:[^\s]+/g,'a file');
+  // Strip HTML tags that may leak through markdown
+  out=out.replace(/<[^>]+>/g,' ');
+  // Collapse whitespace
+  return out.replace(/\s+/g,' ').trim();
+}
+
+// Module-level TTS state, written by speakMessage() and stopTTS().
+// _ttsSpeaking: set to true when speakMessage() starts an utterance; reset on end, error or stopTTS().
+// _ttsCurrentUtterance: the SpeechSynthesisUtterance started by speakMessage(), or null when idle.
+let _ttsSpeaking=false;
+let _ttsCurrentUtterance=null;
+
+// Speak the message that owns `btn` (looked up via the closest ancestor that
+// carries data-raw-text). Clicking the button of the message that is already
+// being spoken stops playback instead. Voice, rate and pitch preferences are
+// read from localStorage. Shows a toast when speech synthesis is unavailable.
+function speakMessage(btn){
+  if(!('speechSynthesis' in window)){
+    showToast(t('tts_not_supported')||'Speech synthesis not supported in this browser.');
+    return;
+  }
+  // If already speaking this message, stop
+  if(btn&&btn.dataset.speaking==='1'){
+    stopTTS();
+    return;
+  }
+  // Stop any current speech
+  stopTTS();
+
+  const row=btn?btn.closest('[data-raw-text]'):null;
+  const text=row?row.dataset.rawText:'';
+  if(!text) return;
+
+  const clean=_stripForTTS(text);
+  if(!clean) return;
+
+  const utter=new SpeechSynthesisUtterance(clean);
+
+  // Apply saved voice preference
+  const savedVoice=localStorage.getItem('hermes-tts-voice');
+  const voices=speechSynthesis.getVoices();
+  if(savedVoice&&voices.length){
+    const match=voices.find(v=>v.name===savedVoice);
+    if(match) utter.voice=match;
+  }
+
+  // Apply saved rate/pitch (clamped to the ranges offered by the settings sliders)
+  const savedRate=parseFloat(localStorage.getItem('hermes-tts-rate'));
+  if(!isNaN(savedRate)) utter.rate=Math.min(2,Math.max(0.5,savedRate));
+  const savedPitch=parseFloat(localStorage.getItem('hermes-tts-pitch'));
+  if(!isNaN(savedPitch)) utter.pitch=Math.min(2,Math.max(0,savedPitch));
+
+  _ttsCurrentUtterance=utter;
+  _ttsSpeaking=true;
+  if(btn) btn.dataset.speaking='1';
+
+  // A cancelled utterance may report end/error only after its replacement has
+  // started. Ignore such late events so they cannot clear the state or the
+  // button of the utterance that is speaking now (stopTTS() has already reset
+  // everything that belonged to the cancelled one).
+  const finish=()=>{
+    if(_ttsCurrentUtterance!==utter) return;
+    _ttsSpeaking=false;
+    _ttsCurrentUtterance=null;
+    if(btn) btn.dataset.speaking='0';
+  };
+  utter.onend=finish;
+  utter.onerror=finish;
+
+  speechSynthesis.speak(utter);
+}
+
+// Cancel speech synthesis (when available), clear the shared TTS state and
+// reset every element currently flagged data-speaking="1".
+function stopTTS(){
+  const canSpeak='speechSynthesis' in window;
+  if(canSpeak) speechSynthesis.cancel();
+  _ttsCurrentUtterance=null;
+  _ttsSpeaking=false;
+  for(const el of document.querySelectorAll('[data-speaking="1"]')){
+    el.dataset.speaking='0';
+  }
+}
+
+// Auto-read (#499): when the 'hermes-tts-auto-read' preference is 'true',
+// speak the raw text of the last matching assistant element in the DOM using
+// the saved voice / rate / pitch. Does nothing when speech synthesis is
+// unavailable or there is no text to read.
+function autoReadLastAssistant(){
+  const supported='speechSynthesis' in window;
+  if(!supported||localStorage.getItem('hermes-tts-auto-read')!=='true') return;
+
+  // Last assistant message segment in document order
+  const candidates=document.querySelectorAll('.msg-row[data-role="assistant"], .assistant-segment[data-raw-text]');
+  if(candidates.length===0) return;
+  const raw=candidates[candidates.length-1].dataset.rawText||'';
+  if(raw.trim()==='') return;
+  const spoken=_stripForTTS(raw);
+  if(!spoken) return;
+
+  const utterance=new SpeechSynthesisUtterance(spoken);
+
+  // Preferred voice, if one was saved and the browser currently lists it
+  const voiceName=localStorage.getItem('hermes-tts-voice');
+  const available=speechSynthesis.getVoices();
+  if(voiceName&&available.length){
+    const chosen=available.find(v=>v.name===voiceName);
+    if(chosen) utterance.voice=chosen;
+  }
+
+  // Saved rate (clamped to 0.5–2) and pitch (clamped to 0–2); unparsable values keep the defaults
+  const rate=parseFloat(localStorage.getItem('hermes-tts-rate'));
+  const pitch=parseFloat(localStorage.getItem('hermes-tts-pitch'));
+  if(!isNaN(rate)) utterance.rate=Math.min(2,Math.max(0.5,rate));
+  if(!isNaN(pitch)) utterance.pitch=Math.min(2,Math.max(0,pitch));
+
+  speechSynthesis.speak(utterance);
+}
+
 // ── Reconnect banner (B4/B5: reload resilience) ──
 const INFLIGHT_KEY = 'hermes-webui-inflight'; // localStorage key for in-flight session tracking
 const INFLIGHT_STATE_KEY = 'hermes-webui-inflight-state'; // localStorage snapshots for mid-stream reload recovery
@@ -2864,6 +2976,7 @@ function renderMessages(){
    const undoBtn  = isLastAssistant ? `<button class="msg-action-btn" title="${t('undo_exchange')}" onclick="undoLastExchange()">${li('undo',13)}</button>` : '';
    const retryBtn = isLastAssistant ? `<button class="msg-action-btn" title="${t('regenerate')}" onclick="regenerateResponse(this)">${li('rotate-ccw',13)}</button>` : '';
    const copyBtn  = `<button class="msg-copy-btn msg-action-btn" title="${t('copy')}" onclick="copyMsg(this)">${li('copy',13)}</button>`;
+    const ttsBtn   = !isUser ? `<button class="msg-action-btn msg-tts-btn" title="${t('tts_listen')||'Listen'}" onclick="speakMessage(this)">${li('volume-2',13)}</button>` : '';
    const tsVal=m._ts||m.timestamp;
    // _formatInServerTz handles fractional-hour offsets (India +0530 etc.)
    // correctly via offset arithmetic; bare toLocaleString is the browser-tz fallback.
@@ -2871,7 +2984,7 @@ function renderMessages(){
    const tsTitle=tsVal?(_fmtSv?_fmtSv(new Date(tsVal*1000),{}):new Date(tsVal*1000).toLocaleString()):'';
    const tsTime=_formatMessageFooterTimestamp(tsVal);
    const timeHtml = tsTime ? `<span class="msg-time" title="${esc(tsTitle)}">${tsTime}</span>` : '';
-    const footHtml = `<div class="msg-foot">${timeHtml}<span class="msg-actions">${editBtn}${copyBtn}${retryBtn}</span></div>`;
+    const footHtml = `<div class="msg-foot">${timeHtml}<span class="msg-actions">${editBtn}${ttsBtn}${copyBtn}${retryBtn}</span></div>`;

    if(_isContextCompactionMessage(m)){
      if(compressionState || referenceNode){
@@ -0,0 +1,195 @@
+"""
+Tests for #499: TTS playback of agent responses via Web Speech API.
+
+Verifies that TTS utility functions, speaker button rendering, and
+settings controls are present in the WebUI codebase.
+"""
+import os
+import re
+
+STATIC_DIR = os.path.join(os.path.dirname(__file__), '..', 'static')
+
+
+def _read(filename):
+    """Return the UTF-8 text of ``filename`` located in STATIC_DIR."""
+    # Context manager so the handle is closed right away instead of being
+    # left for the garbage collector (avoids ResourceWarning noise in tests).
+    with open(os.path.join(STATIC_DIR, filename), encoding='utf-8') as fh:
+        return fh.read()
+
+
+class TestTtsUtilityFunctions:
+    """Source-level checks that the core TTS helpers are defined in ui.js."""
+
+    def test_strip_for_tts_exists(self):
+        ui_js = _read('ui.js')
+        found = 'function _stripForTTS(' in ui_js
+        assert found, "_stripForTTS function not found in ui.js"
+
+    def test_speak_message_exists(self):
+        ui_js = _read('ui.js')
+        found = 'function speakMessage(' in ui_js
+        assert found, "speakMessage function not found in ui.js"
+
+    def test_stop_tts_exists(self):
+        ui_js = _read('ui.js')
+        found = 'function stopTTS(' in ui_js
+        assert found, "stopTTS function not found in ui.js"
+
+    def test_auto_read_exists(self):
+        ui_js = _read('ui.js')
+        found = 'function autoReadLastAssistant(' in ui_js
+        assert found, "autoReadLastAssistant function not found in ui.js"
+
+    def test_strip_code_blocks(self):
+        """A ``` fence must appear somewhere after the _stripForTTS name."""
+        ui_js = _read('ui.js')
+        match = re.search(r'_stripForTTS.*```', ui_js, re.DOTALL)
+        assert match, "_stripForTTS must handle fenced code blocks"
+
+    def test_strip_media_paths(self):
+        """ui.js must mention both the MEDIA: marker and its spoken label."""
+        ui_js = _read('ui.js')
+        has_marker = 'MEDIA:' in ui_js
+        has_label = 'a file' in ui_js
+        assert has_marker and has_label, \
+            "_stripForTTS must replace MEDIA: paths"
+
+    def test_uses_speech_synthesis(self):
+        """ui.js must build an utterance and hand it to speechSynthesis."""
+        ui_js = _read('ui.js')
+        assert 'SpeechSynthesisUtterance' in ui_js, \
+            "speakMessage must create SpeechSynthesisUtterance"
+        assert 'speechSynthesis.speak' in ui_js, \
+            "speakMessage must call speechSynthesis.speak"
+
+
+class TestTtsSpeakerButton:
+    """Speaker button is rendered on assistant messages."""
+
+    @staticmethod
+    def _tts_button_line():
+        """Return the first ui.js line that mentions the msg-tts-btn class.
+
+        Fails with a readable message (rather than an IndexError) when no
+        such line exists.
+        """
+        matches = [l for l in _read('ui.js').splitlines() if 'msg-tts-btn' in l]
+        assert matches, "TTS button class not found in ui.js"
+        return matches[0]
+
+    def test_tts_button_rendered(self):
+        """ttsBtn must be generated for non-user messages."""
+        src = _read('ui.js')
+        assert 'msg-tts-btn' in src, \
+            "TTS button class not found in ui.js"
+
+    def test_tts_button_not_on_user_messages(self):
+        """ttsBtn must only be added for non-user (assistant) messages."""
+        tts_line = self._tts_button_line()
+        # Require the negated guard itself: a bare 'isUser' check would also
+        # pass if the button were rendered for user messages.
+        assert '!isUser' in tts_line, \
+            "TTS button should have user-check guard"
+
+    def test_tts_button_in_footer(self):
+        """ttsBtn must be included in the msg-actions span."""
+        src = _read('ui.js')
+        # The footHtml line should include ttsBtn
+        foot_lines = [l for l in src.splitlines() if 'footHtml' in l and 'msg-actions' in l]
+        assert any('ttsBtn' in l for l in foot_lines), \
+            "ttsBtn not included in footHtml msg-actions"
+
+    def test_tts_button_uses_volume_icon(self):
+        """Speaker button should use volume-2 icon."""
+        tts_line = self._tts_button_line()
+        assert 'volume-2' in tts_line, \
+            "TTS button should use volume-2 icon"
+
+
+class TestTtsSettings:
+    """The TTS controls are present in index.html and referenced from panels.js."""
+
+    def test_tts_enabled_checkbox(self):
+        html = _read('index.html')
+        present = 'settingsTtsEnabled' in html
+        assert present, "TTS enabled checkbox not found in index.html"
+
+    def test_tts_auto_read_checkbox(self):
+        html = _read('index.html')
+        present = 'settingsTtsAutoRead' in html
+        assert present, "TTS auto-read checkbox not found in index.html"
+
+    def test_tts_voice_selector(self):
+        html = _read('index.html')
+        present = 'settingsTtsVoice' in html
+        assert present, "TTS voice selector not found in index.html"
+
+    def test_tts_rate_slider(self):
+        html = _read('index.html')
+        present = 'settingsTtsRate' in html
+        assert present, "TTS rate slider not found in index.html"
+
+    def test_tts_pitch_slider(self):
+        html = _read('index.html')
+        present = 'settingsTtsPitch' in html
+        assert present, "TTS pitch slider not found in index.html"
+
+    def test_tts_settings_wired_in_panels(self):
+        """panels.js must reference the enable checkbox and _applyTtsEnabled."""
+        panels_js = _read('panels.js')
+        assert 'settingsTtsEnabled' in panels_js, \
+            "TTS enabled setting not wired in panels.js"
+        assert '_applyTtsEnabled' in panels_js, \
+            "_applyTtsEnabled not called in panels.js"
+
+    def test_apply_tts_enabled_function(self):
+        """panels.js must define the _applyTtsEnabled function."""
+        panels_js = _read('panels.js')
+        defined = 'function _applyTtsEnabled(' in panels_js
+        assert defined, "_applyTtsEnabled function not found in panels.js"
+
+
+class TestTtsI18n:
+    """The TTS i18n keys appear somewhere in i18n.js (not checked per locale)."""
+
+    def test_tts_listen_key(self):
+        i18n_js = _read('i18n.js')
+        present = "tts_listen:" in i18n_js
+        assert present, "tts_listen key not found in i18n.js"
+
+    def test_tts_not_supported_key(self):
+        i18n_js = _read('i18n.js')
+        present = "tts_not_supported:" in i18n_js
+        assert present, "tts_not_supported key not found in i18n.js"
+
+    def test_tts_settings_keys(self):
+        i18n_js = _read('i18n.js')
+        expected_keys = (
+            'settings_label_tts',
+            'settings_label_tts_auto_read',
+            'settings_label_tts_voice',
+            'settings_label_tts_rate',
+            'settings_label_tts_pitch',
+        )
+        for key in expected_keys:
+            assert f"{key}:" in i18n_js, f"{key} not found in i18n.js"
+
+
+class TestTtsAutoRead:
+    """messages.js references auto-read and the composer pause/resume calls."""
+
+    def test_auto_read_called_in_messages(self):
+        messages_js = _read('messages.js')
+        referenced = 'autoReadLastAssistant' in messages_js
+        assert referenced, "autoReadLastAssistant not called in messages.js"
+
+    def test_tts_pause_on_composer_focus(self):
+        """messages.js must contain both the pause and the resume call."""
+        messages_js = _read('messages.js')
+        assert 'speechSynthesis.pause' in messages_js, \
+            "speechSynthesis.pause not called in messages.js"
+        assert 'speechSynthesis.resume' in messages_js, \
+            "speechSynthesis.resume not called in messages.js"
+
+
+class TestTtsBoot:
+    """boot.js references _applyTtsEnabled so the saved state is applied on load."""
+
+    def test_apply_tts_on_boot(self):
+        boot_js = _read('boot.js')
+        referenced = '_applyTtsEnabled' in boot_js
+        assert referenced, "_applyTtsEnabled not called in boot.js"
+
+
+class TestTtsStyles:
+    """TTS CSS styles exist."""
+
+    def test_tts_button_hidden_default(self):
+        """The base .msg-tts-btn rule must exist and must hide the button."""
+        src = _read('style.css')
+        assert '.msg-tts-btn' in src, \
+            ".msg-tts-btn CSS class not found in style.css"
+        # Check the declaration itself, not just the selector: the feature is
+        # opt-in, so the bare class rule has to carry display:none.
+        assert re.search(r'\.msg-tts-btn\s*\{[^}]*display\s*:\s*none', src), \
+            ".msg-tts-btn must be hidden by default (display:none) in style.css"
+
+    def test_tts_pulse_animation(self):
+        """The tts-pulse animation used by the speaking state must be defined."""
+        src = _read('style.css')
+        assert 'tts-pulse' in src, \
+            "tts-pulse animation not found in style.css"