Squash merge PR #1303: feat(ui): TTS playback of agent responses (#499)

Adds Web Speech API based text-to-speech for agent responses. Per-message
speaker button in the action row, auto-read toggle for last assistant
response, voice/rate/pitch controls in Settings. All TTS preferences
stored in localStorage. Opt-in: TTS hidden by default until enabled in
Settings.

Strips markdown, code blocks, and MEDIA: paths via _stripForTTS() before
speaking. Pauses synthesis when composer is focused.

26 regression tests in tests/test_499_tts_playback.py. i18n keys added
for all 8 locales (en, ru, es, de, zh, zh-Hant, pt, ko).

Closes #499.

Co-authored-by: Felipe Linhares <fecolinhares@users.noreply.github.com>
This commit is contained in:
Hermes Agent
2026-04-30 04:08:14 +00:00
parent d5160bc866
commit 28a7b78ae4
8 changed files with 494 additions and 3 deletions
+3
View File
@@ -851,6 +851,8 @@ function applyBotName(){
if(typeof applyLocaleToDOM==='function')applyLocaleToDOM();
}
applyBotName();
// TTS: apply enabled state on boot so buttons show/hide correctly (#499)
if(typeof _applyTtsEnabled==='function') _applyTtsEnabled(localStorage.getItem('hermes-tts-enabled')==='true');
}catch(e){
window._sendKey='enter';
window._showTokenUsage=false;
@@ -871,6 +873,7 @@ function applyBotName(){
if(typeof applyLocaleToDOM==='function')applyLocaleToDOM();
}
applyBotName();
if(typeof _applyTtsEnabled==='function') _applyTtsEnabled(localStorage.getItem('hermes-tts-enabled')==='true');
}
// Non-blocking update check (fire-and-forget, once per tab session)
// ?test_updates=1 in URL forces banner display for testing (bypasses sessionStorage guards)
+90 -2
View File
@@ -441,6 +441,17 @@ const LOCALES = {
// Settings detail
settings_label_sound: 'Notification sound',
settings_desc_sound: 'Play a sound when the assistant finishes a response.',
// TTS (#499)
tts_listen: 'Listen',
tts_not_supported: 'Speech synthesis not supported in this browser.',
settings_label_tts: 'Text-to-Speech for responses',
settings_desc_tts: "Show a speaker button on each assistant message to read it aloud using your browser's speech synthesis.",
settings_label_tts_auto_read: 'Auto-read responses aloud',
settings_desc_tts_auto_read: 'Automatically speak each new assistant response when it finishes. Pauses when you start typing.',
settings_label_tts_voice: 'Voice',
settings_desc_tts_voice: "Preferred voice. Populated from your browser's available voices.",
settings_label_tts_rate: 'Speech rate',
settings_label_tts_pitch: 'Speech pitch',
settings_label_notifications: 'Browser notifications',
settings_desc_notifications: 'Show a system notification when a response completes while the app is in the background.',
settings_desc_token_usage: 'Displays input/output token count below each assistant reply. Also toggled with /usage.',
@@ -1511,6 +1522,17 @@ const LOCALES = {
html_error: 'Не удалось загрузить предпросмотр HTML',
html_open_full: 'Открыть на всю страницу',
html_sandbox_label: 'Предпросмотр HTML',
// TTS (#499)
tts_listen: 'Прослушать',
tts_not_supported: 'Синтез речи не поддерживается',
settings_label_tts: 'Синтез речи для ответов',
settings_desc_tts: 'Показать кнопку динамика на сообщениях ассистента',
settings_label_tts_auto_read: 'Авто-чтение ответов',
settings_desc_tts_auto_read: 'Автоматически озвучивать ответы ассистента',
settings_label_tts_voice: 'Голос',
settings_desc_tts_voice: 'Выберите голос для синтеза речи',
settings_label_tts_rate: 'Скорость речи',
settings_label_tts_pitch: 'Тон речи',
},
es: {
@@ -2231,6 +2253,17 @@ const LOCALES = {
html_error: 'Error al cargar la vista previa de HTML',
html_open_full: 'Abrir página completa',
html_sandbox_label: 'Vista previa de HTML',
// TTS (#499)
tts_listen: 'Escuchar',
tts_not_supported: 'Síntesis de voz no disponible',
settings_label_tts: 'Texto a voz para respuestas',
settings_desc_tts: 'Mostrar botón de altavoz en mensajes del asistente',
settings_label_tts_auto_read: 'Leer respuestas automáticamente',
settings_desc_tts_auto_read: 'Leer en voz alta las respuestas del asistente automáticamente',
settings_label_tts_voice: 'Voz',
settings_desc_tts_voice: 'Seleccionar voz para síntesis de voz',
settings_label_tts_rate: 'Velocidad de voz',
settings_label_tts_pitch: 'Tono de voz',
},
de: {
@@ -2955,7 +2988,18 @@ const LOCALES = {
html_error: 'HTML-Vorschau konnte nicht geladen werden',
html_open_full: 'Vollständige Seite öffnen',
html_sandbox_label: 'HTML-Vorschau',
},
// TTS (#499)
tts_listen: 'Anhören',
tts_not_supported: 'Sprachsynthese nicht verfügbar',
settings_label_tts: 'Text-zu-Sprache für Antworten',
settings_desc_tts: 'Lautsprecher-Symbol auf Assistenten-Nachrichten anzeigen',
settings_label_tts_auto_read: 'Antworten automatisch vorlesen',
settings_desc_tts_auto_read: 'Assistenten-Antworten automatisch vorlesen',
settings_label_tts_voice: 'Stimme',
settings_desc_tts_voice: 'Stimme für Sprachsynthese auswählen',
settings_label_tts_rate: 'Sprechgeschwindigkeit',
settings_label_tts_pitch: 'Tonhöhe',
},
zh: {
_lang: 'zh',
@@ -3673,6 +3717,17 @@ const LOCALES = {
excalidraw_empty: '空图表',
excalidraw_render_error: '渲染图表失败',
excalidraw_simplified: '简化 SVG 预览 — 与 Excalidraw 画布不完全相同',
// TTS (#499)
tts_listen: '收听',
tts_not_supported: '语音合成不可用',
settings_label_tts: '回复语音合成',
settings_desc_tts: '在助手消息上显示扬声器按钮',
settings_label_tts_auto_read: '自动朗读回复',
settings_desc_tts_auto_read: '自动朗读助手回复',
settings_label_tts_voice: '语音',
settings_desc_tts_voice: '选择语音合成声音',
settings_label_tts_rate: '语速',
settings_label_tts_pitch: '音调',
},
// Traditional Chinese (zh-Hant)
@@ -4499,6 +4554,17 @@ const LOCALES = {
html_error: 'HTML 預覽載入失敗',
html_open_full: '開啟完整頁面',
html_sandbox_label: 'HTML 預覽',
// TTS (#499)
tts_listen: '收聽',
tts_not_supported: '語音合成無法使用',
settings_label_tts: '回覆語音合成',
settings_desc_tts: '在助手訊息上顯示喇叭按鈕',
settings_label_tts_auto_read: '自動朗讀回覆',
settings_desc_tts_auto_read: '自動朗讀助手回覆',
settings_label_tts_voice: '語音',
settings_desc_tts_voice: '選擇語音合成聲音',
settings_label_tts_rate: '語速',
settings_label_tts_pitch: '音調',
},
@@ -5138,7 +5204,18 @@ const LOCALES = {
approval_skip: 'Pular',
approval_skip_title: 'Pular este prompt de aprovação',
approval_skip_all: 'Pular todos',
approval_skip_all_title: 'Pular todos prompts de aprovação nesta sessão'
approval_skip_all_title: 'Pular todos prompts de aprovação nesta sessão',
// TTS (#499)
tts_listen: 'Ouvir',
tts_not_supported: 'Síntese de voz não disponível',
settings_label_tts: 'Texto para voz nas respostas',
settings_desc_tts: 'Mostrar botão de alto-falante nas mensagens do assistente',
settings_label_tts_auto_read: 'Ler respostas automaticamente',
settings_desc_tts_auto_read: 'Ler automaticamente as respostas do assistente',
settings_label_tts_voice: 'Voz',
settings_desc_tts_voice: 'Selecionar voz para síntese de voz',
settings_label_tts_rate: 'Velocidade da fala',
settings_label_tts_pitch: 'Tom da fala',
},
ko: {
_lang: 'ko',
@@ -5911,6 +5988,17 @@ const LOCALES = {
excalidraw_empty: '빈 다이어그램',
excalidraw_render_error: '다이어그램 렌더링 실패',
excalidraw_simplified: '단순화된 SVG 미리보기 — Excalidraw 캔버스와 픽셀 동일하지 않음',
// TTS (#499)
tts_listen: '듣기',
tts_not_supported: '음성 합성을 사용할 수 없습니다',
settings_label_tts: '답변 음성 합성',
settings_desc_tts: '도움말 메시지에 스피커 버튼 표시',
settings_label_tts_auto_read: '답변 자동 읽기',
settings_desc_tts_auto_read: '도움말 답변을 자동으로 읽어줌',
settings_label_tts_voice: '음성',
settings_desc_tts_voice: '음성 합성 음성 선택',
settings_label_tts_rate: '말 속도',
settings_label_tts_pitch: '말 톤',
},
};
+35
View File
@@ -683,6 +683,41 @@
</label>
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_sound">Play a sound when the assistant finishes a response.</div>
</div>
<div class="settings-field">
<label style="display:flex;align-items:center;gap:8px;cursor:pointer">
<input type="checkbox" id="settingsTtsEnabled" style="width:15px;height:15px;accent-color:var(--accent)">
<span data-i18n="settings_label_tts">Text-to-Speech for responses</span>
</label>
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts">Show a speaker button on each assistant message to read it aloud using your browser's speech synthesis.</div>
</div>
<div class="settings-field">
<label style="display:flex;align-items:center;gap:8px;cursor:pointer">
<input type="checkbox" id="settingsTtsAutoRead" style="width:15px;height:15px;accent-color:var(--accent)">
<span data-i18n="settings_label_tts_auto_read">Auto-read responses aloud</span>
</label>
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts_auto_read">Automatically speak each new assistant response when it finishes. Pauses when you start typing.</div>
</div>
<div class="settings-field">
<label for="settingsTtsVoice" data-i18n="settings_label_tts_voice">Voice</label>
<select id="settingsTtsVoice" style="width:100%;padding:8px;background:var(--code-bg);color:var(--text);border:1px solid var(--border2);border-radius:6px">
<option value="">Default system voice</option>
</select>
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts_voice">Preferred voice. Populated from your browser's available voices.</div>
</div>
<div class="settings-field">
<label for="settingsTtsRate" data-i18n="settings_label_tts_rate">Speech rate</label>
<div style="display:flex;align-items:center;gap:12px;margin-top:4px">
<input type="range" id="settingsTtsRate" min="0.5" max="2" step="0.1" value="1" style="flex:1;accent-color:var(--accent)">
<span id="settingsTtsRateValue" style="font-size:12px;color:var(--muted);min-width:32px;text-align:right">1.0x</span>
</div>
</div>
<div class="settings-field">
<label for="settingsTtsPitch" data-i18n="settings_label_tts_pitch">Speech pitch</label>
<div style="display:flex;align-items:center;gap:12px;margin-top:4px">
<input type="range" id="settingsTtsPitch" min="0" max="2" step="0.1" value="1" style="flex:1;accent-color:var(--accent)">
<span id="settingsTtsPitchValue" style="font-size:12px;color:var(--muted);min-width:32px;text-align:right">1.0</span>
</div>
</div>
<div class="settings-field">
<label style="display:flex;align-items:center;gap:8px;cursor:pointer">
<input type="checkbox" id="settingsNotificationsEnabled" style="width:15px;height:15px;accent-color:var(--accent)">
+6
View File
@@ -34,6 +34,10 @@ function _markActiveSessionViewedOnReturn() {
document.addEventListener('visibilitychange', _markActiveSessionViewedOnReturn);
window.addEventListener('focus', _markActiveSessionViewedOnReturn);
// TTS (#499): suspend speech output while the composer has focus and pick it
// back up when focus leaves. Support is probed at event time, not at load time.
const _msgEl=document.getElementById('msg');
if(_msgEl){
  const synthPresent=()=>'speechSynthesis' in window;
  _msgEl.addEventListener('focus',()=>{
    if(synthPresent()&&speechSynthesis.speaking) speechSynthesis.pause();
  });
  _msgEl.addEventListener('blur',()=>{
    if(synthPresent()&&speechSynthesis.paused) speechSynthesis.resume();
  });
}
async function send(){
const text=$('msg').value.trim();
@@ -846,6 +850,8 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
if(!S.messages.some(m=>m.role==='assistant'&&String(m.content||'').trim())&&!assistantText){removeThinking();S.messages.push({role:'assistant',content:'**No response received.** Check your API key and model selection.'});}
if(isSessionViewed) _markSessionViewed(completedSid, completedSession.message_count ?? S.messages.length);
syncTopbar();renderMessages();loadDir('.');
// TTS auto-read: speak the last assistant response if enabled (#499)
if(typeof autoReadLastAssistant==='function') setTimeout(()=>autoReadLastAssistant(), 300);
}
_queueDrainSid=activeSid;renderSessionList();setBusy(false);setStatus('');
setComposerStatus('');
+47
View File
@@ -2603,6 +2603,13 @@ function _markSettingsDirty(){
_settingsDirty = true;
}
// Apply TTS enabled state: show/hide the speaker buttons on assistant messages (#499).
//
// The stylesheet hides `.msg-tts-btn` by default (display:none), so merely
// clearing the inline `display` value leaves the button hidden. Two things are
// done instead:
//   1. an override <style> element is added (enabled) or removed (disabled) so
//      that buttons rendered later follow the current preference;
//   2. every button already in the DOM gets an explicit inline display value.
//
// @param {boolean} enabled - true to show the speaker buttons, false to hide them.
function _applyTtsEnabled(enabled){
  const show=!!enabled;
  const overrideId='hermes-tts-visibility';
  let overrideEl=document.getElementById(overrideId);
  if(show&&!overrideEl){
    overrideEl=document.createElement('style');
    overrideEl.id=overrideId;
    // Two-class selector outranks the single-class `.msg-tts-btn{display:none;}` default.
    overrideEl.textContent='.msg-action-btn.msg-tts-btn{display:inline-block;}';
    (document.head||document.documentElement).appendChild(overrideEl);
  }else if(!show&&overrideEl){
    overrideEl.remove();
  }
  document.querySelectorAll('.msg-tts-btn').forEach(btn=>{
    btn.style.display=show?'inline-block':'none';
  });
}
function _appearancePayloadFromUi(){
return {
theme: ($('settingsTheme')||{}).value || localStorage.getItem('hermes-theme') || 'dark',
@@ -2781,6 +2788,46 @@ async function loadSettingsPanel(){
if(updateCb){updateCb.checked=settings.check_for_updates!==false;updateCb.addEventListener('change',_markSettingsDirty,{once:false});}
const soundCb=$('settingsSoundEnabled');
if(soundCb){soundCb.checked=!!settings.sound_enabled;soundCb.addEventListener('change',_markSettingsDirty,{once:false});}
// TTS settings (localStorage-only, no server round-trip needed)
const ttsEnabledCb=$('settingsTtsEnabled');
if(ttsEnabledCb){ttsEnabledCb.checked=localStorage.getItem('hermes-tts-enabled')==='true';ttsEnabledCb.onchange=function(){localStorage.setItem('hermes-tts-enabled',this.checked?'true':'false');_applyTtsEnabled(this.checked);};}
const ttsAutoReadCb=$('settingsTtsAutoRead');
if(ttsAutoReadCb){ttsAutoReadCb.checked=localStorage.getItem('hermes-tts-auto-read')==='true';ttsAutoReadCb.onchange=function(){localStorage.setItem('hermes-tts-auto-read',this.checked?'true':'false');};}
// Populate voice selector from speechSynthesis
const ttsVoiceSel=$('settingsTtsVoice');
if(ttsVoiceSel&&'speechSynthesis' in window){
const populateVoices=()=>{
const voices=speechSynthesis.getVoices();
const current=localStorage.getItem('hermes-tts-voice')||'';
ttsVoiceSel.innerHTML='<option value="">Default system voice</option>';
voices.forEach(v=>{
const opt=document.createElement('option');
opt.value=v.name;opt.textContent=v.name+(v.lang?' ('+v.lang+')':'');
if(v.name===current) opt.selected=true;
ttsVoiceSel.appendChild(opt);
});
};
populateVoices();
speechSynthesis.addEventListener('voiceschanged',populateVoices,{once:true});
ttsVoiceSel.onchange=function(){localStorage.setItem('hermes-tts-voice',this.value);};
}
// TTS rate/pitch sliders
const ttsRateSlider=$('settingsTtsRate');
const ttsRateValue=$('settingsTtsRateValue');
if(ttsRateSlider){
const savedRate=localStorage.getItem('hermes-tts-rate');
ttsRateSlider.value=savedRate||'1';
if(ttsRateValue) ttsRateValue.textContent=parseFloat(ttsRateSlider.value).toFixed(1)+'x';
ttsRateSlider.oninput=function(){if(ttsRateValue)ttsRateValue.textContent=parseFloat(this.value).toFixed(1)+'x';localStorage.setItem('hermes-tts-rate',this.value);};
}
const ttsPitchSlider=$('settingsTtsPitch');
const ttsPitchValue=$('settingsTtsPitchValue');
if(ttsPitchSlider){
const savedPitch=localStorage.getItem('hermes-tts-pitch');
ttsPitchSlider.value=savedPitch||'1';
if(ttsPitchValue) ttsPitchValue.textContent=parseFloat(ttsPitchSlider.value).toFixed(1);
ttsPitchSlider.oninput=function(){if(ttsPitchValue)ttsPitchValue.textContent=parseFloat(this.value).toFixed(1);localStorage.setItem('hermes-tts-pitch',this.value);};
}
const notifCb=$('settingsNotificationsEnabled');
if(notifCb){notifCb.checked=!!settings.notifications_enabled;notifCb.addEventListener('change',_markSettingsDirty,{once:false});}
// show_thinking has no settings panel checkbox — controlled via /reasoning show|hide
+4
View File
@@ -1287,6 +1287,10 @@
.msg-row:hover .msg-actions{opacity:1;}
.msg-action-btn{background:none;border:none;color:var(--muted);cursor:pointer;font-size:13px;padding:2px 5px;border-radius:5px;transition:color .12s,background .12s;line-height:1;}
.msg-action-btn:hover{color:var(--accent-text);background:var(--accent-bg);}
/* TTS speaker button: hidden by default, shown when TTS enabled */
.msg-tts-btn{display:none;}
.msg-tts-btn[data-speaking="1"]{color:var(--accent);animation:tts-pulse 1s ease-in-out infinite;}
@keyframes tts-pulse{0%,100%{opacity:1}50%{opacity:.5}}
/* ── Edit message inline ── */
.msg-edit-area{width:100%;background:rgba(255,255,255,.05);border:1px solid var(--accent-bg);border-radius:8px;color:var(--text);padding:10px 12px;font-size:14px;font-family:inherit;line-height:1.6;resize:none;outline:none;min-height:60px;box-sizing:border-box;box-shadow:0 0 0 3px var(--accent-bg);margin-top:4px;}
+114 -1
View File
@@ -2033,6 +2033,118 @@ function copyMsg(btn){
}).catch(()=>showToast(t('copy_failed')));
}
// ── TTS: Text-to-Speech via Web Speech API (#499) ──
// Reduce a raw assistant message to plain text suitable for speech synthesis:
// code is dropped, markdown decoration is removed (keeping the visible words),
// MEDIA: paths become the phrase "a file", HTML tags are removed, and
// whitespace is collapsed.
//
// @param {*} text - raw message text; null/undefined yield '', other
//                   non-strings are converted with String().
// @returns {string} trimmed single-line text (may be empty).
function _stripForTTS(text){
  // Coerce instead of throwing on a missing or non-string value.
  let out=text==null?'':String(text);
  // Remove code blocks entirely (```)
  out=out.replace(/```[\s\S]*?```/g,' ');
  // Remove inline code
  out=out.replace(/`[^`]+`/g,' ');
  // Strip bold/italic
  out=out.replace(/\*\*(.+?)\*\*/g,'$1');
  out=out.replace(/\*(.+?)\*/g,'$1');
  out=out.replace(/__(.+?)__/g,'$1');
  // Single underscores count as emphasis only when the delimiters sit on word
  // boundaries, so snake_case identifiers keep their underscores.
  out=out.replace(/\b_(.+?)_\b/g,'$1');
  // Strip headings
  out=out.replace(/^#{1,6}\s+/gm,'');
  // Images: keep the alt text and drop the leading "!" the link rule would leave behind
  out=out.replace(/!\[([^\]]*)\]\([^)]+\)/g,'$1');
  // Strip links, keep text
  out=out.replace(/\[([^\]]+)\]\([^)]+\)/g,'$1');
  // Replace MEDIA: paths with a simple label
  out=out.replace(/MEDIA:[^\s]+/g,'a file');
  // Strip HTML tags that may leak through markdown
  out=out.replace(/<[^>]+>/g,' ');
  // Collapse whitespace
  return out.replace(/\s+/g,' ').trim();
}
// Playback bookkeeping for the manually triggered utterance.
let _ttsSpeaking=false;
let _ttsCurrentUtterance=null;

// Speak the message that owns a speaker button, or stop it when that message
// is already playing.
//
// The text comes from the nearest ancestor carrying `data-raw-text`, is cleaned
// with _stripForTTS(), and is spoken with the voice/rate/pitch saved in
// localStorage (rate clamped to 0.5..2, pitch to 0..2). Shows a toast and
// returns when the browser has no speechSynthesis.
//
// @param {Element} btn - the clicked speaker button; its `data-speaking`
//                        attribute is '1' while its message is playing.
function speakMessage(btn){
  if(!('speechSynthesis' in window)){
    showToast(t('tts_not_supported')||'Speech synthesis not supported in this browser.');
    return;
  }
  // A click on the button of the message being spoken acts as "stop".
  if(btn&&btn.dataset.speaking==='1'){
    stopTTS();
    return;
  }
  // Only one utterance plays at a time.
  stopTTS();
  const row=btn?btn.closest('[data-raw-text]'):null;
  const text=row?row.dataset.rawText:'';
  if(!text) return;
  const clean=_stripForTTS(text);
  if(!clean) return;
  const utter=new SpeechSynthesisUtterance(clean);
  // Apply saved voice preference
  const savedVoice=localStorage.getItem('hermes-tts-voice');
  const voices=speechSynthesis.getVoices();
  if(savedVoice&&voices.length){
    const match=voices.find(v=>v.name===savedVoice);
    if(match) utter.voice=match;
  }
  // Apply saved rate/pitch
  const savedRate=parseFloat(localStorage.getItem('hermes-tts-rate'));
  if(!Number.isNaN(savedRate)) utter.rate=Math.min(2,Math.max(0.5,savedRate));
  const savedPitch=parseFloat(localStorage.getItem('hermes-tts-pitch'));
  if(!Number.isNaN(savedPitch)) utter.pitch=Math.min(2,Math.max(0,savedPitch));
  const settle=()=>{
    // The end/error callback of a cancelled utterance may arrive after a
    // replacement has been installed; a stale callback must not reset the
    // state or the button marker that now belong to the newer utterance.
    if(_ttsCurrentUtterance!==utter) return;
    _ttsSpeaking=false;
    _ttsCurrentUtterance=null;
    if(btn) btn.dataset.speaking='0';
  };
  utter.onend=settle;
  utter.onerror=settle;
  _ttsCurrentUtterance=utter;
  _ttsSpeaking=true;
  if(btn) btn.dataset.speaking='1';
  speechSynthesis.speak(utter);
}
// Halt any speech output and clear the playback bookkeeping, including the
// `data-speaking` marker on every element that currently carries it.
function stopTTS(){
  const synthAvailable='speechSynthesis' in window;
  if(synthAvailable) speechSynthesis.cancel();
  _ttsCurrentUtterance=null;
  _ttsSpeaking=false;
  const marked=document.querySelectorAll('[data-speaking="1"]');
  for(const el of marked){
    el.dataset.speaking='0';
  }
}
// Speak the newest assistant message when the `hermes-tts-auto-read`
// preference is 'true'. Does nothing when speech synthesis is unavailable,
// when no candidate element exists, or when the text is empty after cleaning.
function autoReadLastAssistant(){
  if(!('speechSynthesis' in window)) return;
  if(localStorage.getItem('hermes-tts-auto-read')!=='true') return;
  // Candidates: assistant rows and assistant segments that carry raw text.
  const candidates=document.querySelectorAll('.msg-row[data-role="assistant"], .assistant-segment[data-raw-text]');
  if(candidates.length===0) return;
  const newest=candidates[candidates.length-1];
  const raw=newest.dataset.rawText||'';
  if(raw.trim()==='') return;
  const speakable=_stripForTTS(raw);
  if(!speakable) return;
  const utterance=new SpeechSynthesisUtterance(speakable);
  // Saved voice, when it is still among the voices the browser offers.
  const preferredVoice=localStorage.getItem('hermes-tts-voice');
  const available=speechSynthesis.getVoices();
  if(preferredVoice&&available.length){
    const chosen=available.find(candidate=>candidate.name===preferredVoice);
    if(chosen) utterance.voice=chosen;
  }
  // Saved rate (0.5..2) and pitch (0..2); unparsable values leave the defaults.
  const rate=parseFloat(localStorage.getItem('hermes-tts-rate'));
  if(!Number.isNaN(rate)){
    utterance.rate=Math.max(0.5,Math.min(2,rate));
  }
  const pitch=parseFloat(localStorage.getItem('hermes-tts-pitch'));
  if(!Number.isNaN(pitch)){
    utterance.pitch=Math.max(0,Math.min(2,pitch));
  }
  speechSynthesis.speak(utterance);
}
// ── Reconnect banner (B4/B5: reload resilience) ──
const INFLIGHT_KEY = 'hermes-webui-inflight'; // localStorage key for in-flight session tracking
const INFLIGHT_STATE_KEY = 'hermes-webui-inflight-state'; // localStorage snapshots for mid-stream reload recovery
@@ -2864,6 +2976,7 @@ function renderMessages(){
const undoBtn = isLastAssistant ? `<button class="msg-action-btn" title="${t('undo_exchange')}" onclick="undoLastExchange()">${li('undo',13)}</button>` : '';
const retryBtn = isLastAssistant ? `<button class="msg-action-btn" title="${t('regenerate')}" onclick="regenerateResponse(this)">${li('rotate-ccw',13)}</button>` : '';
const copyBtn = `<button class="msg-copy-btn msg-action-btn" title="${t('copy')}" onclick="copyMsg(this)">${li('copy',13)}</button>`;
const ttsBtn = !isUser ? `<button class="msg-action-btn msg-tts-btn" title="${t('tts_listen')||'Listen'}" onclick="speakMessage(this)">${li('volume-2',13)}</button>` : '';
const tsVal=m._ts||m.timestamp;
// _formatInServerTz handles fractional-hour offsets (India +0530 etc.)
// correctly via offset arithmetic; bare toLocaleString is the browser-tz fallback.
@@ -2871,7 +2984,7 @@ function renderMessages(){
const tsTitle=tsVal?(_fmtSv?_fmtSv(new Date(tsVal*1000),{}):new Date(tsVal*1000).toLocaleString()):'';
const tsTime=_formatMessageFooterTimestamp(tsVal);
const timeHtml = tsTime ? `<span class="msg-time" title="${esc(tsTitle)}">${tsTime}</span>` : '';
const footHtml = `<div class="msg-foot">${timeHtml}<span class="msg-actions">${editBtn}${copyBtn}${retryBtn}</span></div>`;
const footHtml = `<div class="msg-foot">${timeHtml}<span class="msg-actions">${editBtn}${ttsBtn}${copyBtn}${retryBtn}</span></div>`;
if(_isContextCompactionMessage(m)){
if(compressionState || referenceNode){
+195
View File
@@ -0,0 +1,195 @@
"""
Tests for #499: TTS playback of agent responses via Web Speech API.
Verifies that TTS utility functions, speaker button rendering, and
settings controls are present in the WebUI codebase.
"""
import os
import re
STATIC_DIR = os.path.join(os.path.dirname(__file__), '..', 'static')
def _read(filename):
return open(os.path.join(STATIC_DIR, filename), encoding='utf-8').read()
class TestTtsUtilityFunctions:
    """Source-level checks that the core TTS helpers are present in ui.js."""

    def test_strip_for_tts_exists(self):
        ui_js = _read('ui.js')
        present = 'function _stripForTTS(' in ui_js
        assert present, \
            "_stripForTTS function not found in ui.js"

    def test_speak_message_exists(self):
        ui_js = _read('ui.js')
        present = 'function speakMessage(' in ui_js
        assert present, \
            "speakMessage function not found in ui.js"

    def test_stop_tts_exists(self):
        ui_js = _read('ui.js')
        present = 'function stopTTS(' in ui_js
        assert present, \
            "stopTTS function not found in ui.js"

    def test_auto_read_exists(self):
        ui_js = _read('ui.js')
        present = 'function autoReadLastAssistant(' in ui_js
        assert present, \
            "autoReadLastAssistant function not found in ui.js"

    def test_strip_code_blocks(self):
        """A ``` fence must appear somewhere after the _stripForTTS name."""
        ui_js = _read('ui.js')
        hit = re.search(r'_stripForTTS.*```', ui_js, re.DOTALL)
        assert hit, \
            "_stripForTTS must handle fenced code blocks"

    def test_strip_media_paths(self):
        """ui.js must mention both the MEDIA: prefix and its spoken label."""
        ui_js = _read('ui.js')
        required = ('MEDIA:', 'a file')
        assert all(token in ui_js for token in required), \
            "_stripForTTS must replace MEDIA: paths"

    def test_uses_speech_synthesis(self):
        """ui.js must build an utterance and hand it to speechSynthesis."""
        ui_js = _read('ui.js')
        builds_utterance = 'SpeechSynthesisUtterance' in ui_js
        assert builds_utterance, \
            "speakMessage must create SpeechSynthesisUtterance"
        calls_speak = 'speechSynthesis.speak' in ui_js
        assert calls_speak, \
            "speakMessage must call speechSynthesis.speak"
class TestTtsSpeakerButton:
    """Speaker button is rendered on assistant messages."""

    @staticmethod
    def _tts_button_line():
        """Return the first ui.js line mentioning ``msg-tts-btn``.

        Fails with an assertion message (rather than an IndexError) when no
        such line exists.
        """
        matches = [line for line in _read('ui.js').splitlines()
                   if 'msg-tts-btn' in line]
        assert matches, "TTS button class not found in ui.js"
        return matches[0]

    def test_tts_button_rendered(self):
        """ttsBtn must be generated for non-user messages."""
        src = _read('ui.js')
        assert 'msg-tts-btn' in src, \
            "TTS button class not found in ui.js"

    def test_tts_button_not_on_user_messages(self):
        """ttsBtn must only be added for non-user (assistant) messages."""
        tts_line = self._tts_button_line()
        # Require the negated guard: a bare ``isUser`` would also match an
        # inverted condition that renders the button on user messages only.
        assert '!isUser' in tts_line, \
            "TTS button should have user-check guard"

    def test_tts_button_in_footer(self):
        """ttsBtn must be included in the msg-actions span."""
        src = _read('ui.js')
        # Look only at the lines that build the footer's action row.
        foot_lines = [line for line in src.splitlines()
                      if 'footHtml' in line and 'msg-actions' in line]
        assert any('ttsBtn' in line for line in foot_lines), \
            "ttsBtn not included in footHtml msg-actions"

    def test_tts_button_uses_volume_icon(self):
        """Speaker button should use volume-2 icon."""
        tts_line = self._tts_button_line()
        assert 'volume-2' in tts_line, \
            "TTS button should use volume-2 icon"
class TestTtsSettings:
    """The TTS controls exist in index.html and are referenced from panels.js."""

    def test_tts_enabled_checkbox(self):
        markup = _read('index.html')
        control_id = 'settingsTtsEnabled'
        assert control_id in markup, \
            "TTS enabled checkbox not found in index.html"

    def test_tts_auto_read_checkbox(self):
        markup = _read('index.html')
        control_id = 'settingsTtsAutoRead'
        assert control_id in markup, \
            "TTS auto-read checkbox not found in index.html"

    def test_tts_voice_selector(self):
        markup = _read('index.html')
        control_id = 'settingsTtsVoice'
        assert control_id in markup, \
            "TTS voice selector not found in index.html"

    def test_tts_rate_slider(self):
        markup = _read('index.html')
        control_id = 'settingsTtsRate'
        assert control_id in markup, \
            "TTS rate slider not found in index.html"

    def test_tts_pitch_slider(self):
        markup = _read('index.html')
        control_id = 'settingsTtsPitch'
        assert control_id in markup, \
            "TTS pitch slider not found in index.html"

    def test_tts_settings_wired_in_panels(self):
        """panels.js must reference the enable checkbox and _applyTtsEnabled."""
        panels_js = _read('panels.js')
        references_checkbox = 'settingsTtsEnabled' in panels_js
        assert references_checkbox, \
            "TTS enabled setting not wired in panels.js"
        references_apply = '_applyTtsEnabled' in panels_js
        assert references_apply, \
            "_applyTtsEnabled not called in panels.js"

    def test_apply_tts_enabled_function(self):
        """panels.js must define the _applyTtsEnabled function."""
        panels_js = _read('panels.js')
        defined = 'function _applyTtsEnabled(' in panels_js
        assert defined, \
            "_applyTtsEnabled function not found in panels.js"
class TestTtsI18n:
    """The TTS translation keys appear somewhere in i18n.js."""

    def test_tts_listen_key(self):
        i18n_js = _read('i18n.js')
        present = "tts_listen:" in i18n_js
        assert present, \
            "tts_listen key not found in i18n.js"

    def test_tts_not_supported_key(self):
        i18n_js = _read('i18n.js')
        present = "tts_not_supported:" in i18n_js
        assert present, \
            "tts_not_supported key not found in i18n.js"

    def test_tts_settings_keys(self):
        i18n_js = _read('i18n.js')
        expected = (
            'settings_label_tts',
            'settings_label_tts_auto_read',
            'settings_label_tts_voice',
            'settings_label_tts_rate',
            'settings_label_tts_pitch',
        )
        # Report the first absent key, in declaration order.
        missing = next((key for key in expected if f"{key}:" not in i18n_js), None)
        assert missing is None, f"{missing} not found in i18n.js"
class TestTtsAutoRead:
    """messages.js references auto-read and the pause/resume calls."""

    def test_auto_read_called_in_messages(self):
        messages_js = _read('messages.js')
        referenced = 'autoReadLastAssistant' in messages_js
        assert referenced, \
            "autoReadLastAssistant not called in messages.js"

    def test_tts_pause_on_composer_focus(self):
        """messages.js must contain both the pause and the resume call."""
        messages_js = _read('messages.js')
        has_pause = 'speechSynthesis.pause' in messages_js
        assert has_pause, \
            "speechSynthesis.pause not called in messages.js"
        has_resume = 'speechSynthesis.resume' in messages_js
        assert has_resume, \
            "speechSynthesis.resume not called in messages.js"
class TestTtsBoot:
    """boot.js references _applyTtsEnabled so the state is applied at startup."""

    def test_apply_tts_on_boot(self):
        boot_js = _read('boot.js')
        referenced = '_applyTtsEnabled' in boot_js
        assert referenced, \
            "_applyTtsEnabled not called in boot.js"
class TestTtsStyles:
    """TTS CSS styles exist."""

    def test_tts_button_hidden_default(self):
        """The .msg-tts-btn rule must exist and hide the button by default."""
        src = _read('style.css')
        assert '.msg-tts-btn' in src, \
            ".msg-tts-btn CSS class not found in style.css"
        # Whitespace-tolerant match of a `.msg-tts-btn { ... display: none ... }` rule,
        # so the test verifies the opt-in default its name promises.
        hidden_rule = re.search(
            r'\.msg-tts-btn\s*\{[^}]*display\s*:\s*none', src)
        assert hidden_rule, \
            ".msg-tts-btn must be hidden (display:none) by default in style.css"

    def test_tts_pulse_animation(self):
        src = _read('style.css')
        assert 'tts-pulse' in src, \
            "tts-pulse animation not found in style.css"