mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-26 11:40:26 +00:00
Adds Web Speech API based text-to-speech for agent responses. Per-message speaker button in the action row, auto-read toggle for last assistant response, voice/rate/pitch controls in Settings. All TTS preferences stored in localStorage. Opt-in: TTS hidden by default until enabled in Settings. Strips markdown, code blocks, and MEDIA: paths via _stripForTTS() before speaking. Pauses synthesis when composer is focused. 26 regression tests in tests/test_499_tts_playback.py. i18n keys added for all 8 locales (en, ru, es, de, zh, zh-Hant, pt, ko). Closes #499. Co-authored-by: Felipe Linhares <fecolinhares@users.noreply.github.com>
This commit is contained in:
@@ -851,6 +851,8 @@ function applyBotName(){
|
||||
if(typeof applyLocaleToDOM==='function')applyLocaleToDOM();
|
||||
}
|
||||
applyBotName();
|
||||
// TTS: apply enabled state on boot so buttons show/hide correctly (#499)
|
||||
if(typeof _applyTtsEnabled==='function') _applyTtsEnabled(localStorage.getItem('hermes-tts-enabled')==='true');
|
||||
}catch(e){
|
||||
window._sendKey='enter';
|
||||
window._showTokenUsage=false;
|
||||
@@ -871,6 +873,7 @@ function applyBotName(){
|
||||
if(typeof applyLocaleToDOM==='function')applyLocaleToDOM();
|
||||
}
|
||||
applyBotName();
|
||||
if(typeof _applyTtsEnabled==='function') _applyTtsEnabled(localStorage.getItem('hermes-tts-enabled')==='true');
|
||||
}
|
||||
// Non-blocking update check (fire-and-forget, once per tab session)
|
||||
// ?test_updates=1 in URL forces banner display for testing (bypasses sessionStorage guards)
|
||||
|
||||
+90
-2
@@ -441,6 +441,17 @@ const LOCALES = {
|
||||
// Settings detail
|
||||
settings_label_sound: 'Notification sound',
|
||||
settings_desc_sound: 'Play a sound when the assistant finishes a response.',
|
||||
// TTS (#499)
|
||||
tts_listen: 'Listen',
|
||||
tts_not_supported: 'Speech synthesis not supported in this browser.',
|
||||
settings_label_tts: 'Text-to-Speech for responses',
|
||||
settings_desc_tts: "Show a speaker button on each assistant message to read it aloud using your browser's speech synthesis.",
|
||||
settings_label_tts_auto_read: 'Auto-read responses aloud',
|
||||
settings_desc_tts_auto_read: 'Automatically speak each new assistant response when it finishes. Pauses when you start typing.',
|
||||
settings_label_tts_voice: 'Voice',
|
||||
settings_desc_tts_voice: "Preferred voice. Populated from your browser's available voices.",
|
||||
settings_label_tts_rate: 'Speech rate',
|
||||
settings_label_tts_pitch: 'Speech pitch',
|
||||
settings_label_notifications: 'Browser notifications',
|
||||
settings_desc_notifications: 'Show a system notification when a response completes while the app is in the background.',
|
||||
settings_desc_token_usage: 'Displays input/output token count below each assistant reply. Also toggled with /usage.',
|
||||
@@ -1511,6 +1522,17 @@ const LOCALES = {
|
||||
html_error: 'Не удалось загрузить предпросмотр HTML',
|
||||
html_open_full: 'Открыть на всю страницу',
|
||||
html_sandbox_label: 'Предпросмотр HTML',
|
||||
// TTS (#499)
|
||||
tts_listen: 'Прослушать',
|
||||
tts_not_supported: 'Синтез речи не поддерживается',
|
||||
settings_label_tts: 'Синтез речи для ответов',
|
||||
settings_desc_tts: 'Показать кнопку динамика на сообщениях ассистента',
|
||||
settings_label_tts_auto_read: 'Авто-чтение ответов',
|
||||
settings_desc_tts_auto_read: 'Автоматически озвучивать ответы ассистента',
|
||||
settings_label_tts_voice: 'Голос',
|
||||
settings_desc_tts_voice: 'Выберите голос для синтеза речи',
|
||||
settings_label_tts_rate: 'Скорость речи',
|
||||
settings_label_tts_pitch: 'Тон речи',
|
||||
},
|
||||
|
||||
es: {
|
||||
@@ -2231,6 +2253,17 @@ const LOCALES = {
|
||||
html_error: 'Error al cargar la vista previa de HTML',
|
||||
html_open_full: 'Abrir página completa',
|
||||
html_sandbox_label: 'Vista previa de HTML',
|
||||
// TTS (#499)
|
||||
tts_listen: 'Escuchar',
|
||||
tts_not_supported: 'Síntesis de voz no disponible',
|
||||
settings_label_tts: 'Texto a voz para respuestas',
|
||||
settings_desc_tts: 'Mostrar botón de altavoz en mensajes del asistente',
|
||||
settings_label_tts_auto_read: 'Leer respuestas automáticamente',
|
||||
settings_desc_tts_auto_read: 'Leer en voz alta las respuestas del asistente automáticamente',
|
||||
settings_label_tts_voice: 'Voz',
|
||||
settings_desc_tts_voice: 'Seleccionar voz para síntesis de voz',
|
||||
settings_label_tts_rate: 'Velocidad de voz',
|
||||
settings_label_tts_pitch: 'Tono de voz',
|
||||
},
|
||||
|
||||
de: {
|
||||
@@ -2955,7 +2988,18 @@ const LOCALES = {
|
||||
html_error: 'HTML-Vorschau konnte nicht geladen werden',
|
||||
html_open_full: 'Vollständige Seite öffnen',
|
||||
html_sandbox_label: 'HTML-Vorschau',
|
||||
},
|
||||
// TTS (#499)
|
||||
tts_listen: 'Anhören',
|
||||
tts_not_supported: 'Sprachsynthese nicht verfügbar',
|
||||
settings_label_tts: 'Text-zu-Sprache für Antworten',
|
||||
settings_desc_tts: 'Lautsprecher-Symbol auf Assistenten-Nachrichten anzeigen',
|
||||
settings_label_tts_auto_read: 'Antworten automatisch vorlesen',
|
||||
settings_desc_tts_auto_read: 'Assistenten-Antworten automatisch vorlesen',
|
||||
settings_label_tts_voice: 'Stimme',
|
||||
settings_desc_tts_voice: 'Stimme für Sprachsynthese auswählen',
|
||||
settings_label_tts_rate: 'Sprechgeschwindigkeit',
|
||||
settings_label_tts_pitch: 'Tonhöhe',
|
||||
},
|
||||
|
||||
zh: {
|
||||
_lang: 'zh',
|
||||
@@ -3673,6 +3717,17 @@ const LOCALES = {
|
||||
excalidraw_empty: '空图表',
|
||||
excalidraw_render_error: '渲染图表失败',
|
||||
excalidraw_simplified: '简化 SVG 预览 — 与 Excalidraw 画布不完全相同',
|
||||
// TTS (#499)
|
||||
tts_listen: '收听',
|
||||
tts_not_supported: '语音合成不可用',
|
||||
settings_label_tts: '回复语音合成',
|
||||
settings_desc_tts: '在助手消息上显示扬声器按钮',
|
||||
settings_label_tts_auto_read: '自动朗读回复',
|
||||
settings_desc_tts_auto_read: '自动朗读助手回复',
|
||||
settings_label_tts_voice: '语音',
|
||||
settings_desc_tts_voice: '选择语音合成声音',
|
||||
settings_label_tts_rate: '语速',
|
||||
settings_label_tts_pitch: '音调',
|
||||
},
|
||||
|
||||
// Traditional Chinese (zh-Hant)
|
||||
@@ -4499,6 +4554,17 @@ const LOCALES = {
|
||||
html_error: 'HTML 預覽載入失敗',
|
||||
html_open_full: '開啟完整頁面',
|
||||
html_sandbox_label: 'HTML 預覽',
|
||||
// TTS (#499)
|
||||
tts_listen: '收聽',
|
||||
tts_not_supported: '語音合成無法使用',
|
||||
settings_label_tts: '回覆語音合成',
|
||||
settings_desc_tts: '在助手訊息上顯示喇叭按鈕',
|
||||
settings_label_tts_auto_read: '自動朗讀回覆',
|
||||
settings_desc_tts_auto_read: '自動朗讀助手回覆',
|
||||
settings_label_tts_voice: '語音',
|
||||
settings_desc_tts_voice: '選擇語音合成聲音',
|
||||
settings_label_tts_rate: '語速',
|
||||
settings_label_tts_pitch: '音調',
|
||||
},
|
||||
|
||||
|
||||
@@ -5138,7 +5204,18 @@ const LOCALES = {
|
||||
approval_skip: 'Pular',
|
||||
approval_skip_title: 'Pular este prompt de aprovação',
|
||||
approval_skip_all: 'Pular todos',
|
||||
approval_skip_all_title: 'Pular todos prompts de aprovação nesta sessão'
|
||||
approval_skip_all_title: 'Pular todos prompts de aprovação nesta sessão',
|
||||
// TTS (#499)
|
||||
tts_listen: 'Ouvir',
|
||||
tts_not_supported: 'Síntese de voz não disponível',
|
||||
settings_label_tts: 'Texto para voz nas respostas',
|
||||
settings_desc_tts: 'Mostrar botão de alto-falante nas mensagens do assistente',
|
||||
settings_label_tts_auto_read: 'Ler respostas automaticamente',
|
||||
settings_desc_tts_auto_read: 'Ler automaticamente as respostas do assistente',
|
||||
settings_label_tts_voice: 'Voz',
|
||||
settings_desc_tts_voice: 'Selecionar voz para síntese de voz',
|
||||
settings_label_tts_rate: 'Velocidade da fala',
|
||||
settings_label_tts_pitch: 'Tom da fala',
|
||||
},
|
||||
ko: {
|
||||
_lang: 'ko',
|
||||
@@ -5911,6 +5988,17 @@ const LOCALES = {
|
||||
excalidraw_empty: '빈 다이어그램',
|
||||
excalidraw_render_error: '다이어그램 렌더링 실패',
|
||||
excalidraw_simplified: '단순화된 SVG 미리보기 — Excalidraw 캔버스와 픽셀 동일하지 않음',
|
||||
// TTS (#499)
|
||||
tts_listen: '듣기',
|
||||
tts_not_supported: '음성 합성을 사용할 수 없습니다',
|
||||
settings_label_tts: '답변 음성 합성',
|
||||
settings_desc_tts: '도움말 메시지에 스피커 버튼 표시',
|
||||
settings_label_tts_auto_read: '답변 자동 읽기',
|
||||
settings_desc_tts_auto_read: '도움말 답변을 자동으로 읽어줌',
|
||||
settings_label_tts_voice: '음성',
|
||||
settings_desc_tts_voice: '음성 합성 음성 선택',
|
||||
settings_label_tts_rate: '말 속도',
|
||||
settings_label_tts_pitch: '말 톤',
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -683,6 +683,41 @@
|
||||
</label>
|
||||
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_sound">Play a sound when the assistant finishes a response.</div>
|
||||
</div>
|
||||
<div class="settings-field">
|
||||
<label style="display:flex;align-items:center;gap:8px;cursor:pointer">
|
||||
<input type="checkbox" id="settingsTtsEnabled" style="width:15px;height:15px;accent-color:var(--accent)">
|
||||
<span data-i18n="settings_label_tts">Text-to-Speech for responses</span>
|
||||
</label>
|
||||
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts">Show a speaker button on each assistant message to read it aloud using your browser's speech synthesis.</div>
|
||||
</div>
|
||||
<div class="settings-field">
|
||||
<label style="display:flex;align-items:center;gap:8px;cursor:pointer">
|
||||
<input type="checkbox" id="settingsTtsAutoRead" style="width:15px;height:15px;accent-color:var(--accent)">
|
||||
<span data-i18n="settings_label_tts_auto_read">Auto-read responses aloud</span>
|
||||
</label>
|
||||
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts_auto_read">Automatically speak each new assistant response when it finishes. Pauses when you start typing.</div>
|
||||
</div>
|
||||
<div class="settings-field">
|
||||
<label for="settingsTtsVoice" data-i18n="settings_label_tts_voice">Voice</label>
|
||||
<select id="settingsTtsVoice" style="width:100%;padding:8px;background:var(--code-bg);color:var(--text);border:1px solid var(--border2);border-radius:6px">
|
||||
<option value="">Default system voice</option>
|
||||
</select>
|
||||
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts_voice">Preferred voice. Populated from your browser's available voices.</div>
|
||||
</div>
|
||||
<div class="settings-field">
|
||||
<label for="settingsTtsRate" data-i18n="settings_label_tts_rate">Speech rate</label>
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-top:4px">
|
||||
<input type="range" id="settingsTtsRate" min="0.5" max="2" step="0.1" value="1" style="flex:1;accent-color:var(--accent)">
|
||||
<span id="settingsTtsRateValue" style="font-size:12px;color:var(--muted);min-width:32px;text-align:right">1.0x</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="settings-field">
|
||||
<label for="settingsTtsPitch" data-i18n="settings_label_tts_pitch">Speech pitch</label>
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-top:4px">
|
||||
<input type="range" id="settingsTtsPitch" min="0" max="2" step="0.1" value="1" style="flex:1;accent-color:var(--accent)">
|
||||
<span id="settingsTtsPitchValue" style="font-size:12px;color:var(--muted);min-width:32px;text-align:right">1.0</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="settings-field">
|
||||
<label style="display:flex;align-items:center;gap:8px;cursor:pointer">
|
||||
<input type="checkbox" id="settingsNotificationsEnabled" style="width:15px;height:15px;accent-color:var(--accent)">
|
||||
|
||||
@@ -34,6 +34,10 @@ function _markActiveSessionViewedOnReturn() {
|
||||
|
||||
document.addEventListener('visibilitychange', _markActiveSessionViewedOnReturn);
|
||||
window.addEventListener('focus', _markActiveSessionViewedOnReturn);
|
||||
// TTS: pause speech synthesis when user focuses the composer (#499)
|
||||
const _msgEl=document.getElementById('msg');
if(_msgEl){
  // Hold playback while the composer has focus so speech does not talk over typing.
  _msgEl.addEventListener('focus', ()=>{
    if(!('speechSynthesis' in window)) return;
    if(speechSynthesis.speaking) speechSynthesis.pause();
  });
  // Pick playback up again once the composer loses focus.
  _msgEl.addEventListener('blur', ()=>{
    if(!('speechSynthesis' in window)) return;
    if(speechSynthesis.paused) speechSynthesis.resume();
  });
}
|
||||
|
||||
async function send(){
|
||||
const text=$('msg').value.trim();
|
||||
@@ -846,6 +850,8 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
|
||||
if(!S.messages.some(m=>m.role==='assistant'&&String(m.content||'').trim())&&!assistantText){removeThinking();S.messages.push({role:'assistant',content:'**No response received.** Check your API key and model selection.'});}
|
||||
if(isSessionViewed) _markSessionViewed(completedSid, completedSession.message_count ?? S.messages.length);
|
||||
syncTopbar();renderMessages();loadDir('.');
|
||||
// TTS auto-read: speak the last assistant response if enabled (#499)
|
||||
if(typeof autoReadLastAssistant==='function') setTimeout(()=>autoReadLastAssistant(), 300);
|
||||
}
|
||||
_queueDrainSid=activeSid;renderSessionList();setBusy(false);setStatus('');
|
||||
setComposerStatus('');
|
||||
|
||||
@@ -2603,6 +2603,13 @@ function _markSettingsDirty(){
|
||||
_settingsDirty = true;
|
||||
}
|
||||
|
||||
// Apply TTS enabled state: show/hide TTS buttons on all assistant messages
|
||||
/**
 * Show or hide the per-message TTS speaker buttons.
 *
 * The stylesheet hides `.msg-tts-btn` by default, so merely clearing the inline
 * `display` value leaves the buttons hidden. Visibility is therefore driven by
 * a small injected <style> rule with a slightly higher specificity than the
 * stylesheet default. Because it is a rule rather than a per-element style, it
 * also covers buttons that are rendered after this call.
 *
 * @param {boolean} enabled - true to show the speaker buttons, false to hide them.
 */
function _applyTtsEnabled(enabled){
  const STYLE_ID='hermes-tts-visibility';
  let styleEl=document.getElementById(STYLE_ID);
  if(!styleEl){
    styleEl=document.createElement('style');
    styleEl.id=STYLE_ID;
    (document.head||document.documentElement).appendChild(styleEl);
  }
  // `button.` prefix outranks the plain class selector regardless of source order.
  styleEl.textContent=enabled
    ? 'button.msg-tts-btn{display:inline-block;}'
    : 'button.msg-tts-btn{display:none;}';
  // Drop any inline override left by earlier calls so the rule above decides.
  document.querySelectorAll('.msg-tts-btn').forEach(btn=>{
    btn.style.display='';
  });
}
|
||||
|
||||
function _appearancePayloadFromUi(){
|
||||
return {
|
||||
theme: ($('settingsTheme')||{}).value || localStorage.getItem('hermes-theme') || 'dark',
|
||||
@@ -2781,6 +2788,46 @@ async function loadSettingsPanel(){
|
||||
if(updateCb){updateCb.checked=settings.check_for_updates!==false;updateCb.addEventListener('change',_markSettingsDirty,{once:false});}
|
||||
const soundCb=$('settingsSoundEnabled');
|
||||
if(soundCb){soundCb.checked=!!settings.sound_enabled;soundCb.addEventListener('change',_markSettingsDirty,{once:false});}
|
||||
// TTS settings (localStorage-only, no server round-trip needed)
|
||||
const ttsEnabledCb=$('settingsTtsEnabled');
|
||||
if(ttsEnabledCb){ttsEnabledCb.checked=localStorage.getItem('hermes-tts-enabled')==='true';ttsEnabledCb.onchange=function(){localStorage.setItem('hermes-tts-enabled',this.checked?'true':'false');_applyTtsEnabled(this.checked);};}
|
||||
const ttsAutoReadCb=$('settingsTtsAutoRead');
|
||||
if(ttsAutoReadCb){ttsAutoReadCb.checked=localStorage.getItem('hermes-tts-auto-read')==='true';ttsAutoReadCb.onchange=function(){localStorage.setItem('hermes-tts-auto-read',this.checked?'true':'false');};}
|
||||
// Populate voice selector from speechSynthesis
|
||||
const ttsVoiceSel=$('settingsTtsVoice');
|
||||
if(ttsVoiceSel&&'speechSynthesis' in window){
|
||||
const populateVoices=()=>{
|
||||
const voices=speechSynthesis.getVoices();
|
||||
const current=localStorage.getItem('hermes-tts-voice')||'';
|
||||
ttsVoiceSel.innerHTML='<option value="">Default system voice</option>';
|
||||
voices.forEach(v=>{
|
||||
const opt=document.createElement('option');
|
||||
opt.value=v.name;opt.textContent=v.name+(v.lang?' ('+v.lang+')':'');
|
||||
if(v.name===current) opt.selected=true;
|
||||
ttsVoiceSel.appendChild(opt);
|
||||
});
|
||||
};
|
||||
populateVoices();
|
||||
speechSynthesis.addEventListener('voiceschanged',populateVoices,{once:true});
|
||||
ttsVoiceSel.onchange=function(){localStorage.setItem('hermes-tts-voice',this.value);};
|
||||
}
|
||||
// TTS rate/pitch sliders
|
||||
const ttsRateSlider=$('settingsTtsRate');
|
||||
const ttsRateValue=$('settingsTtsRateValue');
|
||||
if(ttsRateSlider){
|
||||
const savedRate=localStorage.getItem('hermes-tts-rate');
|
||||
ttsRateSlider.value=savedRate||'1';
|
||||
if(ttsRateValue) ttsRateValue.textContent=parseFloat(ttsRateSlider.value).toFixed(1)+'x';
|
||||
ttsRateSlider.oninput=function(){if(ttsRateValue)ttsRateValue.textContent=parseFloat(this.value).toFixed(1)+'x';localStorage.setItem('hermes-tts-rate',this.value);};
|
||||
}
|
||||
const ttsPitchSlider=$('settingsTtsPitch');
|
||||
const ttsPitchValue=$('settingsTtsPitchValue');
|
||||
if(ttsPitchSlider){
|
||||
const savedPitch=localStorage.getItem('hermes-tts-pitch');
|
||||
ttsPitchSlider.value=savedPitch||'1';
|
||||
if(ttsPitchValue) ttsPitchValue.textContent=parseFloat(ttsPitchSlider.value).toFixed(1);
|
||||
ttsPitchSlider.oninput=function(){if(ttsPitchValue)ttsPitchValue.textContent=parseFloat(this.value).toFixed(1);localStorage.setItem('hermes-tts-pitch',this.value);};
|
||||
}
|
||||
const notifCb=$('settingsNotificationsEnabled');
|
||||
if(notifCb){notifCb.checked=!!settings.notifications_enabled;notifCb.addEventListener('change',_markSettingsDirty,{once:false});}
|
||||
// show_thinking has no settings panel checkbox — controlled via /reasoning show|hide
|
||||
|
||||
@@ -1287,6 +1287,10 @@
|
||||
.msg-row:hover .msg-actions{opacity:1;}
|
||||
.msg-action-btn{background:none;border:none;color:var(--muted);cursor:pointer;font-size:13px;padding:2px 5px;border-radius:5px;transition:color .12s,background .12s;line-height:1;}
|
||||
.msg-action-btn:hover{color:var(--accent-text);background:var(--accent-bg);}
|
||||
/* TTS speaker button: hidden by default, shown when TTS enabled */
|
||||
.msg-tts-btn{display:none;}
|
||||
.msg-tts-btn[data-speaking="1"]{color:var(--accent);animation:tts-pulse 1s ease-in-out infinite;}
|
||||
@keyframes tts-pulse{0%,100%{opacity:1}50%{opacity:.5}}
|
||||
|
||||
/* ── Edit message inline ── */
|
||||
.msg-edit-area{width:100%;background:rgba(255,255,255,.05);border:1px solid var(--accent-bg);border-radius:8px;color:var(--text);padding:10px 12px;font-size:14px;font-family:inherit;line-height:1.6;resize:none;outline:none;min-height:60px;box-sizing:border-box;box-shadow:0 0 0 3px var(--accent-bg);margin-top:4px;}
|
||||
|
||||
+114
-1
@@ -2033,6 +2033,118 @@ function copyMsg(btn){
|
||||
}).catch(()=>showToast(t('copy_failed')));
|
||||
}
|
||||
|
||||
// ── TTS: Text-to-Speech via Web Speech API (#499) ──
|
||||
// Strips markdown, code blocks, and MEDIA: paths for clean speech output.
|
||||
/**
 * Reduce a raw (markdown) assistant message to plain text suitable for speech.
 *
 * Removes fenced and inline code, replaces images/links by their label,
 * replaces MEDIA: paths with the words "a file", drops HTML tags, heading
 * markers, list bullets and emphasis markers, then collapses whitespace.
 *
 * Underscore emphasis is only stripped when the underscores sit on word
 * boundaries, so identifiers such as `my_var_name` are left intact.
 *
 * @param {string|null|undefined} text - raw message text.
 * @returns {string} cleaned text; '' for null/undefined/blank input.
 */
function _stripForTTS(text){
  if(text==null) return '';
  let out=String(text);
  // Remove code blocks entirely (```)
  out=out.replace(/```[\s\S]*?```/g,' ');
  // Remove inline code
  out=out.replace(/`[^`]+`/g,' ');
  // Images: keep only the alt text (must run before the link rule)
  out=out.replace(/!\[([^\]]*)\]\([^)]+\)/g,'$1');
  // Links: keep the link text, drop the URL
  out=out.replace(/\[([^\]]+)\]\([^)]+\)/g,'$1');
  // Replace MEDIA: paths with a simple label (before emphasis rules can touch the path)
  out=out.replace(/MEDIA:[^\s]+/g,'a file');
  // Strip HTML tags that may leak through markdown
  out=out.replace(/<[^>]+>/g,' ');
  // Strip heading markers
  out=out.replace(/^#{1,6}\s+/gm,'');
  // Strip list bullets so a leading "*" is not mistaken for emphasis or read aloud
  out=out.replace(/^[ \t]*[-*+][ \t]+/gm,'');
  // Strip bold/italic written with asterisks
  out=out.replace(/\*\*(.+?)\*\*/g,'$1');
  out=out.replace(/\*(.+?)\*/g,'$1');
  // Strip bold/italic written with underscores, only at word boundaries
  out=out.replace(/(^|[^A-Za-z0-9_])__(.+?)__(?![A-Za-z0-9_])/g,'$1$2');
  out=out.replace(/(^|[^A-Za-z0-9_])_(.+?)_(?![A-Za-z0-9_])/g,'$1$2');
  // Collapse whitespace
  out=out.replace(/\s+/g,' ').trim();
  return out;
}
|
||||
|
||||
let _ttsSpeaking=false;
|
||||
let _ttsCurrentUtterance=null;
|
||||
|
||||
/**
 * Toggle speech playback for the assistant message that owns `btn`.
 *
 * Clicking the button of the message that is currently being read stops
 * playback; clicking any other button stops the current playback and starts
 * reading that message. The text is taken from the closest ancestor carrying
 * `data-raw-text`, cleaned with _stripForTTS(), and spoken with the voice,
 * rate and pitch saved in localStorage.
 *
 * @param {HTMLElement} btn - the speaker button that was clicked.
 */
function speakMessage(btn){
  if(!('speechSynthesis' in window)){
    showToast(t('tts_not_supported')||'Speech synthesis not supported in this browser.');
    return;
  }
  // If already speaking this message, a second click means "stop".
  if(btn&&btn.dataset.speaking==='1'){
    stopTTS();
    return;
  }
  // Only one message is read at a time.
  stopTTS();

  const row=btn?btn.closest('[data-raw-text]'):null;
  const text=row?row.dataset.rawText:'';
  if(!text) return;

  const clean=_stripForTTS(text);
  if(!clean) return;

  const utter=new SpeechSynthesisUtterance(clean);

  // Apply saved voice preference (silently falls back to the default voice).
  const savedVoice=localStorage.getItem('hermes-tts-voice');
  if(savedVoice){
    const match=speechSynthesis.getVoices().find(v=>v.name===savedVoice);
    if(match) utter.voice=match;
  }

  // Apply saved rate/pitch, clamped to the ranges offered by the settings sliders.
  const savedRate=parseFloat(localStorage.getItem('hermes-tts-rate'));
  if(!Number.isNaN(savedRate)) utter.rate=Math.min(2,Math.max(0.5,savedRate));
  const savedPitch=parseFloat(localStorage.getItem('hermes-tts-pitch'));
  if(!Number.isNaN(savedPitch)) utter.pitch=Math.min(2,Math.max(0,savedPitch));

  _ttsCurrentUtterance=utter;
  _ttsSpeaking=true;
  if(btn) btn.dataset.speaking='1';

  // end/error events of a cancelled utterance are delivered asynchronously and
  // may arrive after a newer utterance has started. Only the utterance that is
  // still current is allowed to reset the shared state and its button.
  const finish=()=>{
    if(_ttsCurrentUtterance!==utter) return;
    _ttsSpeaking=false;
    _ttsCurrentUtterance=null;
    if(btn) btn.dataset.speaking='0';
  };
  utter.onend=finish;
  utter.onerror=finish;

  speechSynthesis.speak(utter);
}
|
||||
|
||||
/**
 * Stop any speech in progress and reset all TTS UI/state.
 *
 * The handlers of the utterance being cancelled are detached first: the
 * browser delivers the resulting end/error event asynchronously, and by then a
 * new utterance may already own the shared state.
 */
function stopTTS(){
  if(_ttsCurrentUtterance){
    _ttsCurrentUtterance.onend=null;
    _ttsCurrentUtterance.onerror=null;
  }
  if('speechSynthesis' in window){
    speechSynthesis.cancel();
  }
  _ttsSpeaking=false;
  _ttsCurrentUtterance=null;
  // Reset all speaking buttons
  document.querySelectorAll('[data-speaking="1"]').forEach(btn=>{ btn.dataset.speaking='0'; });
}
|
||||
|
||||
/**
 * Speak the most recent assistant message when the auto-read preference
 * (`hermes-tts-auto-read` in localStorage) is 'true'.
 *
 * Uses the same voice/rate/pitch preferences as speakMessage(). Any speech
 * still queued or playing is stopped first, and the new utterance is recorded
 * in the shared TTS state so stopTTS() and later playback requests see it.
 */
function autoReadLastAssistant(){
  if(!('speechSynthesis' in window)) return;
  if(localStorage.getItem('hermes-tts-auto-read')!=='true') return;

  // Find the last assistant message segment in the DOM
  const rows=document.querySelectorAll('.msg-row[data-role="assistant"], .assistant-segment[data-raw-text]');
  if(!rows.length) return;
  const last=rows[rows.length-1];
  const text=last.dataset.rawText||'';
  if(!text.trim()) return;
  const clean=_stripForTTS(text);
  if(!clean) return;

  // Do not pile up behind an earlier response that is still being read.
  stopTTS();

  const utter=new SpeechSynthesisUtterance(clean);

  const savedVoice=localStorage.getItem('hermes-tts-voice');
  if(savedVoice){
    const match=speechSynthesis.getVoices().find(v=>v.name===savedVoice);
    if(match) utter.voice=match;
  }
  // Clamp to the ranges offered by the settings sliders.
  const savedRate=parseFloat(localStorage.getItem('hermes-tts-rate'));
  if(!Number.isNaN(savedRate)) utter.rate=Math.min(2,Math.max(0.5,savedRate));
  const savedPitch=parseFloat(localStorage.getItem('hermes-tts-pitch'));
  if(!Number.isNaN(savedPitch)) utter.pitch=Math.min(2,Math.max(0,savedPitch));

  _ttsCurrentUtterance=utter;
  _ttsSpeaking=true;

  // Ignore late events from an utterance that has already been replaced.
  const finish=()=>{
    if(_ttsCurrentUtterance!==utter) return;
    _ttsSpeaking=false;
    _ttsCurrentUtterance=null;
  };
  utter.onend=finish;
  utter.onerror=finish;

  speechSynthesis.speak(utter);
}
|
||||
|
||||
// ── Reconnect banner (B4/B5: reload resilience) ──
|
||||
const INFLIGHT_KEY = 'hermes-webui-inflight'; // localStorage key for in-flight session tracking
|
||||
const INFLIGHT_STATE_KEY = 'hermes-webui-inflight-state'; // localStorage snapshots for mid-stream reload recovery
|
||||
@@ -2864,6 +2976,7 @@ function renderMessages(){
|
||||
const undoBtn = isLastAssistant ? `<button class="msg-action-btn" title="${t('undo_exchange')}" onclick="undoLastExchange()">${li('undo',13)}</button>` : '';
|
||||
const retryBtn = isLastAssistant ? `<button class="msg-action-btn" title="${t('regenerate')}" onclick="regenerateResponse(this)">${li('rotate-ccw',13)}</button>` : '';
|
||||
const copyBtn = `<button class="msg-copy-btn msg-action-btn" title="${t('copy')}" onclick="copyMsg(this)">${li('copy',13)}</button>`;
|
||||
const ttsBtn = !isUser ? `<button class="msg-action-btn msg-tts-btn" title="${t('tts_listen')||'Listen'}" onclick="speakMessage(this)">${li('volume-2',13)}</button>` : '';
|
||||
const tsVal=m._ts||m.timestamp;
|
||||
// _formatInServerTz handles fractional-hour offsets (India +0530 etc.)
|
||||
// correctly via offset arithmetic; bare toLocaleString is the browser-tz fallback.
|
||||
@@ -2871,7 +2984,7 @@ function renderMessages(){
|
||||
const tsTitle=tsVal?(_fmtSv?_fmtSv(new Date(tsVal*1000),{}):new Date(tsVal*1000).toLocaleString()):'';
|
||||
const tsTime=_formatMessageFooterTimestamp(tsVal);
|
||||
const timeHtml = tsTime ? `<span class="msg-time" title="${esc(tsTitle)}">${tsTime}</span>` : '';
|
||||
const footHtml = `<div class="msg-foot">${timeHtml}<span class="msg-actions">${editBtn}${copyBtn}${retryBtn}</span></div>`;
|
||||
const footHtml = `<div class="msg-foot">${timeHtml}<span class="msg-actions">${editBtn}${ttsBtn}${copyBtn}${retryBtn}</span></div>`;
|
||||
|
||||
if(_isContextCompactionMessage(m)){
|
||||
if(compressionState || referenceNode){
|
||||
|
||||
@@ -0,0 +1,195 @@
|
||||
"""
|
||||
Tests for #499: TTS playback of agent responses via Web Speech API.
|
||||
|
||||
Verifies that TTS utility functions, speaker button rendering, and
|
||||
settings controls are present in the WebUI codebase.
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
|
||||
STATIC_DIR = os.path.join(os.path.dirname(__file__), '..', 'static')
|
||||
|
||||
|
||||
def _read(filename):
    """Return the UTF-8 text of ``filename`` located in ``STATIC_DIR``.

    The file is opened in a ``with`` block so the handle is closed
    deterministically instead of being left to the garbage collector.
    """
    with open(os.path.join(STATIC_DIR, filename), encoding='utf-8') as fh:
        return fh.read()
|
||||
|
||||
|
||||
class TestTtsUtilityFunctions:
    """TTS core functions exist in ui.js."""

    @staticmethod
    def _strip_for_tts_source():
        """Return the ui.js text from ``function _stripForTTS(`` up to the next
        top-level ``function`` declaration, so that content checks look at that
        function rather than at the whole file."""
        src = _read('ui.js')
        start = src.find('function _stripForTTS(')
        assert start != -1, "_stripForTTS function not found in ui.js"
        end = src.find('\nfunction ', start + 1)
        return src[start:end if end != -1 else len(src)]

    def test_strip_for_tts_exists(self):
        src = _read('ui.js')
        assert 'function _stripForTTS(' in src, \
            "_stripForTTS function not found in ui.js"

    def test_speak_message_exists(self):
        src = _read('ui.js')
        assert 'function speakMessage(' in src, \
            "speakMessage function not found in ui.js"

    def test_stop_tts_exists(self):
        src = _read('ui.js')
        assert 'function stopTTS(' in src, \
            "stopTTS function not found in ui.js"

    def test_auto_read_exists(self):
        src = _read('ui.js')
        assert 'function autoReadLastAssistant(' in src, \
            "autoReadLastAssistant function not found in ui.js"

    def test_strip_code_blocks(self):
        """_stripForTTS must remove ``` code blocks."""
        body = self._strip_for_tts_source()
        assert '```' in body, \
            "_stripForTTS must handle fenced code blocks"

    def test_strip_media_paths(self):
        """_stripForTTS must replace MEDIA: paths."""
        body = self._strip_for_tts_source()
        assert 'MEDIA:' in body and 'a file' in body, \
            "_stripForTTS must replace MEDIA: paths"

    def test_uses_speech_synthesis(self):
        """speakMessage must use window.speechSynthesis."""
        src = _read('ui.js')
        assert 'SpeechSynthesisUtterance' in src, \
            "speakMessage must create SpeechSynthesisUtterance"
        assert 'speechSynthesis.speak' in src, \
            "speakMessage must call speechSynthesis.speak"
|
||||
|
||||
|
||||
class TestTtsSpeakerButton:
    """Speaker button is rendered on assistant messages."""

    @staticmethod
    def _tts_button_line():
        """Return the ui.js line that builds the speaker ``<button>`` markup.

        Lines that merely mention the class (e.g. in a selector) are ignored,
        and a missing line fails with a readable message instead of IndexError.
        """
        src = _read('ui.js')
        lines = [l for l in src.splitlines()
                 if 'msg-tts-btn' in l and '<button' in l]
        assert lines, "TTS button class not found in ui.js"
        return lines[0]

    def test_tts_button_rendered(self):
        """ttsBtn must be generated for non-user messages."""
        src = _read('ui.js')
        assert 'msg-tts-btn' in src, \
            "TTS button class not found in ui.js"

    def test_tts_button_not_on_user_messages(self):
        """ttsBtn must only be added for non-user (assistant) messages."""
        tts_line = self._tts_button_line()
        # Require the negated guard: a bare `isUser` check would also match an
        # inverted condition that renders the button on user messages only.
        assert '!isUser' in tts_line, \
            "TTS button should have user-check guard"

    def test_tts_button_in_footer(self):
        """ttsBtn must be included in the msg-actions span."""
        src = _read('ui.js')
        # The footHtml line should include ttsBtn
        foot_lines = [l for l in src.splitlines() if 'footHtml' in l and 'msg-actions' in l]
        assert any('ttsBtn' in l for l in foot_lines), \
            "ttsBtn not included in footHtml msg-actions"

    def test_tts_button_uses_volume_icon(self):
        """Speaker button should use volume-2 icon."""
        tts_line = self._tts_button_line()
        assert 'volume-2' in tts_line, \
            "TTS button should use volume-2 icon"
|
||||
|
||||
|
||||
class TestTtsSettings:
    """The TTS controls are present in index.html and hooked up in panels.js."""

    def test_tts_enabled_checkbox(self):
        html = _read('index.html')
        present = 'settingsTtsEnabled' in html
        assert present, "TTS enabled checkbox not found in index.html"

    def test_tts_auto_read_checkbox(self):
        html = _read('index.html')
        present = 'settingsTtsAutoRead' in html
        assert present, "TTS auto-read checkbox not found in index.html"

    def test_tts_voice_selector(self):
        html = _read('index.html')
        present = 'settingsTtsVoice' in html
        assert present, "TTS voice selector not found in index.html"

    def test_tts_rate_slider(self):
        html = _read('index.html')
        present = 'settingsTtsRate' in html
        assert present, "TTS rate slider not found in index.html"

    def test_tts_pitch_slider(self):
        html = _read('index.html')
        present = 'settingsTtsPitch' in html
        assert present, "TTS pitch slider not found in index.html"

    def test_tts_settings_wired_in_panels(self):
        """panels.js must reference the enable checkbox and _applyTtsEnabled."""
        panels = _read('panels.js')
        has_checkbox = 'settingsTtsEnabled' in panels
        assert has_checkbox, "TTS enabled setting not wired in panels.js"
        has_apply = '_applyTtsEnabled' in panels
        assert has_apply, "_applyTtsEnabled not called in panels.js"

    def test_apply_tts_enabled_function(self):
        """panels.js must define the _applyTtsEnabled function."""
        panels = _read('panels.js')
        defined = 'function _applyTtsEnabled(' in panels
        assert defined, "_applyTtsEnabled function not found in panels.js"
|
||||
|
||||
|
||||
class TestTtsI18n:
    """The TTS translation keys appear somewhere in i18n.js."""

    def test_tts_listen_key(self):
        catalog = _read('i18n.js')
        found = "tts_listen:" in catalog
        assert found, "tts_listen key not found in i18n.js"

    def test_tts_not_supported_key(self):
        catalog = _read('i18n.js')
        found = "tts_not_supported:" in catalog
        assert found, "tts_not_supported key not found in i18n.js"

    def test_tts_settings_keys(self):
        catalog = _read('i18n.js')
        expected = (
            'settings_label_tts',
            'settings_label_tts_auto_read',
            'settings_label_tts_voice',
            'settings_label_tts_rate',
            'settings_label_tts_pitch',
        )
        # Checked in order so the first missing key is the one reported.
        for key in expected:
            needle = f"{key}:"
            assert needle in catalog, f"{key} not found in i18n.js"
|
||||
|
||||
|
||||
class TestTtsAutoRead:
    """messages.js references auto-read and the composer pause/resume calls."""

    def test_auto_read_called_in_messages(self):
        messages = _read('messages.js')
        referenced = 'autoReadLastAssistant' in messages
        assert referenced, "autoReadLastAssistant not called in messages.js"

    def test_tts_pause_on_composer_focus(self):
        """messages.js must contain both the pause and the resume call."""
        messages = _read('messages.js')
        pauses = 'speechSynthesis.pause' in messages
        assert pauses, "speechSynthesis.pause not called in messages.js"
        resumes = 'speechSynthesis.resume' in messages
        assert resumes, "speechSynthesis.resume not called in messages.js"
|
||||
|
||||
|
||||
class TestTtsBoot:
    """boot.js references _applyTtsEnabled so the saved state is applied at load."""

    def test_apply_tts_on_boot(self):
        boot = _read('boot.js')
        referenced = '_applyTtsEnabled' in boot
        assert referenced, "_applyTtsEnabled not called in boot.js"
|
||||
|
||||
|
||||
class TestTtsStyles:
    """TTS CSS styles exist."""

    def test_tts_button_hidden_default(self):
        """The speaker button rule must exist and hide the button by default."""
        src = _read('style.css')
        assert '.msg-tts-btn' in src, \
            ".msg-tts-btn CSS class not found in style.css"
        # The opt-in behaviour depends on the default rule being display:none,
        # so check the declaration and not only the selector.
        assert re.search(r'\.msg-tts-btn\s*\{[^}]*display\s*:\s*none', src), \
            ".msg-tts-btn must be hidden by default (display:none) in style.css"

    def test_tts_pulse_animation(self):
        src = _read('style.css')
        assert 'tts-pulse' in src, \
            "tts-pulse animation not found in style.css"
|
||||
Reference in New Issue
Block a user