mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-25 11:10:18 +00:00
Merge pull request #1490 from nesquena/stage-271
v0.50.271 — Composer voice buttons UX (#1488)
This commit is contained in:
@@ -1,5 +1,11 @@
|
||||
# Hermes Web UI -- Changelog
|
||||
|
||||
## [v0.50.271] — 2026-05-02
|
||||
|
||||
### Changed (1 self-built PR)
|
||||
|
||||
- **Composer voice buttons: distinct icon, distinct labels, opt-in voice mode** (#1488, self-built, closes #1488) — the composer footer rendered two near-identical mic icons whose tooltips both said "Voice input": one was push-to-talk dictation (older feature), the other was turn-based hands-free voice mode (newer). After researching how ChatGPT, Claude, and Gemini handle the same problem, this PR adopts the industry convention: **mic = dictation, audio-waveform = voice mode**. (1) Voice-mode button now uses Lucide's `audio-lines` glyph (six vertical bars of varying height — the universal "two-way voice conversation" icon, also registered in `LI_PATHS` for reuse). (2) Distinct, localized tooltips: `voice_dictate: 'Dictate'` (with `voice_dictate_active: 'Stop dictation'` flip-state) and `voice_mode_toggle: 'Voice mode'` (with `voice_mode_toggle_active: 'Exit voice mode'` flip-state). The legacy `voice_toggle` key (which resolved to "Voice input" in every locale and caused the duplicate-tooltip bug) is removed. (3) Voice mode is now **opt-in** via Settings → Preferences → "Hands-free voice mode button" — default off keeps the composer uncluttered for the broad-majority case (plain dictation only). The dictation mic stays visible by default, unchanged. Toggle is `localStorage`-backed (`hermes-voice-mode-button`), and `panels.js`'s onchange handler calls `window._applyVoiceModePref()` so the audio-waveform button appears/disappears immediately with no reload. 17 new regression tests in `tests/test_issue1488_composer_voice_buttons.py` pin: distinct static + i18n titles, audio-lines glyph shape (≥5 vertical-bar paths, no leftover mic-with-sparkles rect), all 4 new keys in all 9 locales, removal of stale `voice_toggle`, English labels match ChatGPT/Gemini convention, pref gating (no unconditional `display=''` left in boot.js), Settings checkbox + i18n, panels.js wiring, and active-state tooltip flips. 
Browser-verified end-to-end on port 8789 (default 1 mic / pref-on 2 distinct icons / live re-apply via Settings). (`static/index.html`, `static/icons.js`, `static/i18n.js`, `static/boot.js`, `static/panels.js`, `tests/test_issue1488_composer_voice_buttons.py`)
|
||||
|
||||
## [v0.50.270] — 2026-05-02
|
||||
|
||||
### Fixed (1 contributor PR)
|
||||
|
||||
+1
-1
@@ -3,7 +3,7 @@
|
||||
> Goal: Full 1:1 parity with the Hermes CLI experience via a clean dark web UI.
|
||||
> Everything you can do from the CLI terminal, you can do from this UI.
|
||||
>
|
||||
> Last updated: v0.50.270 (May 02, 2026) — 3849 tests collected
|
||||
> Last updated: v0.50.271 (May 02, 2026) — 3866 tests collected
|
||||
> Tests: `pytest tests/ --collect-only -q`
|
||||
> Source: <repo>/
|
||||
|
||||
|
||||
+2
-2
@@ -1835,8 +1835,8 @@ Bridged CLI sessions:
|
||||
|
||||
---
|
||||
|
||||
*Last updated: v0.50.270, May 02, 2026*
|
||||
*Total automated tests collected: 3849*
|
||||
*Last updated: v0.50.271, May 02, 2026*
|
||||
*Total automated tests collected: 3866*
|
||||
*Regression gate: tests/test_regressions.py*
|
||||
*Run: pytest tests/ -v --timeout=60*
|
||||
*Source: <repo>/*
|
||||
|
||||
+20
-4
@@ -236,6 +236,9 @@ $('btnAttach').onclick=()=>$('fileInput').click();
|
||||
function _setRecording(on){
|
||||
window._micActive=on;
|
||||
btn.classList.toggle('recording',on);
|
||||
// Active-state title flips so the tooltip is honest about what
|
||||
// pressing the button will do (#1488).
|
||||
btn.title = on ? t('voice_dictate_active') : t('voice_dictate');
|
||||
status.style.display=on?'':'none';
|
||||
if(statusText) statusText.textContent=on?'Listening':'Listening';
|
||||
if(!on){ _finalText=''; _prefix=''; }
|
||||
@@ -429,8 +432,21 @@ window._micPendingSend=window._micPendingSend||false;
|
||||
|
||||
if(!modeBtn||!bar||!indicator||!label) return;
|
||||
|
||||
// Show the voice mode button — browser supports both STT and TTS
|
||||
modeBtn.style.display='';
|
||||
// Preferences gate for the hands-free voice-mode button (#1488).
// The button is shown only when the stored pref is exactly 'true', so a
// missing or unreadable pref leaves it hidden and the composer footer shows
// just the dictation mic. Hands-free conversation is a power-user surface;
// making it opt-in avoids two near-identical mic icons side by side.

// Read the pref from localStorage key 'hermes-voice-mode-button'.
// Returns true only for the stored string 'true'.
function _voiceModePrefEnabled(){
  let stored=null;
  try{
    stored=localStorage.getItem('hermes-voice-mode-button');
  }catch(_){
    // If reading localStorage throws, the pref is treated as off.
    return false;
  }
  return stored==='true';
}

// Show or hide the voice-mode button according to the current pref value.
function _applyVoiceModePref(){
  const visible=_voiceModePrefEnabled();
  modeBtn.style.display = visible ? '' : 'none';
}

// Published on window so the settings pane can re-apply right after a toggle.
window._applyVoiceModePref = _applyVoiceModePref;

// Apply once now so the initial visibility matches the stored pref.
_applyVoiceModePref();
|
||||
|
||||
let _voiceModeActive=false;
|
||||
let _voiceModeState='idle'; // idle | listening | thinking | speaking
|
||||
@@ -643,7 +659,7 @@ window._micPendingSend=window._micPendingSend||false;
|
||||
function _activate(){
|
||||
_voiceModeActive=true;
|
||||
modeBtn.classList.add('active');
|
||||
modeBtn.title=t('voice_mode_active');
|
||||
modeBtn.title=t('voice_mode_toggle_active');
|
||||
showToast(t('voice_mode_active'),1500);
|
||||
// If the agent is busy, wait — state will be 'thinking' and we'll detect completion
|
||||
if(typeof S!=='undefined'&&S.busy){
|
||||
@@ -660,7 +676,7 @@ window._micPendingSend=window._micPendingSend||false;
|
||||
_voiceModeState='idle';
|
||||
_voiceModeThinkingSid=null;
|
||||
modeBtn.classList.remove('active');
|
||||
modeBtn.title=t('voice_toggle');
|
||||
modeBtn.title=t('voice_mode_toggle');
|
||||
bar.style.display='none';
|
||||
clearTimeout(_silenceTimer);
|
||||
try{ if(_recognition) _recognition.abort(); }catch(_){}
|
||||
|
||||
+72
-9
@@ -15,8 +15,12 @@ const LOCALES = {
|
||||
mic_no_speech: 'No speech detected. Try again.',
|
||||
mic_network: 'Speech recognition unavailable.',
|
||||
mic_error: 'Voice input error: ',
|
||||
// Composer voice buttons (#1488 — distinct labels for dictation vs voice mode)
|
||||
voice_dictate: 'Dictate',
|
||||
voice_dictate_active: 'Stop dictation',
|
||||
voice_mode_toggle: 'Voice mode',
|
||||
voice_mode_toggle_active: 'Exit voice mode',
|
||||
// Turn-based voice mode (#1333)
|
||||
voice_toggle: 'Voice input',
|
||||
voice_listening: 'Listening…',
|
||||
voice_speaking: 'Speaking…',
|
||||
voice_thinking: 'Thinking…',
|
||||
@@ -494,6 +498,9 @@ const LOCALES = {
|
||||
settings_desc_tts: "Show a speaker button on each assistant message to read it aloud using your browser's speech synthesis.",
|
||||
settings_label_tts_auto_read: 'Auto-read responses aloud',
|
||||
settings_desc_tts_auto_read: 'Automatically speak each new assistant response when it finishes. Pauses when you start typing.',
|
||||
// Composer voice-mode pref (#1488)
|
||||
settings_label_voice_mode: 'Hands-free voice mode button',
|
||||
settings_desc_voice_mode: 'Show the voice-mode button (audio waveform) next to the dictation mic. Lets you speak naturally — Hermes auto-sends after a pause and reads replies aloud. Requires a browser that supports both speech recognition and TTS.',
|
||||
settings_label_tts_voice: 'Voice',
|
||||
settings_desc_tts_voice: "Preferred voice. Populated from your browser's available voices.",
|
||||
settings_label_tts_rate: 'Speech rate',
|
||||
@@ -887,7 +894,11 @@ const LOCALES = {
|
||||
mic_network: '音声認識を利用できません。',
|
||||
mic_error: '音声入力エラー: ',
|
||||
// Turn-based voice mode (#1333)
|
||||
voice_toggle: '音声入力',
|
||||
// Composer voice buttons (#1488)
|
||||
voice_dictate: 'ディクテーション',
|
||||
voice_dictate_active: 'ディクテーション停止',
|
||||
voice_mode_toggle: '音声モード',
|
||||
voice_mode_toggle_active: '音声モードを終了',
|
||||
voice_listening: '聞き取り中…',
|
||||
voice_speaking: '発話中…',
|
||||
voice_thinking: '考え中…',
|
||||
@@ -1365,6 +1376,9 @@ const LOCALES = {
|
||||
settings_desc_tts: 'アシスタントの各メッセージにスピーカーボタンを表示し、ブラウザの音声合成で読み上げます。',
|
||||
settings_label_tts_auto_read: '応答を自動で読み上げ',
|
||||
settings_desc_tts_auto_read: '新しいアシスタント応答が完了するたびに自動で読み上げます。入力中は一時停止します。',
|
||||
// Composer voice-mode pref (#1488)
|
||||
settings_label_voice_mode: 'ハンズフリー音声モードのボタン',
|
||||
settings_desc_voice_mode: '音声波形ボタンをディクテーションマイクの隣に表示します。発話の合間に自動送信し、返答を読み上げます。音声認識と TTS の両方をサポートするブラウザが必要です。',
|
||||
settings_label_tts_voice: '声',
|
||||
settings_desc_tts_voice: '優先する声。ブラウザで利用可能な声から選択されます。',
|
||||
settings_label_tts_rate: '読み上げ速度',
|
||||
@@ -1756,7 +1770,11 @@ const LOCALES = {
|
||||
mic_no_speech: 'Речь не распознана. Попробуйте ещё раз.',
|
||||
mic_network: 'Распознавание речи недоступно.',
|
||||
mic_error: 'Ошибка ввода речи: ',
|
||||
voice_toggle: 'Голосовой ввод',
|
||||
// Composer voice buttons (#1488)
|
||||
voice_dictate: 'Диктовка',
|
||||
voice_dictate_active: 'Остановить диктовку',
|
||||
voice_mode_toggle: 'Голосовой режим',
|
||||
voice_mode_toggle_active: 'Выйти из голосового режима',
|
||||
voice_listening: 'Слушаю…',
|
||||
voice_speaking: 'Говорю…',
|
||||
voice_thinking: 'Думаю…',
|
||||
@@ -2517,6 +2535,9 @@ const LOCALES = {
|
||||
settings_desc_tts: 'Показать кнопку динамика на сообщениях ассистента',
|
||||
settings_label_tts_auto_read: 'Авто-чтение ответов',
|
||||
settings_desc_tts_auto_read: 'Автоматически озвучивать ответы ассистента',
|
||||
// Composer voice-mode pref (#1488)
|
||||
settings_label_voice_mode: 'Кнопка режима свободных рук',
|
||||
settings_desc_voice_mode: 'Показывать кнопку голосового режима (аудиоволны) рядом с микрофоном диктовки. Hermes автоматически отправляет реплики после паузы и зачитывает ответы вслух. Требуется браузер с поддержкой распознавания речи и TTS.',
|
||||
settings_label_tts_voice: 'Голос',
|
||||
settings_desc_tts_voice: 'Выберите голос для синтеза речи',
|
||||
settings_label_tts_rate: 'Скорость речи',
|
||||
@@ -3316,6 +3337,9 @@ const LOCALES = {
|
||||
settings_desc_tts: 'Mostrar botón de altavoz en mensajes del asistente',
|
||||
settings_label_tts_auto_read: 'Leer respuestas automáticamente',
|
||||
settings_desc_tts_auto_read: 'Leer en voz alta las respuestas del asistente automáticamente',
|
||||
// Composer voice-mode pref (#1488)
|
||||
settings_label_voice_mode: 'Hands-free voice mode button', // TODO: translate
|
||||
settings_desc_voice_mode: 'Show the voice-mode button (audio waveform) next to the dictation mic. Lets you speak naturally — Hermes auto-sends after a pause and reads replies aloud. Requires a browser that supports both speech recognition and TTS.', // TODO: translate
|
||||
settings_label_tts_voice: 'Voz',
|
||||
settings_desc_tts_voice: 'Seleccionar voz para síntesis de voz',
|
||||
settings_label_tts_rate: 'Velocidad de voz',
|
||||
@@ -3358,7 +3382,11 @@ const LOCALES = {
|
||||
voice_mode_off: 'Voice mode off', // TODO: translate
|
||||
voice_speaking: 'Speaking…', // TODO: translate
|
||||
voice_thinking: 'Thinking…', // TODO: translate
|
||||
voice_toggle: 'Voice input', // TODO: translate
|
||||
// Composer voice buttons (#1488)
|
||||
voice_dictate: 'Dictate', // TODO: translate
|
||||
voice_dictate_active: 'Stop dictation', // TODO: translate
|
||||
voice_mode_toggle: 'Voice mode', // TODO: translate
|
||||
voice_mode_toggle_active: 'Exit voice mode', // TODO: translate
|
||||
subagent_children: 'Subagent sessions', // TODO: translate
|
||||
},
|
||||
|
||||
@@ -4124,6 +4152,9 @@ const LOCALES = {
|
||||
settings_desc_tts: 'Lautsprecher-Symbol auf Assistenten-Nachrichten anzeigen',
|
||||
settings_label_tts_auto_read: 'Antworten automatisch vorlesen',
|
||||
settings_desc_tts_auto_read: 'Assistenten-Antworten automatisch vorlesen',
|
||||
// Composer voice-mode pref (#1488)
|
||||
settings_label_voice_mode: 'Hands-free voice mode button', // TODO: translate
|
||||
settings_desc_voice_mode: 'Show the voice-mode button (audio waveform) next to the dictation mic. Lets you speak naturally — Hermes auto-sends after a pause and reads replies aloud. Requires a browser that supports both speech recognition and TTS.', // TODO: translate
|
||||
settings_label_tts_voice: 'Stimme',
|
||||
settings_desc_tts_voice: 'Stimme für Sprachsynthese auswählen',
|
||||
settings_label_tts_rate: 'Sprechgeschwindigkeit',
|
||||
@@ -4167,7 +4198,11 @@ const LOCALES = {
|
||||
voice_mode_off: 'Voice mode off', // TODO: translate
|
||||
voice_speaking: 'Speaking…', // TODO: translate
|
||||
voice_thinking: 'Thinking…', // TODO: translate
|
||||
voice_toggle: 'Voice input', // TODO: translate
|
||||
// Composer voice buttons (#1488)
|
||||
voice_dictate: 'Dictate', // TODO: translate
|
||||
voice_dictate_active: 'Stop dictation', // TODO: translate
|
||||
voice_mode_toggle: 'Voice mode', // TODO: translate
|
||||
voice_mode_toggle_active: 'Exit voice mode', // TODO: translate
|
||||
subagent_children: 'Subagent sessions', // TODO: translate
|
||||
},
|
||||
|
||||
@@ -4928,6 +4963,9 @@ const LOCALES = {
|
||||
settings_desc_tts: '在助手消息上显示扬声器按钮',
|
||||
settings_label_tts_auto_read: '自动朗读回复',
|
||||
settings_desc_tts_auto_read: '自动朗读助手回复',
|
||||
// Composer voice-mode pref (#1488)
|
||||
settings_label_voice_mode: 'Hands-free voice mode button', // TODO: translate
|
||||
settings_desc_voice_mode: 'Show the voice-mode button (audio waveform) next to the dictation mic. Lets you speak naturally — Hermes auto-sends after a pause and reads replies aloud. Requires a browser that supports both speech recognition and TTS.', // TODO: translate
|
||||
settings_label_tts_voice: '语音',
|
||||
settings_desc_tts_voice: '选择语音合成声音',
|
||||
settings_label_tts_rate: '语速',
|
||||
@@ -4970,7 +5008,11 @@ const LOCALES = {
|
||||
voice_mode_off: 'Voice mode off', // TODO: translate
|
||||
voice_speaking: 'Speaking…', // TODO: translate
|
||||
voice_thinking: 'Thinking…', // TODO: translate
|
||||
voice_toggle: 'Voice input', // TODO: translate
|
||||
// Composer voice buttons (#1488)
|
||||
voice_dictate: 'Dictate', // TODO: translate
|
||||
voice_dictate_active: 'Stop dictation', // TODO: translate
|
||||
voice_mode_toggle: 'Voice mode', // TODO: translate
|
||||
voice_mode_toggle_active: 'Exit voice mode', // TODO: translate
|
||||
subagent_children: 'Subagent sessions', // TODO: translate
|
||||
},
|
||||
|
||||
@@ -5838,6 +5880,9 @@ const LOCALES = {
|
||||
settings_desc_tts: '在助手訊息上顯示喇叭按鈕',
|
||||
settings_label_tts_auto_read: '自動朗讀回覆',
|
||||
settings_desc_tts_auto_read: '自動朗讀助手回覆',
|
||||
// Composer voice-mode pref (#1488)
|
||||
settings_label_voice_mode: 'Hands-free voice mode button', // TODO: translate
|
||||
settings_desc_voice_mode: 'Show the voice-mode button (audio waveform) next to the dictation mic. Lets you speak naturally — Hermes auto-sends after a pause and reads replies aloud. Requires a browser that supports both speech recognition and TTS.', // TODO: translate
|
||||
settings_label_tts_voice: '語音',
|
||||
settings_desc_tts_voice: '選擇語音合成聲音',
|
||||
settings_label_tts_rate: '語速',
|
||||
@@ -5881,7 +5926,11 @@ const LOCALES = {
|
||||
voice_mode_off: 'Voice mode off', // TODO: translate
|
||||
voice_speaking: 'Speaking…', // TODO: translate
|
||||
voice_thinking: 'Thinking…', // TODO: translate
|
||||
voice_toggle: 'Voice input', // TODO: translate
|
||||
// Composer voice buttons (#1488)
|
||||
voice_dictate: 'Dictate', // TODO: translate
|
||||
voice_dictate_active: 'Stop dictation', // TODO: translate
|
||||
voice_mode_toggle: 'Voice mode', // TODO: translate
|
||||
voice_mode_toggle_active: 'Exit voice mode', // TODO: translate
|
||||
subagent_children: 'Subagent sessions', // TODO: translate
|
||||
},
|
||||
|
||||
@@ -6561,6 +6610,9 @@ const LOCALES = {
|
||||
settings_desc_tts: 'Mostrar botão de alto-falante nas mensagens do assistente',
|
||||
settings_label_tts_auto_read: 'Ler respostas automaticamente',
|
||||
settings_desc_tts_auto_read: 'Ler automaticamente as respostas do assistente',
|
||||
// Composer voice-mode pref (#1488)
|
||||
settings_label_voice_mode: 'Hands-free voice mode button', // TODO: translate
|
||||
settings_desc_voice_mode: 'Show the voice-mode button (audio waveform) next to the dictation mic. Lets you speak naturally — Hermes auto-sends after a pause and reads replies aloud. Requires a browser that supports both speech recognition and TTS.', // TODO: translate
|
||||
settings_label_tts_voice: 'Voz',
|
||||
settings_desc_tts_voice: 'Selecionar voz para síntese de voz',
|
||||
settings_label_tts_rate: 'Velocidade da fala',
|
||||
@@ -6603,7 +6655,11 @@ const LOCALES = {
|
||||
voice_mode_off: 'Voice mode off', // TODO: translate
|
||||
voice_speaking: 'Speaking…', // TODO: translate
|
||||
voice_thinking: 'Thinking…', // TODO: translate
|
||||
voice_toggle: 'Voice input', // TODO: translate
|
||||
// Composer voice buttons (#1488)
|
||||
voice_dictate: 'Dictate', // TODO: translate
|
||||
voice_dictate_active: 'Stop dictation', // TODO: translate
|
||||
voice_mode_toggle: 'Voice mode', // TODO: translate
|
||||
voice_mode_toggle_active: 'Exit voice mode', // TODO: translate
|
||||
subagent_children: 'Subagent sessions', // TODO: translate
|
||||
// login-flow keys (issue #1442)
|
||||
sign_out_failed: 'Falha ao sair: ',
|
||||
@@ -7421,6 +7477,9 @@ const LOCALES = {
|
||||
settings_desc_tts: '도움말 메시지에 스피커 버튼 표시',
|
||||
settings_label_tts_auto_read: '답변 자동 읽기',
|
||||
settings_desc_tts_auto_read: '도움말 답변을 자동으로 읽어줌',
|
||||
// Composer voice-mode pref (#1488)
|
||||
settings_label_voice_mode: 'Hands-free voice mode button', // TODO: translate
|
||||
settings_desc_voice_mode: 'Show the voice-mode button (audio waveform) next to the dictation mic. Lets you speak naturally — Hermes auto-sends after a pause and reads replies aloud. Requires a browser that supports both speech recognition and TTS.', // TODO: translate
|
||||
settings_label_tts_voice: '음성',
|
||||
settings_desc_tts_voice: '음성 합성 음성 선택',
|
||||
settings_label_tts_rate: '말 속도',
|
||||
@@ -7463,7 +7522,11 @@ const LOCALES = {
|
||||
voice_mode_off: 'Voice mode off', // TODO: translate
|
||||
voice_speaking: 'Speaking…', // TODO: translate
|
||||
voice_thinking: 'Thinking…', // TODO: translate
|
||||
voice_toggle: 'Voice input', // TODO: translate
|
||||
// Composer voice buttons (#1488)
|
||||
voice_dictate: 'Dictate', // TODO: translate
|
||||
voice_dictate_active: 'Stop dictation', // TODO: translate
|
||||
voice_mode_toggle: 'Voice mode', // TODO: translate
|
||||
voice_mode_toggle_active: 'Exit voice mode', // TODO: translate
|
||||
subagent_children: 'Subagent sessions', // TODO: translate
|
||||
},
|
||||
};
|
||||
|
||||
@@ -64,6 +64,8 @@ const LI_PATHS = {
|
||||
'git-branch': '<line x1="6" y1="3" x2="6" y2="15"/><circle cx="18" cy="6" r="3"/><circle cx="6" cy="18" r="3"/><path d="M18 9a9 9 0 0 1-9 9"/>',
|
||||
// Audio / TTS
|
||||
'volume-2': '<polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/><path d="M19.07 4.93a10 10 0 0 1 0 14.14"/><path d="M15.54 8.46a5 5 0 0 1 0 7.07"/>',
|
||||
// Voice-mode button — universal "two-way voice conversation" glyph (matches ChatGPT/Gemini)
|
||||
'audio-lines': '<path d="M2 10v4"/><path d="M6 6v12"/><path d="M10 3v18"/><path d="M14 8v8"/><path d="M18 5v14"/><path d="M22 10v4"/>',
|
||||
// Queue pill chevron (ui.js queue indicator)
|
||||
'chevron-up': '<polyline points="18 15 12 9 6 15"/>',
|
||||
// Insights panel stat cards (panels.js)
|
||||
|
||||
+17
-9
@@ -389,7 +389,7 @@
|
||||
<button class="icon-btn" id="btnAttach" title="Attach files">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M21.44 11.05l-9.19 9.19a6 6 0 0 1-8.49-8.49l9.19-9.19a4 4 0 0 1 5.66 5.66l-9.2 9.19a2 2 0 0 1-2.83-2.83l8.49-8.48"/></svg>
|
||||
</button>
|
||||
<button class="icon-btn mic-btn" id="btnMic" title="Voice input" style="display:none">
|
||||
<button class="icon-btn mic-btn" id="btnMic" title="Dictate" data-i18n-title="voice_dictate" style="display:none">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<rect x="9" y="1" width="6" height="12" rx="3"/>
|
||||
<path d="M5 10a7 7 0 0 0 14 0"/>
|
||||
@@ -397,14 +397,15 @@
|
||||
<line x1="8" y1="23" x2="16" y2="23"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button class="icon-btn voice-mode-btn" id="btnVoiceMode" title="Turn-based voice mode" style="display:none" data-i18n-title="voice_toggle">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
|
||||
<path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
|
||||
<line x1="12" y1="19" x2="12" y2="23"/>
|
||||
<line x1="8" y1="23" x2="16" y2="23"/>
|
||||
<path d="M20 3l-1.5 1.5" opacity=".5"/>
|
||||
<path d="M4 3l1.5 1.5" opacity=".5"/>
|
||||
<button class="icon-btn voice-mode-btn" id="btnVoiceMode" title="Voice mode" data-i18n-title="voice_mode_toggle" style="display:none">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||||
<!-- Lucide audio-lines: signals two-way voice conversation, matches ChatGPT/Gemini convention. -->
|
||||
<path d="M2 10v4"/>
|
||||
<path d="M6 6v12"/>
|
||||
<path d="M10 3v18"/>
|
||||
<path d="M14 8v8"/>
|
||||
<path d="M18 5v14"/>
|
||||
<path d="M22 10v4"/>
|
||||
</svg>
|
||||
</button>
|
||||
<div class="composer-divider" aria-hidden="true"></div>
|
||||
@@ -784,6 +785,13 @@
|
||||
</label>
|
||||
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_tts_auto_read">Automatically speak each new assistant response when it finishes. Pauses when you start typing.</div>
|
||||
</div>
|
||||
<div class="settings-field">
|
||||
<label style="display:flex;align-items:center;gap:8px;cursor:pointer">
|
||||
<input type="checkbox" id="settingsVoiceModeEnabled" style="width:15px;height:15px;accent-color:var(--accent)">
|
||||
<span data-i18n="settings_label_voice_mode">Hands-free voice mode button</span>
|
||||
</label>
|
||||
<div style="font-size:11px;color:var(--muted);margin-top:4px" data-i18n="settings_desc_voice_mode">Show the voice-mode button (audio waveform) next to the dictation mic. Lets you speak naturally — Hermes auto-sends after a pause and reads replies aloud. Requires a browser that supports both speech recognition and TTS.</div>
|
||||
</div>
|
||||
<div class="settings-field">
|
||||
<label for="settingsTtsVoice" data-i18n="settings_label_tts_voice">Voice</label>
|
||||
<select id="settingsTtsVoice" style="width:100%;padding:8px;background:var(--code-bg);color:var(--text);border:1px solid var(--border2);border-radius:6px">
|
||||
|
||||
@@ -3083,6 +3083,17 @@ async function loadSettingsPanel(){
|
||||
if(ttsEnabledCb){ttsEnabledCb.checked=localStorage.getItem('hermes-tts-enabled')==='true';ttsEnabledCb.onchange=function(){localStorage.setItem('hermes-tts-enabled',this.checked?'true':'false');_applyTtsEnabled(this.checked);};}
|
||||
const ttsAutoReadCb=$('settingsTtsAutoRead');
|
||||
if(ttsAutoReadCb){ttsAutoReadCb.checked=localStorage.getItem('hermes-tts-auto-read')==='true';ttsAutoReadCb.onchange=function(){localStorage.setItem('hermes-tts-auto-read',this.checked?'true':'false');};}
|
||||
// Voice-mode button visibility (#1488). localStorage-only; no server round-trip.
|
||||
// Toggling re-applies immediately via the boot.js helper so the user sees
|
||||
// the audio-waveform button appear/disappear without a reload.
|
||||
const voiceModeCb=$('settingsVoiceModeEnabled');
|
||||
if(voiceModeCb){
|
||||
voiceModeCb.checked=localStorage.getItem('hermes-voice-mode-button')==='true';
|
||||
voiceModeCb.onchange=function(){
|
||||
localStorage.setItem('hermes-voice-mode-button',this.checked?'true':'false');
|
||||
if(typeof window._applyVoiceModePref==='function') window._applyVoiceModePref();
|
||||
};
|
||||
}
|
||||
// Populate voice selector from speechSynthesis
|
||||
const ttsVoiceSel=$('settingsTtsVoice');
|
||||
if(ttsVoiceSel&&'speechSynthesis' in window){
|
||||
|
||||
@@ -0,0 +1,270 @@
|
||||
"""Tests for #1488 — composer voice buttons (dictation vs voice mode).
|
||||
|
||||
The composer footer shows two voice-related buttons that look identical and
|
||||
share the same tooltip ("Voice input") in master. This module pins the fix:
|
||||
|
||||
1. The buttons MUST have distinct, descriptive tooltips bound to i18n keys.
|
||||
2. The voice-mode button MUST use the audio-lines (waveform) icon — the
|
||||
industry-standard glyph for two-way voice conversation, matching ChatGPT
|
||||
and Gemini.
|
||||
3. The voice-mode button MUST be hidden by default and surface behind a
|
||||
Preferences toggle so the default composer footer stays uncluttered.
|
||||
4. The dictation button (the older feature) MUST stay visible by default,
|
||||
unchanged.
|
||||
5. All four new i18n keys (active and idle states for both buttons) MUST
|
||||
exist in every locale.
|
||||
6. The legacy `voice_toggle` i18n key MUST be removed everywhere — its
|
||||
string was identical to the dictation tooltip and caused the bug.
|
||||
"""
|
||||
import re
|
||||
|
||||
|
||||
def _src(name: str) -> str:
    """Return the full text of ``static/<name>``.

    The path is relative to the current working directory, so the tests are
    expected to run from the repository root.

    The file is decoded explicitly as UTF-8: the static sources read by this
    module (notably ``i18n.js``) contain non-ASCII string literals, and
    relying on the platform's default encoding would make the read
    locale-dependent.
    """
    with open(f"static/{name}", encoding="utf-8") as f:
        return f.read()
|
||||
|
||||
|
||||
class TestComposerVoiceButtonHTML:
|
||||
"""index.html composer markup for the dictation + voice-mode buttons."""
|
||||
|
||||
def test_dictation_button_has_dictate_i18n_key(self):
|
||||
"""btnMic must bind data-i18n-title="voice_dictate" so its tooltip
|
||||
is distinct from the voice-mode button and localizable."""
|
||||
html = _src("index.html")
|
||||
m = re.search(
|
||||
r'<button[^>]*\bid="btnMic"[^>]*>',
|
||||
html,
|
||||
re.DOTALL,
|
||||
)
|
||||
assert m, "btnMic <button> tag must exist"
|
||||
tag = m.group(0)
|
||||
assert 'data-i18n-title="voice_dictate"' in tag, \
|
||||
"btnMic must have data-i18n-title=\"voice_dictate\" — without " \
|
||||
"it the tooltip stays as the static fallback and ignores locale."
|
||||
# Static fallback should also match (read by users with stale i18n)
|
||||
assert 'title="Dictate"' in tag, \
|
||||
"btnMic static title fallback must say 'Dictate' (not 'Voice input')."
|
||||
|
||||
def test_voice_mode_button_has_voice_mode_i18n_key(self):
|
||||
"""btnVoiceMode must bind data-i18n-title="voice_mode_toggle"."""
|
||||
html = _src("index.html")
|
||||
m = re.search(
|
||||
r'<button[^>]*\bid="btnVoiceMode"[^>]*>',
|
||||
html,
|
||||
re.DOTALL,
|
||||
)
|
||||
assert m, "btnVoiceMode <button> tag must exist"
|
||||
tag = m.group(0)
|
||||
assert 'data-i18n-title="voice_mode_toggle"' in tag, \
|
||||
"btnVoiceMode must use data-i18n-title=\"voice_mode_toggle\". " \
|
||||
"The legacy key 'voice_toggle' resolved to 'Voice input' and " \
|
||||
"made btnMic and btnVoiceMode appear identical."
|
||||
assert 'voice_toggle"' not in tag, \
|
||||
"Stale voice_toggle reference still on btnVoiceMode — must be voice_mode_toggle."
|
||||
|
||||
def test_buttons_have_distinct_static_titles(self):
|
||||
"""The static title attributes must differ as a fallback for users
|
||||
whose i18n hasn't loaded yet (e.g. very early page load)."""
|
||||
html = _src("index.html")
|
||||
mic = re.search(r'<button[^>]*\bid="btnMic"[^>]*>', html, re.DOTALL)
|
||||
vm = re.search(r'<button[^>]*\bid="btnVoiceMode"[^>]*>', html, re.DOTALL)
|
||||
assert mic and vm
|
||||
mic_title = re.search(r'\btitle="([^"]+)"', mic.group(0)).group(1)
|
||||
vm_title = re.search(r'\btitle="([^"]+)"', vm.group(0)).group(1)
|
||||
assert mic_title != vm_title, \
|
||||
f"Static titles must differ; both say {mic_title!r}"
|
||||
assert "voice input" not in mic_title.lower(), \
|
||||
f"btnMic static title still says 'Voice input': {mic_title!r}"
|
||||
assert "voice input" not in vm_title.lower(), \
|
||||
f"btnVoiceMode static title still says 'Voice input': {vm_title!r}"
|
||||
|
||||
def test_voice_mode_uses_audio_lines_glyph(self):
|
||||
"""btnVoiceMode SVG must use the audio-lines (waveform) shape.
|
||||
We detect the pattern by looking for the 6 vertical-bar paths
|
||||
characteristic of Lucide's audio-lines icon."""
|
||||
html = _src("index.html")
|
||||
# Extract the full button (open tag through </button>)
|
||||
m = re.search(
|
||||
r'<button[^>]*\bid="btnVoiceMode"[^>]*>(.+?)</button>',
|
||||
html,
|
||||
re.DOTALL,
|
||||
)
|
||||
assert m, "btnVoiceMode element must be parseable"
|
||||
body = m.group(1)
|
||||
# Lucide audio-lines path data — six <path d="M{x} {y}v{h}"/> entries.
|
||||
bars = re.findall(r'<path d="M\d+\s+\d+v\d+"', body)
|
||||
assert len(bars) >= 5, (
|
||||
f"btnVoiceMode SVG must use audio-lines (>=5 vertical-bar paths); "
|
||||
f"found {len(bars)}. Visual confusion bug returns if reverted to "
|
||||
f"the old 'mic with sparkles' shape."
|
||||
)
|
||||
# Must NOT contain the old mic-shaped rect (rx="3" capsule) — that's
|
||||
# the dictation glyph and using it again recreates #1488.
|
||||
assert 'rect x="9" y="1" width="6" height="12" rx="3"' not in body, \
|
||||
"btnVoiceMode regressed to mic shape — the visual confusion bug returns."
|
||||
|
||||
|
||||
class TestComposerVoiceButtonI18n:
|
||||
"""i18n.js must define the four new keys and remove the stale voice_toggle."""
|
||||
|
||||
REQUIRED_KEYS = (
|
||||
"voice_dictate",
|
||||
"voice_dictate_active",
|
||||
"voice_mode_toggle",
|
||||
"voice_mode_toggle_active",
|
||||
)
|
||||
|
||||
LOCALES = ("en", "ja", "ru", "es", "de", "zh", "zh-Hant", "pt", "ko")
|
||||
|
||||
def test_legacy_voice_toggle_key_removed(self):
|
||||
"""The old key whose string was 'Voice input' caused the duplicate-
|
||||
tooltip bug. It must no longer appear in i18n.js."""
|
||||
src = _src("i18n.js")
|
||||
# Match the property name only (not strings that happen to mention it).
|
||||
leftover = re.findall(r'\bvoice_toggle\s*:', src)
|
||||
assert not leftover, (
|
||||
f"Stale voice_toggle: key still in i18n.js ({len(leftover)} "
|
||||
f"occurrences). Replace with voice_mode_toggle / voice_dictate."
|
||||
)
|
||||
|
||||
def test_all_locales_define_new_keys(self):
|
||||
"""Every locale block must define all 4 new composer voice-button keys."""
|
||||
src = _src("i18n.js")
|
||||
for key in self.REQUIRED_KEYS:
|
||||
count = len(re.findall(rf'\b{re.escape(key)}\s*:', src))
|
||||
assert count == len(self.LOCALES), (
|
||||
f"i18n key {key!r} appears {count} times — expected one per "
|
||||
f"locale ({len(self.LOCALES)} locales: {self.LOCALES}). "
|
||||
f"Each locale block must define all four composer voice keys."
|
||||
)
|
||||
|
||||
def test_english_dictate_label_is_dictate(self):
|
||||
"""English voice_dictate must read 'Dictate' (not 'Voice input')."""
|
||||
src = _src("i18n.js")
|
||||
# Find the en block (first occurrence of voice_dictate is in en)
|
||||
m = re.search(r"\bvoice_dictate\s*:\s*'([^']+)'", src)
|
||||
assert m, "voice_dictate key not found"
|
||||
assert m.group(1) == "Dictate", \
|
||||
f"English voice_dictate should be 'Dictate'; got {m.group(1)!r}"
|
||||
|
||||
def test_english_voice_mode_label_is_voice_mode(self):
|
||||
"""English voice_mode_toggle must read 'Voice mode' — matches
|
||||
ChatGPT/Gemini convention (industry-standard label)."""
|
||||
src = _src("i18n.js")
|
||||
# Find the FIRST voice_mode_toggle in the file (en block) but skip
|
||||
# _active suffix variant — use a lookahead to assert no _active.
|
||||
m = re.search(r"\bvoice_mode_toggle\s*:\s*'([^']+)'", src)
|
||||
assert m, "voice_mode_toggle key not found"
|
||||
assert m.group(1) == "Voice mode", \
|
||||
f"English voice_mode_toggle should be 'Voice mode'; got {m.group(1)!r}"
|
||||
|
||||
|
||||
class TestVoiceModePreferenceGate:
    """Static checks that the voice-mode button is gated behind a preference.

    The tests scan boot.js, index.html, i18n.js and panels.js as text for
    the identifiers and markup the preference gate relies on.
    """

    def test_voice_mode_pref_is_localstorage_backed(self):
        """boot.js must mention the quoted localStorage key for the pref."""
        boot_js = _src("boot.js")
        assert "'hermes-voice-mode-button'" in boot_js, (
            "boot.js must read/write the localStorage key 'hermes-voice-mode-button' "
            "for the voice-mode visibility pref."
        )

    def test_voice_mode_button_hidden_until_pref_enabled(self):
        """boot.js must route button visibility through the pref helpers.

        Both helper names have to be present, and the old unconditional
        ``modeBtn.style.display='';`` statement must be absent.
        """
        boot_js = _src("boot.js")

        # Helper identifiers that must appear somewhere in boot.js.
        required_helpers = (
            ("_applyVoiceModePref",
             "boot.js must expose _applyVoiceModePref so settings toggle re-applies live."),
            ("_voiceModePrefEnabled",
             "boot.js must define _voiceModePrefEnabled to read the pref."),
        )
        for helper_name, failure_message in required_helpers:
            assert helper_name in boot_js, failure_message

        # Only the exact unconditional form is rejected; a conditional such
        # as `style.display = _voiceModePrefEnabled() ? '' : 'none'` passes.
        assert "modeBtn.style.display='';" not in boot_js, (
            "boot.js still contains unconditional `modeBtn.style.display='';` — "
            "this defeats the Preferences gate and reintroduces #1488."
        )

    def test_settings_pane_has_voice_mode_checkbox(self):
        """index.html must contain the checkbox id and both data-i18n hooks."""
        html = _src("index.html")
        required_markup = (
            ('id="settingsVoiceModeEnabled"',
             "Preferences pane must include #settingsVoiceModeEnabled checkbox."),
            ('data-i18n="settings_label_voice_mode"',
             "Voice-mode pref label must use data-i18n='settings_label_voice_mode'."),
            ('data-i18n="settings_desc_voice_mode"',
             "Voice-mode pref description must use data-i18n='settings_desc_voice_mode'."),
        )
        for attribute, failure_message in required_markup:
            assert attribute in html, failure_message

    def test_settings_pane_has_voice_mode_i18n_keys(self):
        """Both preference i18n keys must be defined exactly 9 times in i18n.js."""
        i18n_source = _src("i18n.js")
        for key in ("settings_label_voice_mode", "settings_desc_voice_mode"):
            # Count `<key>:` property definitions only.
            definition = re.compile(rf'\b{re.escape(key)}\s*:')
            count = sum(1 for _ in definition.finditer(i18n_source))
            assert count == 9, (
                f"Preferences i18n key {key!r} appears {count} times — "
                f"expected 9 (one per locale)."
            )

    def test_panels_js_wires_voice_mode_pref(self):
        """panels.js must mention the checkbox, the storage key, and the re-apply hook."""
        panels_js = _src("panels.js")
        required_tokens = (
            ("settingsVoiceModeEnabled",
             "panels.js must reference the #settingsVoiceModeEnabled checkbox."),
            ("'hermes-voice-mode-button'",
             "panels.js must persist the pref to localStorage key 'hermes-voice-mode-button'."),
            ("_applyVoiceModePref",
             "panels.js onchange handler must call window._applyVoiceModePref() "
             "so the button appears/disappears immediately."),
        )
        for token, failure_message in required_tokens:
            assert token in panels_js, failure_message
|
||||
|
||||
|
||||
class TestActiveStateTooltips:
    """Tooltips must switch to their 'stop'/'exit' variants while active.

    Each test pulls one function's text out of boot.js with a regex and
    checks which i18n key names appear inside it.
    """

    @staticmethod
    def _boot_function_text(pattern, missing_message):
        """Return the boot.js text matched by ``pattern`` (searched with DOTALL).

        Fails with ``missing_message`` when the pattern does not match.
        """
        found = re.search(pattern, _src("boot.js"), flags=re.DOTALL)
        assert found, missing_message
        return found[0]

    def test_dictation_active_tooltip_changes_when_recording(self):
        """_setRecording(on) must reference both the active and idle dictation keys."""
        # The pattern ends at the first newline + one space + `}` after the
        # header, so it depends on how boot.js indents the closing brace.
        body = self._boot_function_text(
            r"function _setRecording\(on\)\{.*?\n \}",
            "_setRecording function must exist",
        )
        assert "voice_dictate_active" in body, (
            "_setRecording must flip the tooltip to voice_dictate_active when "
            "recording starts so the user knows pressing it now stops dictation."
        )
        # A closing quote right after the key name rules out the `_active` variant.
        idle_key_forms = ("voice_dictate'", 'voice_dictate"')
        assert any(form in body for form in idle_key_forms), \
            "_setRecording must restore voice_dictate when recording stops."

    def test_voice_mode_active_tooltip(self):
        """_activate() must reference voice_mode_toggle_active."""
        body = self._boot_function_text(
            r"function _activate\(\)\{.*?\n \}",
            "_activate function must exist",
        )
        assert "voice_mode_toggle_active" in body, (
            "_activate must flip the tooltip to voice_mode_toggle_active so "
            "the next click obviously exits voice mode."
        )

    def test_voice_mode_idle_tooltip(self):
        """_deactivate() must reference the idle voice_mode_toggle key."""
        body = self._boot_function_text(
            r"function _deactivate\(\)\{.*?\n \}",
            "_deactivate function must exist",
        )
        # Requiring a quote immediately after the key excludes
        # voice_mode_toggle_active.
        idle_key = re.compile(r"voice_mode_toggle['\"]")
        assert idle_key.search(body), (
            "_deactivate must restore voice_mode_toggle (idle title) when "
            "the user exits voice mode."
        )
|
||||
|
||||
|
||||
class TestAudioLinesIconRegistered:
    """icons.js should carry an 'audio-lines' entry so li() can reuse the glyph."""

    def test_audio_lines_in_li_paths(self):
        """The single-quoted name 'audio-lines' must appear in icons.js."""
        icons_js = _src("icons.js")
        # Plain substring check: it confirms the quoted name is present in
        # the file, not where in the file it is defined.
        assert "'audio-lines'" in icons_js, \
            "audio-lines must be registered in LI_PATHS for li('audio-lines') reuse."
|
||||
Reference in New Issue
Block a user