feat(chat): redesign attachments with ContentBlock format and file downloads (#397)

- Redesign attachment handling using Anthropic-style ContentBlock array format with discriminated unions (text, image, file types) - Add frontend file download functionality supporting both ContentBlock and Markdown formats with authentication tokens - Fix multi-process conflict causing SQLite database resets by eliminating redundant nodemon instances - Update chat store to build ContentBlock arrays from attachments - Improve image handling with base64 conversion for upstream API Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-26 14:00:14 +00:00 · 2026-05-02 15:39:01 +08:00
parent caa9162f28
commit 793003fcf6
14 changed files with 331 additions and 30 deletions
@@ -1,6 +1,6 @@
 {
  "name": "hermes-web-ui",
-  "version": "0.5.5",
+  "version": "0.5.6",
  "description": "Self-hosted AI chat dashboard for Hermes Agent — multi-model (Claude, GPT, Gemini, DeepSeek) web UI with Telegram, Discord, Slack, WhatsApp integration",
  "repository": {
    "type": "git",
@@ -1,13 +1,18 @@
 import { io, type Socket } from 'socket.io-client'
 import { request, getBaseUrlValue, getApiKey } from '../client'

+export type ContentBlock =
+  | { type: 'text'; text: string }
+  | { type: 'image'; name: string; path: string; media_type: string }
+  | { type: 'file'; name: string; path: string; media_type?: string }
+
 export interface ChatMessage {
  role: 'user' | 'assistant' | 'system'
-  content: string
+  content: string | ContentBlock[]
 }

 export interface StartRunRequest {
-  input: string | ChatMessage[]
+  input: string | ContentBlock[]
  instructions?: string
  session_id?: string
  model?: string
@@ -1,9 +1,10 @@
 <script setup lang="ts">
-import type { Message } from "@/stores/hermes/chat";
+import type { Message, ContentBlock } from "@/stores/hermes/chat";
 import { computed, onBeforeUnmount, onMounted, ref, watchEffect } from "vue";
 import { useI18n } from "vue-i18n";
 import { useMessage } from "naive-ui";
 import { downloadFile } from "@/api/hermes/download";
+	import { getApiKey } from "@/api/client";
 import { copyToClipboard } from "@/utils/clipboard";
 import MarkdownRenderer from "./MarkdownRenderer.vue";
 import { parseThinking, countThinkingChars } from "@/utils/thinking-parser";
@@ -23,6 +24,55 @@ const { t } = useI18n();
 const toast = useMessage();

 const isSystem = computed(() => props.message.role === "system");
+
+// Parsed view of the message content as ContentBlock[].
+// Yields the parsed array when the content is JSON for a non-empty array whose
+// first element carries a `type` key; yields null for empty/whitespace-only
+// content, for content that is not valid JSON, and for any other JSON value.
+const contentBlocks = computed(() => {
+  const content = props.message.content || '';
+  if (!content.trim()) return null;
+
+  try {
+    // Only the first element is inspected; the remaining elements are trusted as-is
+    const parsed = JSON.parse(content);
+    if (Array.isArray(parsed) && parsed.length > 0 && 'type' in parsed[0]) {
+      return parsed as ContentBlock[];
+    }
+  } catch {
+    // JSON.parse rejected the content, or the `in` check threw because the first
+    // element is not an object: fall through and treat the content as plain text
+  }
+
+  return null;
+});
+
+// True when the message content parsed as ContentBlock[] (i.e. contentBlocks is non-null)
+const isContentBlockArray = computed(() => contentBlocks.value !== null);
+
+// Text shown for the message: the raw content (or '') when it is not ContentBlock[],
+// otherwise the text of every `text` block joined with newlines.
+const displayText = computed(() => {
+  if (!isContentBlockArray.value) {
+    return props.message.content || '';
+  }
+
+  // Image and file blocks are dropped here; contentFiles exposes them separately
+  return contentBlocks.value!
+    .filter(block => block.type === 'text')
+    .map(block => block.text)
+    .join('\n');
+});
+
+// Image and file blocks of the message, or null when the content is not ContentBlock[]
+const contentFiles = computed(() => {
+  if (!isContentBlockArray.value) return null;
+
+  return contentBlocks.value!.filter(block => block.type === 'image' || block.type === 'file');
+});
+
+// Build the download endpoint URL for a stored file.
+// `path` and `name` are URI-encoded into the query string; when getApiKey()
+// returns a truthy value it is appended as a URI-encoded `token` parameter.
+// NOTE(review): the token travels in the URL itself, so it may surface wherever
+// URLs are recorded — confirm this is acceptable for the deployment.
+function getDownloadUrl(path: string, name: string): string {
+	const token = getApiKey();
+	const base = `/api/hermes/download?path=${encodeURIComponent(path)}&name=${encodeURIComponent(name)}`;
+	return token ? `${base}&token=${encodeURIComponent(token)}` : base;
+}
+
 const toolExpanded = ref(false);
 const previewUrl = ref<string | null>(null);

@@ -156,11 +206,29 @@ function formatSize(bytes: number): string {
 */
 function getFilePathFromContent(attName: string): string | null {
  const content = props.message.content || "";
+
+  // Try ContentBlock[] format first
+  try {
+    const parsed = JSON.parse(content);
+    if (Array.isArray(parsed) && parsed.length > 0 && 'type' in parsed[0]) {
+      const fileBlock = parsed.find((block: any) =>
+        block.type === 'file' && block.name === attName
+      );
+      if (fileBlock && (fileBlock as any).path) {
+        return (fileBlock as any).path;
+      }
+    }
+  } catch {
+    // Not valid JSON, continue to regex matching
+  }
+
+  // Fallback to markdown format: [File: name](path)
  const regex = /\[File:\s*([^\]]+)\]\(([^)]+)\)/g;
  let match: RegExpExecArray | null;
  while ((match = regex.exec(content)) !== null) {
    if (match[1].trim() === attName.trim()) return match[2];
  }
+
  return null;
 }

@@ -517,10 +585,57 @@ onBeforeUnmount(() => {
              </div>
            </div>
            <MarkdownRenderer
-              v-if="parsedThinking.body"
+              v-if="parsedThinking.body && message.role === 'assistant'"
              :content="parsedThinking.body"
            />

+            <!-- Render user message content -->
+            <template v-if="message.role === 'user'">
+              <!-- ContentBlock[] format -->
+              <template v-if="isContentBlockArray">
+                <div v-if="contentFiles && contentFiles.length > 0" class="msg-attachments">
+                  <div
+                    v-for="(file, idx) in contentFiles"
+                    :key="idx"
+                    class="msg-attachment"
+                    :class="{ image: file.type === 'image' }"
+                  >
+                    <template v-if="file.type === 'image'">
+                      <img
+                        :src="getDownloadUrl(file.path, file.name)"
+                        :alt="file.name"
+                        class="msg-attachment-thumb"
+                        @click="previewUrl = getDownloadUrl(file.path, file.name)"
+                      />
+                    </template>
+                    <template v-else>
+                      <div
+                        class="msg-attachment-file"
+                        @click="downloadFile(file.path, file.name).catch(err => toast.error(err.message || t('download.downloadFailed')))"
+                        style="cursor: pointer;"
+                        :title="t('download.downloadFile')"
+                      >
+                        <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
+                          <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z" />
+                          <polyline points="14 2 14 8 20 8" />
+                        </svg>
+                        <span class="att-name">{{ file.name }}</span>
+                      </div>
+                    </template>
+                  </div>
+                </div>
+                <MarkdownRenderer v-if="displayText" :content="displayText" />
+              </template>
+              <!-- Plain text format -->
+              <MarkdownRenderer v-else-if="message.content" :content="message.content" />
+            </template>
+
+            <!-- Render assistant message content -->
+            <MarkdownRenderer
+              v-if="message.role === 'assistant' && message.content && !parsedThinking.body"
+              :content="message.content"
+            />
+
            <span v-if="message.isStreaming && !message.content" class="streaming-dots">
              <span></span><span></span><span></span>
            </span>
@@ -5,6 +5,20 @@ export interface ChangelogEntry {
 }

 export const changelog: ChangelogEntry[] = [
+  {
+    version: '0.5.6',
+    date: '2026-05-02',
+    changes: [
+      'changelog.new_0_5_6_1',
+      'changelog.new_0_5_6_2',
+      'changelog.new_0_5_6_3',
+      'changelog.new_0_5_6_4',
+      'changelog.new_0_5_6_5',
+      'changelog.new_0_5_6_6',
+      'changelog.new_0_5_6_7',
+      'changelog.new_0_5_6_8',
+    ],
+  },
  {
    version: '0.5.5',
    date: '2026-05-01',
@@ -80,14 +94,4 @@ export const changelog: ChangelogEntry[] = [
    date: '2026-04-26',
    changes: ['changelog.new_0_4_8_1', 'changelog.new_0_4_8_2', 'changelog.new_0_4_8_3', 'changelog.new_0_4_8_4', 'changelog.new_0_4_8_5', 'changelog.new_0_4_8_7', 'changelog.new_0_4_8_8', 'changelog.new_0_4_8_9', 'changelog.new_0_4_8_10'],
  },
-  {
-    version: '0.4.7',
-    date: '2026-04-25',
-    changes: ['changelog.new_0_4_7_1', 'changelog.new_0_4_7_2', 'changelog.new_0_4_7_3', 'changelog.new_0_4_7_4', 'changelog.new_0_4_7_5', 'changelog.new_0_4_7_6', 'changelog.new_0_4_7_7', 'changelog.new_0_4_7_8', 'changelog.new_0_4_7_9', 'changelog.new_0_4_7_10', 'changelog.new_0_4_7_11'],
-  },
-  {
-    version: '0.4.5',
-    date: '2026-04-24',
-    changes: ['changelog.new_0_4_5_1', 'changelog.new_0_4_5_2', 'changelog.new_0_4_5_3', 'changelog.new_0_4_5_4', 'changelog.new_0_4_5_5', 'changelog.new_0_4_5_6', 'changelog.new_0_4_5_7', 'changelog.new_0_4_5_8'],
-  },
 ]
@@ -589,6 +589,14 @@ jobTriggered: 'Job ausgelost',
  // Anderungsprotokoll
  changelog: {

+    new_0_5_6_1: 'Sprachwiedergabe mit Web Speech API: manuelle Taste, Auto-Play-Schalter, Regenbogen-Randanimation und Mobile-Optimierung',
+    new_0_5_6_2: 'Robuster LLM-JSON-Parser mit Toleranz für Python-Format und Textextraktion aus Streaming-Events',
+    new_0_5_6_3: 'Skills-Verbesserungen: Nutzungsstatistiken, Quellfilterung, archivierte Skills, Herkunft und Pin-Toggle',
+    new_0_5_6_4: 'Erweiterte tägliche Nutzungsstatistiken mit detaillierter Token-Aufschlüsselung inkl. Cache-Lese-/Schreibtrennung',
+    new_0_5_6_5: 'Sitzungshistorie-Umfang geklärt mit verbesserten Beschreibungen in Chat- und Historienansichten',
+    new_0_5_6_6: 'Attachment-Verarbeitung neu gestaltet mit Anthropic-Stil ContentBlock-Array-Format (Text, Bild, Datei)',
+    new_0_5_6_7: 'Frontend-Dateidownload-Funktion für ContentBlock- und Markdown-Formate mit Authentifizierung hinzugefügt',
+    new_0_5_6_8: 'Multi-Prozess-Konflikt behoben, der SQLite-Database-Resets verursacht hat, durch Entfernen redundanter nodemon-Instanzen',
    new_0_5_5_1: '🎉 Tag der Arbeit! Heute wird nicht gearbeitet, bitte habt Verständnis',
    new_0_5_5_2: 'Verlaufsseite für Hermes-Sitzungshistorie hinzugefügt',
    new_0_5_5_3: 'Verlaufsseite verwaltet Sitzungen unabhängig ohne Störung des aktiven Chats',
@@ -759,6 +759,14 @@ export default {

  // Changelog
  changelog: {
+    new_0_5_6_1: 'Add voice playback feature with Web Speech API: manual button, auto-play toggle, rainbow border animation, and mobile optimization',
+    new_0_5_6_2: 'Add robust LLM JSON parser with tolerance for Python format and extract text from streaming events',
+    new_0_5_6_3: 'Add Skills enhancements: usage stats, source filtering, archived skills, provenance, and pin toggle',
+    new_0_5_6_4: 'Expand daily usage stats with detailed token breakdown including cache read/write separation',
+    new_0_5_6_5: 'Clarify session history scope with improved descriptions in chat and history views',
+    new_0_5_6_6: 'Redesigned attachment handling using Anthropic-style ContentBlock array format with type discriminated unions (text, image, file)',
+    new_0_5_6_7: 'Added frontend file download functionality supporting both ContentBlock and Markdown formats with authentication',
+    new_0_5_6_8: 'Fixed multi-process conflict causing SQLite database resets by eliminating redundant nodemon instances',
    new_0_5_5_1: '🎉 Happy Labor Day! No work this Labor Day, please bear with us if there are any issues',
    new_0_5_5_2: 'Add History page for browsing Hermes session history',
    new_0_5_5_3: 'History page manages session state independently without interfering with active chat',
@@ -589,6 +589,14 @@ jobTriggered: 'Job ejecutado',
  // Registro de cambios
  changelog: {

+    new_0_5_6_1: 'Reproducción de voz con Web Speech API: botón manual, interruptor auto-play, animación de borde arcoíris y optimización móvil',
+    new_0_5_6_2: 'Parser LLM JSON robusto con tolerancia a formato Python y extracción de texto de eventos streaming',
+    new_0_5_6_3: 'Mejoras de Skills: estadísticas de uso, filtrado de fuentes, skills archivados, procedencia y toggle de fijado',
+    new_0_5_6_4: 'Estadísticas diarias de uso expandidas con desglose detallado de tokens incluyendo separación de lectura/escritura de caché',
+    new_0_5_6_5: 'Ámbito del historial de sesiones aclarado con descripciones mejoradas en vistas de chat e historial',
+    new_0_5_6_6: 'Rediseñado el manejo de adjuntos usando formato de matriz ContentBlock estilo Anthropic (texto, imagen, archivo)',
+    new_0_5_6_7: 'Añadida funcionalidad de descarga de archivos en frontend soportando formatos ContentBlock y Markdown con autenticación',
+    new_0_5_6_8: 'Corregido conflicto de múltiples procesos que causaba reinicios de base de datos SQLite eliminando instancias nodemon redundantes',
    new_0_5_5_1: '🎉 ¡Feliz Día del Trabajo! Hoy no se trabaja, agradezcan su comprensión',
    new_0_5_5_2: 'Añadida página de historial para sesiones Hermes',
    new_0_5_5_3: 'La página de historial gestiona sesiones de forma independiente',
@@ -589,6 +589,14 @@ jobTriggered: 'Job declenche',
  // Journal des modifications
  changelog: {

+    new_0_5_6_1: 'Lecture vocale avec Web Speech API: bouton manuel, interrupteur auto-play, animation bordure arc-en-ciel et optimisation mobile',
+    new_0_5_6_2: 'Parseur JSON LLM robuste avec tolérance au format Python et extraction de texte des événements streaming',
+    new_0_5_6_3: 'Améliorations des Skills: statistiques d\'utilisation, filtrage par source, compétences archivées, provenance et toggle d\'épinglage',
+    new_0_5_6_4: 'Statistiques d\'utilisation quotidiennes étendues avec répartition détaillée des tokens incluant la séparation lecture/écriture du cache',
+    new_0_5_6_5: 'Portée de l\'historique des sessions clarifiée avec descriptions améliorées dans les vues chat et historique',
+    new_0_5_6_6: 'Repensé la gestion des pièces jointes en utilisant le format de tableau ContentBlock style Anthropic (texte, image, fichier)',
+    new_0_5_6_7: 'Ajouté la fonctionnalité de téléchargement de fichiers frontend supportant les formats ContentBlock et Markdown avec authentification',
+    new_0_5_6_8: 'Corrigé le conflit multi-processus causant des réinitialisations de base de données SQLite en éliminant les instances nodemon redondantes',
    new_0_5_5_1: '🎉 Joyeuse Fête du Travail! Pas de travail aujourd\'hui, merci de votre compréhension',
    new_0_5_5_2: 'Ajout d\'une page d\'historique pour les sessions Hermes',
    new_0_5_5_3: 'La page d\'historique gère les sessions de manière indépendante',
@@ -589,6 +589,14 @@ export default {
  // 更新履歴
  changelog: {

+    new_0_5_6_1: '音声再生機能：Web Speech API使用、手動再生ボタン、自動再生スイッチ、虹色ボーダーアニメーション、モバイル最適化',
+    new_0_5_6_2: '堅牢なLLM JSONパーサー：Python形式対応、ストリーミングイベントからのテキスト抽出',
+    new_0_5_6_3: 'Skills機能強化：使用統計、ソースフィルター、アーカイブ済みスキル、来歴、ピン留め切替',
+    new_0_5_6_4: '日次使用統計の拡張：キャッシュ読み取り/書き込み分離を含む詳細なトークン内訳',
+    new_0_5_6_5: 'セッション履歴範囲の明確化：チャットと履歴ビューの説明を改善',
+    new_0_5_6_6: 'AnthropicスタイルのContentBlock配列形式（テキスト、画像、ファイル）を使用して添付ファイル処理を再設計',
+    new_0_5_6_7: 'ContentBlockおよびMarkdown形式をサポートし、認証付きのフロントエンドファイルダウンロード機能を追加',
+    new_0_5_6_8: '重複するnodemonインスタンスを削除し、SQLiteデータベースのリセットを引き起こすマルチプロセス競合を修正',
    new_0_5_5_1: '🎉 労働者の日！今日はお休みです、何卒ご理解ください',
    new_0_5_5_2: 'Hermesセッション履歴ページを追加',
    new_0_5_5_3: '履歴ページはアクティブチャットに干渉せずにセッション管理',
@@ -589,6 +589,14 @@ export default {
  // 변경 이력
  changelog: {

+    new_0_5_6_1: '음성 재생 기능: Web Speech API, 수동 재생 버튼, 자동 재생 스위치, 무지개 테두리 애니메이션, 모바일 최적화',
+    new_0_5_6_2: '강력한 LLM JSON 파서: Python 형식 허용, 스트리밍 이벤트에서 텍스트 추출',
+    new_0_5_6_3: 'Skills 기능 향상: 사용 통계, 소스 필터링, 보관된 스킬, 출처, 고정 토글',
+    new_0_5_6_4: '확장된 일일 사용 통계: 캐시 읽기/쓰기 분리를 포함한 상세한 토큰 분석',
+    new_0_5_6_5: '세션 기록 범위 명확화: 채팅 및 기록 보기의 설명 개선',
+    new_0_5_6_6: 'Anthropic 스타일의 ContentBlock 배열 형식(텍스트, 이미지, 파일)을 사용하여 첨부파일 처리를 재설계',
+    new_0_5_6_7: '인증이 포함된 ContentBlock 및 Markdown 형식을 지원하는 프론트엔드 파일 다운로드 기능 추가',
+    new_0_5_6_8: '중복된 nodemon 인스턴스를 제거하여 SQLite 데이터베이스 재설정을 일으키는 다중 프로세스 충돌 수정',
    new_0_5_5_1: '🎉 노동절 감사합니다! 오늘은 쉬니까 양해 부탁드립니다',
    new_0_5_5_2: 'Hermes 세션 기록 페이지 추가',
    new_0_5_5_3: '기록 페이지는 독립적으로 세션 관리',
@@ -589,6 +589,14 @@ jobTriggered: 'Job acionado',
  // Registro de alteracoes
  changelog: {

+    new_0_5_6_1: 'Reprodução de voz: Web Speech API, botão manual, interruptor auto-play, animação de borda arco-íris e otimização móvel',
+    new_0_5_6_2: 'Parser JSON LLM robusto com tolerância ao formato Python e extração de texto de eventos streaming',
+    new_0_5_6_3: 'Melhorias de Skills: estatísticas de uso, filtragem de fonte, skills arquivados, procedência e toggle de fixação',
+    new_0_5_6_4: 'Estatísticas diárias de uso expandidas com breakdown detalhado de tokens incluindo separação de leitura/escrita de cache',
+    new_0_5_6_5: 'Escopo do histórico de sessões clarificado com descrições melhoradas nas visualizações de chat e histórico',
+    new_0_5_6_6: 'Processamento de anexos reprojetado usando formato de matriz ContentBlock estilo Anthropic (texto, imagem, arquivo)',
+    new_0_5_6_7: 'Adicionada funcionalidade de download de arquivos frontend suportando formatos ContentBlock e Markdown com autenticação',
+    new_0_5_6_8: 'Corrigido conflito de múltiplos processos que causava redefinições do banco de dados SQLite eliminando instâncias nodemon redundantes',
    new_0_5_5_1: '🎉 Feliz Dia do Trabalhador! Hoje não se trabalha, obrigado pela compreensão',
    new_0_5_5_2: 'Adicionada página de histórico para sessões Hermes',
    new_0_5_5_3: 'Página de histórico gerencia sessões de forma independente',
@@ -761,6 +761,14 @@ export default {

  // 更新日志
  changelog: {
+    new_0_5_6_1: '新增语音播放功能：使用 Web Speech API，支持手动播放按钮、自动播放开关、彩虹边框动画和移动端优化',
+    new_0_5_6_2: '新增健壮的 LLM JSON 解析器，兼容 Python 格式并从流式事件中提取文本',
+    new_0_5_6_3: 'Skills 功能增强：使用统计、来源过滤、归档技能、来源追溯和置顶切换',
+    new_0_5_6_4: '扩展每日使用统计，包含详细的 token 细分，分离缓存读/写统计',
+    new_0_5_6_5: '优化会话历史范围说明，改进聊天和历史视图的描述',
+    new_0_5_6_6: '重新设计附件处理，采用 Anthropic 风格的 ContentBlock 数组格式，支持类型区分（文本、图片、文件）',
+    new_0_5_6_7: '新增前端文件下载功能，支持 ContentBlock 和 Markdown 两种格式，带身份验证',
+    new_0_5_6_8: '修复多进程冲突导致的 SQLite 数据库重置问题，清理冗余 nodemon 进程',
    new_0_5_5_1: '🎉 五一劳动节快乐！这个劳动节就不劳动啦，如果有问题大家忍忍',
    new_0_5_5_2: '新增历史页面，用于浏览 Hermes 会话历史记录',
    new_0_5_5_3: '历史页面独立管理会话状态，不影响当前聊天页面的活动会话',
@@ -1,4 +1,4 @@
-import { startRunViaSocket, resumeSession, registerSessionHandlers, unregisterSessionHandlers, type RunEvent } from '@/api/hermes/chat'
+import { startRunViaSocket, resumeSession, registerSessionHandlers, unregisterSessionHandlers, type RunEvent, type ContentBlock as ContentBlockImport } from '@/api/hermes/chat'
 import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions'
 import { getApiKey } from '@/api/client'
 import { defineStore } from 'pinia'
@@ -7,6 +7,9 @@ import { useAppStore } from './app'
 import { useProfilesStore } from './profiles'
 import { detectThinkingBoundary } from '@/utils/thinking-parser'

+// Re-export ContentBlock for convenience
+export type ContentBlock = ContentBlockImport
+
 export interface Attachment {
  id: string
  name: string
@@ -74,6 +77,47 @@ async function uploadFiles(attachments: Attachment[]): Promise<{ name: string; p
  return data.files
 }

+/**
+ * Build the ContentBlock[] payload for a user message.
+ *
+ * A `text` block holding the trimmed `content` comes first (omitted when the
+ * content is empty or whitespace-only). Attachment blocks follow only when
+ * `attachments` is non-empty and `uploadedFiles` is provided: one block per
+ * entry of `uploadedFiles`, an `image` block when the paired attachment's
+ * `type` starts with `image/`, otherwise a `file` block.
+ *
+ * NOTE(review): uploaded files are paired with attachments by index, which
+ * assumes the upload response preserves the order of `attachments` — confirm.
+ * Nothing is awaited inside; the function is async only by declaration.
+ *
+ * @param content - raw message text
+ * @param attachments - source attachments; their `type` supplies the media type
+ * @param uploadedFiles - name/path pairs of the uploaded files
+ * @returns the blocks in order: optional text block, then attachment blocks
+ */
+async function buildContentBlocks(
+  content: string,
+  attachments?: Attachment[],
+  uploadedFiles?: { name: string; path: string }[]
+): Promise<ContentBlock[]> {
+  const blocks: ContentBlock[] = []
+
+  // Text block first, skipped entirely for empty or whitespace-only content
+  if (content.trim()) {
+    blocks.push({ type: 'text', text: content.trim() })
+  }
+
+  // Attachment blocks carry the uploaded name/path, not the local attachment's
+  if (attachments && attachments.length > 0 && uploadedFiles) {
+    for (let i = 0; i < uploadedFiles.length; i++) {
+      const uploaded = uploadedFiles[i]
+      const attachment = attachments[i]
+
+      // An attachment whose MIME type starts with "image/" becomes an image block
+      if (attachment?.type.startsWith('image/')) {
+        blocks.push({
+          type: 'image',
+          name: uploaded.name,
+          path: uploaded.path,
+          media_type: attachment.type,
+        })
+      } else {
+        // Everything else, including an upload with no paired attachment, is a file block
+        blocks.push({
+          type: 'file',
+          name: uploaded.name,
+          path: uploaded.path,
+          media_type: attachment?.type,
+        })
+      }
+    }
+  }
+
+  return blocks
+}
+
 function mapHermesMessages(msgs: HermesMessage[]): Message[] {
  // Build lookups from assistant messages with tool_calls
  const toolNameMap = new Map<string, string>()
@@ -608,11 +652,13 @@ export const useChatStore = defineStore('chat', () => {

    try {

-      // Upload attachments and build input with file paths
-      let inputText = content.trim()
+      // Build input in Anthropic format
+      let input: string | ContentBlock[]
      if (attachments && attachments.length > 0) {
+        // Has attachments: upload first, then build content blocks
        const uploaded = await uploadFiles(attachments)
-        // Replace blob URLs with persistent download URLs on the user message
+
+        // Update attachment URLs on the user message for display
        const token = getApiKey()
        const urlMap = new Map(uploaded.map(f => {
          const base = `/api/hermes/download?path=${encodeURIComponent(f.path)}&name=${encodeURIComponent(f.name)}`
@@ -626,14 +672,18 @@ export const useChatStore = defineStore('chat', () => {
            return dl ? { ...a, url: dl } : a
          })
        }
-        const pathParts = uploaded.map(f => `[File: ${f.name}](${urlMap.get(f.name)})`)
-        inputText = inputText ? inputText + '\n\n' + pathParts.join('\n') : pathParts.join('\n')
+
+        // Build content blocks with uploaded file paths
+        input = await buildContentBlocks(content, attachments, uploaded)
+      } else {
+        // No attachments: use plain text format
+        input = content.trim()
      }

      const appStore = useAppStore()
      const sessionModel = activeSession.value?.model || appStore.selectedModel
      const runPayload = {
-        input: inputText,
+        input,
        session_id: sid,
        model: sessionModel || undefined,
      }
@@ -29,6 +29,65 @@ import { getCompressionSnapshot } from '../../db/hermes/compression-snapshot'
 import { parseLLMJSON, parseToolArguments, parseAnthropicContentArray } from '../../lib/llm-json'
 import { logger } from '../logger'

+/**
+ * Content block types for Anthropic-compatible message format
+ */
+export type ContentBlock =
+  | { type: 'text'; text: string }
+  | { type: 'image'; name: string; path: string; media_type: string }
+  | { type: 'file'; name: string; path: string; media_type?: string }
+
+/**
+ * Convert ContentBlock[] to string for display/storage
+ * - string → returned as-is
+ * - ContentBlock[] → returned as a JSON string (JSON.stringify)
+ */
+function contentBlocksToString(input: string | ContentBlock[]): string {
+  if (typeof input === 'string') return input
+  return JSON.stringify(input)
+}
+
+/**
+ * Extract the plain text of a run input, used to build the session title preview.
+ *
+ * @param input - a plain string or a ContentBlock[]
+ * @returns the string itself, or the text of all `text` blocks joined with newlines
+ *          (image and file blocks are ignored; an array with no text blocks gives '')
+ */
+function extractTextForPreview(input: string | ContentBlock[]): string {
+  if (typeof input === 'string') return input
+
+  return input
+    .filter(block => block.type === 'text')
+    .map(block => block.text)
+    .join('\n')
+}
+
+/**
+ * Type guard: report whether `input` looks like a non-empty ContentBlock[].
+ *
+ * Only the first element is inspected: it must be a non-null object carrying a
+ * `type` key. The object check matters because the `in` operator throws a
+ * TypeError when its right-hand side is a primitive (e.g. `['a']` or `[1]`)
+ * or null, and this guard must answer false for such input instead of throwing.
+ *
+ * @param input - any value, typically the `input` field of a run request
+ * @returns true when `input` is an array whose first element is an object with a `type` property
+ */
+function isContentBlockArray(input: any): input is ContentBlock[] {
+  if (!Array.isArray(input) || input.length === 0) return false
+  const first: unknown = input[0]
+  return typeof first === 'object' && first !== null && 'type' in first
+}
+
+/**
+ * Flatten a ContentBlock[] into the single plain-text string sent upstream.
+ *
+ * - text blocks contribute their text verbatim
+ * - image blocks become an `[Image: <path>]` mention
+ * - file blocks become a `[File: <path>]` mention
+ *
+ * No file is read and nothing is base64-encoded here; only the stored path is
+ * referenced. Parts are joined with a newline so that an attachment mention is
+ * never glued onto the end of the preceding text. Blocks of any other type are
+ * skipped.
+ *
+ * @param blocks - content blocks in the order they should appear
+ * @returns the newline-joined text; kept async so existing `await` callers are unchanged
+ */
+async function convertContentBlocks(blocks: ContentBlock[]): Promise<string> {
+  const parts: string[] = []
+
+  for (const block of blocks) {
+    if (block.type === 'text') {
+      parts.push(block.text)
+    } else if (block.type === 'image') {
+      parts.push(`[Image: ${block.path}]`)
+    } else if (block.type === 'file') {
+      parts.push(`[File: ${block.path}]`)
+    }
+  }
+
+  return parts.join('\n')
+}
+
 const compressor = new ChatContextCompressor()

 // --- Helper: Convert OpenAI format to Anthropic format ---
@@ -439,7 +498,7 @@ export class ChatRunSocket {

  private async handleRun(
    socket: Socket,
-    data: { input: string; session_id?: string; model?: string; instructions?: string },
+    data: { input: string | ContentBlock[]; session_id?: string; model?: string; instructions?: string },
    profile: string,
  ) {
    const { input, session_id, model, instructions } = data
@@ -452,24 +511,27 @@ export class ChatRunSocket {
      : undefined

    const now = Math.floor(Date.now() / 1000)
-
    // Mark working immediately on run start, and append user message
    if (session_id) {
      const state = this.getOrCreateSession(session_id)
      this.hermesSessionIds.set(session_id, hermesSessionId)
      state.isWorking = true
      state.profile = profile
+
+      // Convert ContentBlock[] to string for storage
+      const inputStr = contentBlocksToString(input)
      state.messages.push({
        id: state.messages.length + 1,
        session_id,
        role: 'user',
-        content: input,
+        content: inputStr,
        timestamp: now,
      })

      // Create session in local DB if it doesn't exist
      if (!getSession(session_id)) {
-        const preview = input.replace(/[\r\n]/g, ' ').substring(0, 100)
+        const previewText = extractTextForPreview(input)
+        const preview = previewText.replace(/[\r\n]/g, ' ').substring(0, 100)
        createSession({ id: session_id, profile, model, title: preview })
      }

@@ -477,7 +539,7 @@ export class ChatRunSocket {
      addMessage({
        session_id,
        role: 'user',
-        content: input,
+        content: inputStr,
        timestamp: now,
      })

@@ -808,14 +870,16 @@ export class ChatRunSocket {

      const headers: Record<string, string> = { 'Content-Type': 'application/json' }
      if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`
+      // Flatten ContentBlock[] input into a plain string for upstream; images and files become "[Image: path]" / "[File: path]" mentions (no base64 encoding happens here)
+      if (isContentBlockArray(input)) {
+        body.input = await convertContentBlocks(input)
+      }

      // Debug: write history to JSON file for analysis (before conversion)

      // Convert conversation_history from OpenAI format to Anthropic format
      if (body.conversation_history && Array.isArray(body.conversation_history)) {
        body.conversation_history = convertToAnthropicFormat(body.conversation_history)
-        logger.info('[chat-run-socket] converted conversation_history to Anthropic format for session %s: %d messages, content: %s',
-          session_id || '(new)', body.conversation_history.length, JSON.stringify(body.conversation_history, null, 2))
      }
      const res = await fetch(`${upstream}/v1/runs`, {
        method: 'POST',
@@ -823,7 +887,6 @@ export class ChatRunSocket {
        body: JSON.stringify(body),
        signal: AbortSignal.timeout(120_000),
      })
-
      if (!res.ok) {
        const text = await res.text().catch(() => '')
        emit('run.failed', { event: 'run.failed', error: `Upstream ${res.status}: ${text}` })