mirror of
https://github.com/EKKOLearnAI/hermes-web-ui.git
synced 2026-05-27 14:30:18 +00:00
feat(chat): replace HTTP+SSE with Socket.IO for chat runs and add context compression
- Replace HTTP POST + SSE streaming with Socket.IO /chat-run namespace for decoupled message handling that survives client disconnect/refresh
- Add SQLite-backed context compression with snapshot-based incremental updates
- Unify server-side session state tracking (completedSessions + compressingSessions → sessionStates) for reliable state replay on reconnect
- Filter compress_ sessions from session list queries
- Add compression snapshot store with proper snake_case→camelCase column aliases
- Delete temporary compress_ sessions after compression completes
- Change compressed summary role from 'system' to 'user'
- Add compression.started/completed events to frontend chat store

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+3
-2
@@ -62,15 +62,16 @@
|
||||
],
|
||||
"dependencies": {
|
||||
"eventsource": "^4.1.0",
|
||||
"js-tiktoken": "^1.0.21",
|
||||
"node-pty": "^1.1.0",
|
||||
"socket.io": "^4.8.3",
|
||||
"socket.io-client": "^4.8.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@multiavatar/multiavatar": "^1.0.7",
|
||||
"@koa/bodyparser": "^5.0.0",
|
||||
"@koa/cors": "^5.0.0",
|
||||
"@koa/router": "^15.4.0",
|
||||
"@multiavatar/multiavatar": "^1.0.7",
|
||||
"@pinia/testing": "^1.0.3",
|
||||
"@types/eventsource": "^1.1.15",
|
||||
"@types/js-yaml": "^4.0.9",
|
||||
@@ -117,4 +118,4 @@
|
||||
"vue-tsc": "^3.2.6",
|
||||
"ws": "^8.20.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { io, type Socket } from 'socket.io-client'
|
||||
import { request, getBaseUrlValue, getApiKey } from '../client'
|
||||
|
||||
export interface ChatMessage {
|
||||
@@ -8,7 +9,6 @@ export interface ChatMessage {
|
||||
export interface StartRunRequest {
|
||||
input: string | ChatMessage[]
|
||||
instructions?: string
|
||||
conversation_history?: ChatMessage[]
|
||||
session_id?: string
|
||||
model?: string
|
||||
}
|
||||
@@ -38,70 +38,134 @@ export interface RunEvent {
|
||||
output_tokens: number
|
||||
total_tokens: number
|
||||
}
|
||||
/** session_id tag added by server for client-side filtering */
|
||||
session_id?: string
|
||||
}
|
||||
|
||||
export async function startRun(body: StartRunRequest): Promise<StartRunResponse> {
|
||||
const headers: Record<string, string> = {}
|
||||
if (body.session_id) {
|
||||
headers['X-Hermes-Session-Id'] = body.session_id
|
||||
// ============================
|
||||
// Socket.IO chat run connection
|
||||
// ============================
|
||||
|
||||
let chatRunSocket: Socket | null = null
|
||||
|
||||
/**
 * Returns the module-level chat-run socket as-is, without creating or
 * connecting one. The value is `null` until `connectChatRun()` has assigned
 * a socket, and again after `disconnectChatRun()` has cleared it.
 */
export function getChatRunSocket(): Socket | null {
|
||||
return chatRunSocket
|
||||
}
|
||||
|
||||
/**
 * Returns the shared Socket.IO client for the `/chat-run` namespace, creating
 * it when there is no usable one.
 *
 * An existing socket is reused while it is connected or still active (i.e.
 * connecting / automatically reconnecting). Only a socket that has stopped
 * trying is torn down and replaced.
 *
 * The API key is sent as `auth.token` and the active profile name (read from
 * localStorage, falling back to `'default'`) as the `profile` query
 * parameter. Both are captured when the socket is created.
 *
 * @returns The shared chat-run socket (possibly not yet connected).
 */
export function connectChatRun(): Socket {
  // Reuse the socket while it is connected OR still working on a connection.
  // Replacing a socket that is merely mid-handshake / mid-reconnect would
  // discard its buffered emits and strip the listeners that in-flight runs
  // registered on it, leaving those runs without any further events.
  if (chatRunSocket && (chatRunSocket.connected || chatRunSocket.active)) {
    return chatRunSocket
  }

  // Clean up a dead socket to prevent duplicate event listeners
  if (chatRunSocket) {
    chatRunSocket.removeAllListeners()
    chatRunSocket.disconnect()
  }

  const baseUrl = getBaseUrlValue()
  const token = getApiKey()
  const profile = localStorage.getItem('hermes_active_profile_name') || 'default'

  chatRunSocket = io(`${baseUrl}/chat-run`, {
    auth: { token },
    query: { profile },
    transports: ['websocket', 'polling'],
    reconnection: true,
    reconnectionAttempts: Infinity,
    reconnectionDelay: 1000,
    reconnectionDelayMax: 10000,
  })

  return chatRunSocket
}
|
||||
|
||||
export function streamRunEvents(
|
||||
runId: string,
|
||||
/**
 * Disconnects the shared chat-run socket, if one exists, and resets the
 * module-level reference to `null` so the next `connectChatRun()` call
 * creates a fresh socket. Does nothing when no socket is held.
 */
export function disconnectChatRun(): void {
|
||||
if (chatRunSocket) {
|
||||
chatRunSocket.disconnect()
|
||||
chatRunSocket = null
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Start a chat run over the shared Socket.IO connection and stream its
 * events back through the supplied callbacks.
 *
 * Listeners are attached to the shared socket for the lifetime of the run and
 * detached again when the run completes, fails, or is aborted.
 *
 * @param body      Run request; emitted to the server as the `run` event.
 * @param onEvent   Called for every received run event while the run is open.
 * @param onDone    Called once after a `run.completed` or `run.failed` event.
 * @param onError   Called after `onDone` when the run failed, with the
 *                  server-provided error text (or `'Run failed'`).
 * @param onStarted Optional; called with the run id carried by `run.started`
 *                  (empty string when the event has none).
 * @returns A handle whose `abort()` emits `abort` for `body.session_id` and
 *          detaches the listeners. Aborting does not invoke `onDone` or
 *          `onError`, and is a no-op once the run has already ended.
 */
export function startRunViaSocket(
  body: StartRunRequest,
  onEvent: (event: RunEvent) => void,
  onDone: () => void,
  onError: (err: Error) => void,
  onStarted?: (runId: string) => void,
): { abort: () => void } {
  const socket = connectChatRun()
  let closed = false

  // Events that are simply forwarded to `onEvent`. Registration and removal
  // are both driven from this single list so they cannot drift apart (a
  // mismatched `off` would leave a stale listener on the shared socket).
  const forwardedEvents = [
    'message.delta',
    'reasoning.delta',
    'thinking.delta',
    'reasoning.available',
    'tool.started',
    'tool.completed',
    'run.completed',
    'compression.started',
    'compression.completed',
  ] as const

  // Forwards one event and finishes the run on a terminal event.
  const handleEvent = (event: RunEvent) => {
    if (closed) return
    onEvent(event)
    if (event.event === 'run.completed' || event.event === 'run.failed') {
      cleanup()
      onDone()
    }
  }

  function onRunStarted(data: RunEvent) {
    handleEvent(data)
    onStarted?.(data.run_id || '')
  }

  function onRunFailed(data: RunEvent) {
    // handleEvent treats run.failed as terminal, so onDone fires before onError.
    handleEvent(data)
    onError(new Error(data.error || 'Run failed'))
  }

  // Detaches every listener this run registered; safe to call more than once.
  function cleanup() {
    if (closed) return
    closed = true
    socket.off('run.started', onRunStarted)
    socket.off('run.failed', onRunFailed)
    for (const name of forwardedEvents) socket.off(name, handleEvent)
  }

  socket.on('run.started', onRunStarted)
  socket.on('run.failed', onRunFailed)
  for (const name of forwardedEvents) socket.on(name, handleEvent)

  // NOTE(review): the listeners live on a socket shared by all runs and events
  // are not filtered by `session_id` here — confirm the server only delivers
  // events for this run, or that the caller filters in `onEvent`.

  // Kick off the run. No ack is used; the run id arrives via `run.started`.
  socket.emit('run', body)

  return {
    abort: () => {
      if (!closed) {
        socket.emit('abort', { session_id: body.session_id })
        cleanup()
      }
    },
  }
}
|
||||
|
||||
export async function fetchModels(): Promise<{ data: Array<{ id: string }> }> {
|
||||
|
||||
@@ -12,6 +12,12 @@ const { t } = useI18n();
|
||||
const { isDark } = useTheme();
|
||||
const listRef = ref<HTMLElement>();
|
||||
|
||||
/**
 * Formats a token count for compact display: plain digits below one thousand,
 * one decimal with a `K` suffix for thousands, and one decimal with an `M`
 * suffix for millions.
 *
 * @param n Token count.
 * @returns Display string such as `"999"`, `"1.5K"` or `"2.5M"`.
 */
function formatTokens(n: number): string {
  if (n >= 1_000) {
    const thousands = (n / 1_000).toFixed(1)
    // Values just under one million round up to "1000.0" in the K branch;
    // promote those to the M branch so the output reads "1.0M" instead.
    if (n < 1_000_000 && Number(thousands) < 1_000) return thousands + 'K'
    return (n / 1_000_000).toFixed(1) + 'M'
  }
  return String(n)
}
|
||||
|
||||
const displayMessages = computed(() =>
|
||||
chatStore.messages.filter((m) => m.role !== "tool"),
|
||||
);
|
||||
@@ -128,7 +134,48 @@ watch(currentToolCalls, () => {
|
||||
playsinline
|
||||
class="thinking-video"
|
||||
/>
|
||||
<div v-if="currentToolCalls.length > 0" class="tool-calls-panel">
|
||||
<div v-if="currentToolCalls.length > 0 || chatStore.compressionState" class="tool-calls-panel">
|
||||
<!-- Compression indicator -->
|
||||
<div v-if="chatStore.compressionState" class="tool-call-item compression-item">
|
||||
<svg
|
||||
v-if="chatStore.compressionState.compressing"
|
||||
width="12"
|
||||
height="12"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
stroke-width="1.5"
|
||||
class="tool-call-icon"
|
||||
>
|
||||
<path d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15" />
|
||||
</svg>
|
||||
<svg
|
||||
v-else-if="chatStore.compressionState.compressed"
|
||||
width="12"
|
||||
height="12"
|
||||
viewBox="0 0 24 24"
|
||||
fill="none"
|
||||
stroke="currentColor"
|
||||
stroke-width="1.5"
|
||||
class="tool-call-icon"
|
||||
>
|
||||
<path d="M5 13l4 4L19 7" />
|
||||
</svg>
|
||||
<span class="tool-call-name">
|
||||
{{
|
||||
chatStore.compressionState.compressing
|
||||
? `Compressing... (${chatStore.compressionState.messageCount} msgs, ~${formatTokens(chatStore.compressionState.beforeTokens)} tokens)`
|
||||
: chatStore.compressionState.compressed
|
||||
? `Compressed ${chatStore.compressionState.messageCount} msgs: ~${formatTokens(chatStore.compressionState.beforeTokens)} → ~${formatTokens(chatStore.compressionState.afterTokens)} tokens`
|
||||
: `Compression skipped`
|
||||
}}
|
||||
</span>
|
||||
<span
|
||||
v-if="chatStore.compressionState.compressing"
|
||||
class="tool-call-spinner"
|
||||
></span>
|
||||
</div>
|
||||
<!-- Tool calls -->
|
||||
<div
|
||||
v-for="tc in currentToolCalls"
|
||||
:key="tc.id"
|
||||
@@ -253,6 +300,11 @@ watch(currentToolCalls, () => {
|
||||
background: rgba(255, 255, 255, 0.06);
|
||||
}
|
||||
|
||||
&.compression-item {
|
||||
color: $text-muted;
|
||||
font-size: 10px;
|
||||
}
|
||||
|
||||
.tool-call-icon {
|
||||
flex-shrink: 0;
|
||||
color: $text-muted;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { startRun, streamRunEvents, type ChatMessage, type RunEvent } from '@/api/hermes/chat'
|
||||
import { startRunViaSocket, connectChatRun, type RunEvent } from '@/api/hermes/chat'
|
||||
import { deleteSession as deleteSessionApi, fetchSession, fetchSessions, fetchSessionUsageSingle, type HermesMessage, type SessionSummary } from '@/api/hermes/sessions'
|
||||
import { getApiKey } from '@/api/client'
|
||||
import { defineStore } from 'pinia'
|
||||
@@ -170,18 +170,10 @@ function mapHermesSession(s: SessionSummary): Session {
|
||||
}
|
||||
}
|
||||
|
||||
// Cache keys for stale-while-revalidate loading of sessions / messages.
|
||||
// All keys include the active profile name to isolate cache between profiles.
|
||||
// Rendering from cache on boot avoids the multi-round-trip wait the user sees
|
||||
// every time they open the page (esp. noticeable on mobile).
|
||||
const STORAGE_KEY_PREFIX = 'hermes_active_session_'
|
||||
const SESSIONS_CACHE_KEY_PREFIX = 'hermes_sessions_cache_v1_'
|
||||
const LEGACY_STORAGE_KEY = 'hermes_active_session'
|
||||
const LEGACY_SESSIONS_CACHE_KEY = 'hermes_sessions_cache_v1'
|
||||
const IN_FLIGHT_TTL_MS = 15 * 60 * 1000 // Give up after 15 minutes
|
||||
const POLL_INTERVAL_MS = 2000
|
||||
const POLL_STABLE_EXITS = 3 // 3 × 2s = 6s of no change → assume run finished
|
||||
const LIVE_BADGE_WINDOW_MS = 5 * 60 * 1000
|
||||
const IN_FLIGHT_TTL_MS = 15 * 60 * 1000 // Give up after 15 minutes
|
||||
|
||||
// 获取当前 profile 名称,用于隔离缓存。
|
||||
// 从 profiles store 的 activeProfileName(同步 localStorage)读取,
|
||||
@@ -196,11 +188,9 @@ function getProfileName(): string {
|
||||
|
||||
function storageKey(): string { return STORAGE_KEY_PREFIX + getProfileName() }
|
||||
function sessionsCacheKey(): string { return SESSIONS_CACHE_KEY_PREFIX + getProfileName() }
|
||||
function msgsCacheKey(sid: string): string { return `hermes_session_msgs_v1_${getProfileName()}_${sid}_` }
|
||||
function inFlightKey(sid: string): string { return `hermes_in_flight_v1_${getProfileName()}_${sid}` }
|
||||
function legacyStorageKey(): string | null { return getProfileName() === 'default' ? LEGACY_STORAGE_KEY : null }
|
||||
function legacySessionsCacheKey(): string | null { return getProfileName() === 'default' ? LEGACY_SESSIONS_CACHE_KEY : null }
|
||||
function legacyMsgsCacheKey(sid: string): string | null { return getProfileName() === 'default' ? `hermes_session_msgs_v1_${sid}` : null }
|
||||
function inFlightKey(sid: string): string { return `hermes_in_flight_v1_${getProfileName()}_${sid}` }
|
||||
function legacyInFlightKey(sid: string): string | null { return getProfileName() === 'default' ? `hermes_in_flight_v1_${sid}` : null }
|
||||
|
||||
interface InFlightRun {
|
||||
@@ -303,61 +293,37 @@ function removeItemWithLegacy(key: string, legacyKey?: string | null) {
|
||||
|
||||
// Strip the circular `file: File` reference from attachments before caching —
|
||||
// File objects don't serialize and we only need name/type/size/url for display.
|
||||
function sanitizeForCache(msgs: Message[]): Message[] {
|
||||
return msgs.map(m => {
|
||||
if (!m.attachments?.length) return m
|
||||
return {
|
||||
...m,
|
||||
attachments: m.attachments.map(a => ({ id: a.id, name: a.name, type: a.type, size: a.size, url: a.url })),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Heals assistant messages whose `reasoning` field was polluted by the
|
||||
// old bug where `reasoning.available` clobbered it with the assistant
|
||||
// content. Detection heuristic: reasoning is a prefix of content (the
|
||||
// bug always derived `reasoning` from `content[:500]` with tags stripped).
|
||||
// Legitimate reasoning is almost never a prefix of the final answer.
|
||||
function scrubBuggyReasoningInCache(msgs: Message[] | null | undefined): Message[] {
|
||||
if (!msgs) return []
|
||||
return msgs.map(m => {
|
||||
if (m.role !== 'assistant' || !m.reasoning || !m.content) return m
|
||||
const r = m.reasoning.trim()
|
||||
const c = m.content.trim()
|
||||
if (!r || !c) return m
|
||||
if (c === r || c.startsWith(r)) {
|
||||
const { reasoning: _drop, ...rest } = m
|
||||
return rest as Message
|
||||
}
|
||||
return m
|
||||
})
|
||||
}
|
||||
|
||||
export const useChatStore = defineStore('chat', () => {
|
||||
const sessions = ref<Session[]>([])
|
||||
const activeSessionId = ref<string | null>(null)
|
||||
const focusMessageId = ref<string | null>(null)
|
||||
const streamStates = ref<Map<string, AbortController>>(new Map())
|
||||
const streamStates = ref<Map<string, { abort: () => void }>>(new Map())
|
||||
const isStreaming = computed(() => activeSessionId.value != null && streamStates.value.has(activeSessionId.value))
|
||||
const isLoadingSessions = ref(false)
|
||||
const sessionsLoaded = ref(false)
|
||||
const isLoadingMessages = ref(false)
|
||||
// tmux-like resume state: true when we recovered an in-flight run from
|
||||
// localStorage after a refresh and are polling fetchSession for progress.
|
||||
// UI shows the thinking indicator while this is set.
|
||||
const resumingRuns = ref<Set<string>>(new Set())
|
||||
const isRunActive = computed(() =>
|
||||
isStreaming.value
|
||||
|| (activeSessionId.value != null && resumingRuns.value.has(activeSessionId.value))
|
||||
)
|
||||
const pollTimers = new Map<string, ReturnType<typeof setInterval>>()
|
||||
const pollSignatures = new Map<string, { sig: string, stableTicks: number }>()
|
||||
const isRunActive = computed(() => isStreaming.value)
|
||||
|
||||
// Compression state
|
||||
const compressionState = ref<{
|
||||
compressing: boolean
|
||||
messageCount: number
|
||||
beforeTokens: number
|
||||
afterTokens: number
|
||||
compressed: boolean | null
|
||||
error?: string
|
||||
} | null>(null)
|
||||
|
||||
// Replaces the store's compression state wholesale; passing null clears it.
function setCompressionState(state: typeof compressionState.value) {
|
||||
compressionState.value = state
|
||||
}
|
||||
|
||||
const activeSession = ref<Session | null>(null)
|
||||
const messages = computed<Message[]>(() => activeSession.value?.messages || [])
|
||||
|
||||
function isSessionLive(sessionId: string): boolean {
|
||||
if (streamStates.value.has(sessionId) || resumingRuns.value.has(sessionId)) return true
|
||||
if (streamStates.value.has(sessionId)) return true
|
||||
|
||||
const session = sessions.value.find(candidate => candidate.id === sessionId)
|
||||
if (!session?.lastActiveAt || session.endedAt != null) return false
|
||||
@@ -365,7 +331,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
|
||||
function persistSessionsList() {
|
||||
// Cache lightweight summaries only (messages are cached per-session).
|
||||
saveJsonWithLegacy(
|
||||
sessionsCacheKey(),
|
||||
sessions.value.map(s => ({ ...s, messages: [] })),
|
||||
@@ -373,13 +338,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
)
|
||||
}
|
||||
|
||||
function persistActiveMessages() {
|
||||
const sid = activeSessionId.value
|
||||
if (!sid) return
|
||||
const s = sessions.value.find(sess => sess.id === sid)
|
||||
if (s) saveJsonWithLegacy(msgsCacheKey(sid), sanitizeForCache(s.messages), legacyMsgsCacheKey(sid))
|
||||
}
|
||||
|
||||
function markInFlight(sid: string, runId: string) {
|
||||
saveJsonWithLegacy(inFlightKey(sid), { runId, startedAt: Date.now() } as InFlightRun, legacyInFlightKey(sid))
|
||||
}
|
||||
@@ -398,122 +356,10 @@ export const useChatStore = defineStore('chat', () => {
|
||||
return rec
|
||||
}
|
||||
|
||||
function compareServerMessages(local: Message[], server: Message[]) {
|
||||
const localUserIndexes = local.map((m, i) => (m.role === 'user' ? i : -1)).filter(i => i >= 0)
|
||||
const serverUserIndexes = server.map((m, i) => (m.role === 'user' ? i : -1)).filter(i => i >= 0)
|
||||
const localUsers = localUserIndexes.length
|
||||
const serverUsers = serverUserIndexes.length
|
||||
|
||||
if (serverUsers > localUsers) return { serverIsCaughtUp: true, serverIsAhead: true }
|
||||
if (serverUsers < localUsers) return { serverIsCaughtUp: false, serverIsAhead: false }
|
||||
|
||||
const localLastUserIndex = localUserIndexes[localUserIndexes.length - 1] ?? -1
|
||||
const serverLastUserIndex = serverUserIndexes[serverUserIndexes.length - 1] ?? -1
|
||||
const sameCurrentTurn =
|
||||
localLastUserIndex < 0
|
||||
|| serverLastUserIndex < 0
|
||||
|| local[localLastUserIndex]?.content === server[serverLastUserIndex]?.content
|
||||
|
||||
if (!sameCurrentTurn) return { serverIsCaughtUp: false, serverIsAhead: false }
|
||||
|
||||
const localCurrentAssistantLen = local
|
||||
.slice(localLastUserIndex + 1)
|
||||
.filter(m => m.role === 'assistant')
|
||||
.reduce((total, m) => total + (m.content?.length || 0), 0)
|
||||
const serverCurrentAssistantLen = server
|
||||
.slice(serverLastUserIndex + 1)
|
||||
.filter(m => m.role === 'assistant')
|
||||
.reduce((total, m) => total + (m.content?.length || 0), 0)
|
||||
|
||||
return {
|
||||
serverIsCaughtUp: true,
|
||||
serverIsAhead: serverCurrentAssistantLen >= localCurrentAssistantLen,
|
||||
}
|
||||
}
|
||||
|
||||
function stopPolling(sid: string) {
|
||||
const t = pollTimers.get(sid)
|
||||
if (t) {
|
||||
clearInterval(t)
|
||||
pollTimers.delete(sid)
|
||||
}
|
||||
pollSignatures.delete(sid)
|
||||
resumingRuns.value = new Set([...resumingRuns.value].filter(x => x !== sid))
|
||||
}
|
||||
|
||||
// Poll fetchSession while an in-flight run is recovering. Exits when the
|
||||
// server's message signature is stable for POLL_STABLE_EXITS ticks (run
|
||||
// presumed done), TTL elapses, or the user explicitly starts streaming.
|
||||
function startPolling(sid: string) {
|
||||
if (pollTimers.has(sid)) return
|
||||
resumingRuns.value = new Set([...resumingRuns.value, sid])
|
||||
const timer = setInterval(async () => {
|
||||
// If a fresh SSE stream started for this session, polling is redundant.
|
||||
if (streamStates.value.has(sid)) {
|
||||
stopPolling(sid)
|
||||
return
|
||||
}
|
||||
const inFlight = readInFlight(sid)
|
||||
if (!inFlight) {
|
||||
stopPolling(sid)
|
||||
return
|
||||
}
|
||||
try {
|
||||
const detail = await fetchSession(sid)
|
||||
if (!detail) return
|
||||
const mapped = mapHermesMessages(detail.messages || [])
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
if (!target) return
|
||||
// Use the same current-turn comparison as switchSession: server is
|
||||
// ahead only when it has a newer user turn or the assistant output
|
||||
// after the current user turn has caught up.
|
||||
const local = target.messages
|
||||
const { serverIsAhead, serverIsCaughtUp } = compareServerMessages(local, mapped)
|
||||
if (serverIsAhead) {
|
||||
target.messages = mapped
|
||||
if (detail.title && !target.title) target.title = detail.title
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
}
|
||||
// Stability detection ONLY matters when the server has at least as
|
||||
// many user turns as we do. Otherwise the server is still catching
|
||||
// up (e.g. the new turn we just sent hasn't been flushed server-side
|
||||
// yet) and a "stable" signature is a false positive — the stability
|
||||
// is the server NOT having our latest turn, not the run being done.
|
||||
if (!serverIsCaughtUp) {
|
||||
pollSignatures.delete(sid)
|
||||
} else {
|
||||
const last = mapped[mapped.length - 1]
|
||||
const sig = `${mapped.length}|${last?.content?.slice(-40) || ''}|${last?.toolStatus || ''}`
|
||||
const prev = pollSignatures.get(sid)
|
||||
if (prev && prev.sig === sig) {
|
||||
prev.stableTicks += 1
|
||||
if (prev.stableTicks >= POLL_STABLE_EXITS) {
|
||||
// The server view has stopped changing. If it is still behind
|
||||
// the locally streamed assistant reply, end recovery without
|
||||
// retreating local state; otherwise commit the server view.
|
||||
if (serverIsAhead) {
|
||||
target.messages = mapped
|
||||
if (detail.title) target.title = detail.title
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
}
|
||||
clearInFlight(sid)
|
||||
stopPolling(sid)
|
||||
}
|
||||
} else {
|
||||
pollSignatures.set(sid, { sig, stableTicks: 0 })
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// transient network error — ignore, next tick tries again
|
||||
}
|
||||
}, POLL_INTERVAL_MS)
|
||||
pollTimers.set(sid, timer)
|
||||
}
|
||||
|
||||
async function loadSessions() {
|
||||
isLoadingSessions.value = true
|
||||
try {
|
||||
// 从 profile 对应的缓存中恢复,实现 instant render
|
||||
// Restore sessions list from cache (lightweight, no messages)
|
||||
const cachedSessions = loadJsonWithFallback<Session[]>(sessionsCacheKey(), legacySessionsCacheKey())
|
||||
if (cachedSessions?.length) {
|
||||
sessions.value = cachedSessions
|
||||
@@ -521,8 +367,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (savedId) {
|
||||
const cachedActive = cachedSessions.find(s => s.id === savedId) || null
|
||||
if (cachedActive) {
|
||||
const cachedMsgs = loadJsonWithFallback<Message[]>(msgsCacheKey(savedId), legacyMsgsCacheKey(savedId))
|
||||
if (cachedMsgs) cachedActive.messages = scrubBuggyReasoningInCache(cachedMsgs)
|
||||
activeSession.value = cachedActive
|
||||
activeSessionId.value = savedId
|
||||
}
|
||||
@@ -548,8 +392,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
const localOnly = sessions.value.filter(s => {
|
||||
if (freshIds.has(s.id)) return false
|
||||
if (readInFlight(s.id)) return true
|
||||
// Session no longer exists on server and no active run — clean up cache
|
||||
removeItemWithLegacy(msgsCacheKey(s.id), legacyMsgsCacheKey(s.id))
|
||||
removeItemWithLegacy(inFlightKey(s.id), legacyInFlightKey(s.id))
|
||||
return false
|
||||
})
|
||||
@@ -572,10 +414,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
}
|
||||
|
||||
// Re-pull active session from server without retreating newer locally
|
||||
// streamed output. Used on SSE drop and on tab-visible events — mobile
|
||||
// browsers kill EventSource while backgrounded, but the backend run usually
|
||||
// completes anyway.
|
||||
// Re-pull active session from server. Used on tab-visible events.
|
||||
async function refreshActiveSession(): Promise<boolean> {
|
||||
const sid = activeSessionId.value
|
||||
if (!sid) return false
|
||||
@@ -585,11 +424,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
if (!target) return false
|
||||
const mapped = mapHermesMessages(detail.messages || [])
|
||||
const { serverIsAhead } = compareServerMessages(target.messages, mapped)
|
||||
if (serverIsAhead) {
|
||||
target.messages = mapped
|
||||
persistActiveMessages()
|
||||
}
|
||||
target.messages = mapped
|
||||
if (detail.title) target.title = detail.title
|
||||
return true
|
||||
} catch (err) {
|
||||
@@ -626,35 +461,12 @@ export const useChatStore = defineStore('chat', () => {
|
||||
|
||||
if (!activeSession.value) return
|
||||
|
||||
// Hydrate messages from localStorage cache first (instant render), then
|
||||
// revalidate from server in the background. If no cache exists, show the
|
||||
// loading state while we fetch.
|
||||
const hasLocalMessages = activeSession.value.messages.length > 0
|
||||
if (!hasLocalMessages) {
|
||||
const cachedMsgs = loadJsonWithFallback<Message[]>(msgsCacheKey(sessionId), legacyMsgsCacheKey(sessionId))
|
||||
if (cachedMsgs?.length) {
|
||||
activeSession.value.messages = scrubBuggyReasoningInCache(cachedMsgs)
|
||||
}
|
||||
}
|
||||
|
||||
const needsBlockingLoad = activeSession.value.messages.length === 0
|
||||
if (needsBlockingLoad) isLoadingMessages.value = true
|
||||
isLoadingMessages.value = true
|
||||
|
||||
try {
|
||||
const detail = await fetchSession(sessionId)
|
||||
if (detail && detail.messages) {
|
||||
const mapped = mapHermesMessages(detail.messages)
|
||||
// Pick whichever view has more information for the current turn.
|
||||
// Simple message-count comparison is wrong because mapHermesMessages
|
||||
// folds tool_call-only assistant messages; global last-assistant
|
||||
// comparison is also wrong across turns. Trust server only when it has
|
||||
// a newer user turn or its assistant output after the current user turn
|
||||
// has caught up.
|
||||
const local = activeSession.value.messages
|
||||
const { serverIsAhead } = compareServerMessages(local, mapped)
|
||||
if (serverIsAhead) {
|
||||
activeSession.value.messages = mapped
|
||||
}
|
||||
activeSession.value.messages = mapHermesMessages(detail.messages)
|
||||
// Update title: use Hermes title, or fallback to first user message
|
||||
if (detail.title) {
|
||||
activeSession.value.title = detail.title
|
||||
@@ -665,7 +477,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
activeSession.value.title = t + (firstUser.content.length > 40 ? '...' : '')
|
||||
}
|
||||
}
|
||||
persistActiveMessages()
|
||||
}
|
||||
} catch (err) {
|
||||
console.error('Failed to load session messages:', err)
|
||||
@@ -673,12 +484,9 @@ export const useChatStore = defineStore('chat', () => {
|
||||
isLoadingMessages.value = false
|
||||
}
|
||||
|
||||
// tmux-like resume: if this session has a recent in-flight run and we're
|
||||
// not currently streaming, start polling fetchSession to pick up progress
|
||||
// that happened while we were gone. Exits automatically on stability.
|
||||
if (readInFlight(sessionId) && !streamStates.value.has(sessionId)) {
|
||||
startPolling(sessionId)
|
||||
}
|
||||
// Always resume via Socket.IO for the active session.
|
||||
// Server tracks run/compression state per session and replays events.
|
||||
resumeInFlightRun(sessionId)
|
||||
|
||||
// Fetch token usage for this session from web-ui DB
|
||||
try {
|
||||
@@ -713,7 +521,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
async function deleteSession(sessionId: string) {
|
||||
await deleteSessionApi(sessionId)
|
||||
sessions.value = sessions.value.filter(s => s.id !== sessionId)
|
||||
removeItemWithLegacy(msgsCacheKey(sessionId), legacyMsgsCacheKey(sessionId))
|
||||
persistSessionsList()
|
||||
if (activeSessionId.value === sessionId) {
|
||||
if (sessions.value.length > 0) {
|
||||
@@ -777,21 +584,10 @@ export const useChatStore = defineStore('chat', () => {
|
||||
timestamp: Date.now(),
|
||||
attachments: attachments && attachments.length > 0 ? attachments : undefined,
|
||||
}
|
||||
// Build conversation history BEFORE adding the new message, so the
|
||||
// user's current message appears only in `input` — not duplicated in
|
||||
// `conversation_history` as well.
|
||||
const sessionMsgs = getSessionMsgs(sid)
|
||||
const history: ChatMessage[] = sessionMsgs
|
||||
.filter(m => (m.role === 'user' || m.role === 'assistant') && m.content.trim())
|
||||
.map(m => ({ role: m.role as 'user' | 'assistant' | 'system', content: m.content }))
|
||||
|
||||
addMessage(sid, userMsg)
|
||||
updateSessionTitle(sid)
|
||||
// Persist immediately so a refresh before the first SSE event (e.g. the
|
||||
// user closes the tab right after sending) still has the user's message
|
||||
// and session title in the cache.
|
||||
if (sid === activeSessionId.value) {
|
||||
persistActiveMessages()
|
||||
persistSessionsList()
|
||||
}
|
||||
|
||||
@@ -821,71 +617,65 @@ export const useChatStore = defineStore('chat', () => {
|
||||
|
||||
const appStore = useAppStore()
|
||||
const sessionModel = activeSession.value?.model || appStore.selectedModel
|
||||
const run = await startRun({
|
||||
const runPayload = {
|
||||
input: inputText,
|
||||
conversation_history: history,
|
||||
session_id: sid,
|
||||
model: sessionModel || undefined,
|
||||
})
|
||||
|
||||
const runId = (run as any).run_id || (run as any).id
|
||||
if (!runId) {
|
||||
addMessage(sid, {
|
||||
id: uid(),
|
||||
role: 'system',
|
||||
content: `Error: startRun returned no run ID. Response: ${JSON.stringify(run)}`,
|
||||
timestamp: Date.now(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// tmux-like resume: persist run_id so refresh/reopen can pick up the
|
||||
// working indicator and poll for progress.
|
||||
markInFlight(sid, runId)
|
||||
// If we were already polling (e.g. user re-sent while resume was still
|
||||
// polling an earlier run), cancel that polling — the new SSE stream is
|
||||
// the authoritative live source.
|
||||
stopPolling(sid)
|
||||
|
||||
// Helper to clean up this session's stream state
|
||||
const cleanup = () => {
|
||||
streamStates.value.delete(sid)
|
||||
if (persistTimer) {
|
||||
clearTimeout(persistTimer)
|
||||
persistTimer = null
|
||||
}
|
||||
}
|
||||
|
||||
// Throttle in-flight cache writes so a refresh mid-stream still shows
|
||||
// the partial reply. 800ms keeps quota pressure low while guaranteeing
|
||||
// at most ~1s of unsaved delta on reload.
|
||||
let persistTimer: ReturnType<typeof setTimeout> | null = null
|
||||
// Per-run flags used to detect silently-swallowed errors at run.completed.
|
||||
// hermes-agent occasionally emits run.completed with empty output and no
|
||||
// usage when the agent layer caught an upstream error (e.g. invalid API
|
||||
// key). We need to distinguish: (a) run with assistant text produced,
|
||||
// (b) run with only tool activity, (c) run with truly nothing visible.
|
||||
// Reset per send() call — closures captured by SSE callbacks are scoped
|
||||
// Reset per send() call — closures captured by Socket.IO callbacks are scoped
|
||||
// to this run, so there is no cross-run contamination.
|
||||
let runProducedAssistantText = false
|
||||
let runHadToolActivity = false
|
||||
const schedulePersist = () => {
|
||||
if (sid !== activeSessionId.value || persistTimer) return
|
||||
persistTimer = setTimeout(() => {
|
||||
persistTimer = null
|
||||
persistActiveMessages()
|
||||
}, 800)
|
||||
}
|
||||
|
||||
// Listen to SSE events — all closures capture `sid`
|
||||
const ctrl = streamRunEvents(
|
||||
runId,
|
||||
// Send run via Socket.IO and listen to streamed events — all closures capture `sid`
|
||||
const ctrl = startRunViaSocket(
|
||||
runPayload,
|
||||
// onEvent
|
||||
(evt: RunEvent) => {
|
||||
switch (evt.event) {
|
||||
case 'run.started':
|
||||
break
|
||||
|
||||
case 'compression.started': {
|
||||
setCompressionState({
|
||||
compressing: true,
|
||||
messageCount: (evt as any).message_count || 0,
|
||||
beforeTokens: (evt as any).token_count || 0,
|
||||
afterTokens: 0,
|
||||
compressed: null,
|
||||
})
|
||||
break
|
||||
}
|
||||
|
||||
case 'compression.completed': {
|
||||
setCompressionState({
|
||||
compressing: false,
|
||||
messageCount: (evt as any).totalMessages || 0,
|
||||
beforeTokens: (evt as any).beforeTokens || 0,
|
||||
afterTokens: (evt as any).afterTokens || 0,
|
||||
compressed: (evt as any).compressed ?? false,
|
||||
error: (evt as any).error,
|
||||
})
|
||||
// Auto-clear after 5s
|
||||
setTimeout(() => {
|
||||
if (compressionState.value && !compressionState.value.compressing) {
|
||||
setCompressionState(null)
|
||||
}
|
||||
}, 5000)
|
||||
break
|
||||
}
|
||||
|
||||
case 'reasoning.delta':
|
||||
case 'thinking.delta': {
|
||||
const text = evt.text || evt.delta || ''
|
||||
@@ -908,7 +698,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
})
|
||||
noteReasoningStart(newId)
|
||||
}
|
||||
schedulePersist()
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
@@ -925,7 +715,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
// 否则(上游未转发 delta,只发这一次 available)不显示时长。
|
||||
noteReasoningEnd(last.id)
|
||||
}
|
||||
schedulePersist()
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
@@ -952,7 +742,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
isStreaming: true,
|
||||
})
|
||||
}
|
||||
schedulePersist()
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
@@ -972,7 +762,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
toolPreview: evt.preview,
|
||||
toolStatus: 'running',
|
||||
})
|
||||
schedulePersist()
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
@@ -986,7 +776,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
const last = toolMsgs[toolMsgs.length - 1]
|
||||
updateMessage(sid, last.id, { toolStatus: 'done' })
|
||||
}
|
||||
schedulePersist()
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
@@ -1048,9 +838,8 @@ export const useChatStore = defineStore('chat', () => {
|
||||
// the next page load to still see in-flight === true (so
|
||||
// polling kicks in and recovers) rather than the other way
|
||||
// around (cleared in-flight + stale streaming cache = UI stuck).
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
|
||||
clearInFlight(sid)
|
||||
stopPolling(sid)
|
||||
break
|
||||
}
|
||||
|
||||
@@ -1077,9 +866,8 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
})
|
||||
cleanup()
|
||||
if (sid === activeSessionId.value) persistActiveMessages()
|
||||
|
||||
clearInFlight(sid)
|
||||
stopPolling(sid)
|
||||
break
|
||||
}
|
||||
}
|
||||
@@ -1095,21 +883,13 @@ export const useChatStore = defineStore('chat', () => {
|
||||
updateSessionTitle(sid)
|
||||
},
|
||||
// onError
|
||||
// Mobile browsers drop EventSource when the tab backgrounds / screen
|
||||
// locks / network flips. The backend run usually completes anyway, so
|
||||
// rather than injecting a stale "SSE connection error" bubble we mark
|
||||
// streaming as done and silently re-sync from the server, which has
|
||||
// the real final answer. If the server fetch itself fails, we leave
|
||||
// whatever text we already streamed in place — no visible error.
|
||||
(err) => {
|
||||
console.warn('SSE connection dropped, resyncing from server:', err.message)
|
||||
console.warn('Socket.IO run stream error:', err.message)
|
||||
const msgs = getSessionMsgs(sid)
|
||||
const last = msgs[msgs.length - 1]
|
||||
if (last?.isStreaming) {
|
||||
updateMessage(sid, last.id, { isStreaming: false })
|
||||
}
|
||||
// Any tool messages still marked 'running' will be replaced by the
|
||||
// server's view after refresh; clear their spinner state now.
|
||||
msgs.forEach((m, i) => {
|
||||
if (m.role === 'tool' && m.toolStatus === 'running') {
|
||||
msgs[i] = { ...m, toolStatus: 'done' }
|
||||
@@ -1119,12 +899,10 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (sid === activeSessionId.value) {
|
||||
void refreshActiveSession()
|
||||
}
|
||||
// The run might still be going on the server side (SSE drop doesn't
|
||||
// abort it). If we still have an in-flight record, fall back to
|
||||
// polling fetchSession to keep the user updated.
|
||||
if (readInFlight(sid)) {
|
||||
startPolling(sid)
|
||||
}
|
||||
},
|
||||
// onStarted — called when server acks with run_id
|
||||
(runId: string) => {
|
||||
markInFlight(sid, runId)
|
||||
},
|
||||
)
|
||||
|
||||
@@ -1139,6 +917,264 @@ export const useChatStore = defineStore('chat', () => {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Re-attach to a run that is still in flight (e.g. after a page refresh).
 *
 * Subscribes to the chat-run events on the shared socket, emits `resume`
 * with the session id (the server is expected to join the socket to that
 * session's room), and registers a stream state for `sid` whose `abort`
 * removes the listeners again. Events tagged with a different `session_id`
 * are ignored.
 */
function resumeInFlightRun(sid: string) {
  const socket = connectChatRun()

  // Every one of these events is routed through the single `dispatch` handler.
  const eventNames = [
    'run.started',
    'run.failed',
    'message.delta',
    'reasoning.delta',
    'thinking.delta',
    'reasoning.available',
    'tool.started',
    'tool.completed',
    'run.completed',
    'compression.started',
    'compression.completed',
  ]

  let detached = false
  // Per-resume flags consulted at run.completed to detect a run that ended
  // without producing anything visible.
  let sawAssistantText = false
  let sawToolActivity = false

  // Idempotent teardown: unsubscribe and drop the stream state for this session.
  const detach = () => {
    if (detached) return
    detached = true
    for (const name of eventNames) socket.off(name, dispatch)
    streamStates.value.delete(sid)
  }

  function dispatch(evt: RunEvent) {
    if (detached) return
    // The socket is shared, so skip events tagged for another session.
    if (evt.session_id && evt.session_id !== sid) return

    switch (evt.event) {
      case 'run.started':
        break

      case 'compression.started': {
        const info = evt as any
        setCompressionState({
          compressing: true,
          messageCount: info.message_count || 0,
          beforeTokens: info.token_count || 0,
          afterTokens: 0,
          compressed: null,
        })
        break
      }

      case 'compression.completed': {
        const info = evt as any
        setCompressionState({
          compressing: false,
          messageCount: info.totalMessages || 0,
          beforeTokens: info.beforeTokens || 0,
          afterTokens: info.afterTokens || 0,
          compressed: info.compressed ?? false,
          error: info.error,
        })
        // Clear the finished banner after 5s unless a new compression started meanwhile.
        setTimeout(() => {
          if (compressionState.value && !compressionState.value.compressing) {
            setCompressionState(null)
          }
        }, 5000)
        break
      }

      case 'reasoning.delta':
      case 'thinking.delta': {
        const chunk = evt.text || evt.delta || ''
        if (!chunk) break
        sawAssistantText = true
        const history = getSessionMsgs(sid)
        const tail = history[history.length - 1]
        if (tail?.role === 'assistant' && tail.isStreaming) {
          tail.reasoning = (tail.reasoning || '') + chunk
          noteReasoningStart(tail.id)
        } else {
          const freshId = uid()
          addMessage(sid, {
            id: freshId,
            role: 'assistant',
            content: '',
            timestamp: Date.now(),
            isStreaming: true,
            reasoning: chunk,
          })
          noteReasoningStart(freshId)
        }
        break
      }

      case 'reasoning.available': {
        const history = getSessionMsgs(sid)
        const tail = history[history.length - 1]
        if (tail?.role === 'assistant' && tail.isStreaming) {
          noteReasoningEnd(tail.id)
        }
        break
      }

      case 'message.delta': {
        if (evt.delta) sawAssistantText = true
        const piece = evt.delta || ''
        const history = getSessionMsgs(sid)
        const tail = history[history.length - 1]
        if (tail?.role === 'assistant' && tail.isStreaming) {
          // Append to the bubble that is already streaming.
          const before = tail.content
          const after = before + piece
          noteThinkingDelta(tail.id, before, after)
          if (tail.reasoning) noteReasoningEnd(tail.id)
          tail.content = after
        } else {
          // No streaming bubble yet: open one with this first chunk.
          const freshId = uid()
          noteThinkingDelta(freshId, '', piece)
          addMessage(sid, {
            id: freshId,
            role: 'assistant',
            content: piece,
            timestamp: Date.now(),
            isStreaming: true,
          })
        }
        break
      }

      case 'tool.started': {
        sawToolActivity = true
        const history = getSessionMsgs(sid)
        const tail = history[history.length - 1]
        if (tail?.isStreaming) {
          updateMessage(sid, tail.id, { isStreaming: false })
        }
        addMessage(sid, {
          id: uid(),
          role: 'tool',
          content: '',
          timestamp: Date.now(),
          toolName: evt.tool || evt.name,
          toolPreview: evt.preview,
          toolStatus: 'running',
        })
        break
      }

      case 'tool.completed': {
        sawToolActivity = true
        const runningTools = getSessionMsgs(sid).filter(
          m => m.role === 'tool' && m.toolStatus === 'running',
        )
        if (runningTools.length > 0) {
          // Mark the most recently started running tool as finished.
          const newest = runningTools[runningTools.length - 1]
          updateMessage(sid, newest.id, { toolStatus: 'done' })
        }
        break
      }

      case 'run.completed': {
        const history = getSessionMsgs(sid)
        const tail = history[history.length - 1]
        if (tail?.isStreaming) {
          updateMessage(sid, tail.id, { isStreaming: false })
        }
        if (evt.usage) {
          const session = sessions.value.find(s => s.id === sid)
          if (session) {
            session.inputTokens = evt.usage.input_tokens
            session.outputTokens = evt.usage.output_tokens
          }
        }
        const finalOutput = typeof evt.output === 'string' ? evt.output : ''
        const hasFinalOutput = finalOutput.trim() !== ''
        // Nothing was streamed, but the completion event carries the answer.
        if (!sawAssistantText && hasFinalOutput) {
          addMessage(sid, {
            id: uid(),
            role: 'assistant',
            content: finalOutput,
            timestamp: Date.now(),
          })
        }
        // No text, no tool activity, no final output: surface it as an error.
        if (!sawAssistantText && !sawToolActivity && !hasFinalOutput) {
          addMessage(sid, {
            id: uid(),
            role: 'system',
            content: 'Error: Agent returned no output. The model call may have failed (e.g. invalid API key, model not supported by provider, or context exceeded). Check the hermes-agent logs for details.',
            timestamp: Date.now(),
          })
        }
        detach()
        updateSessionTitle(sid)
        clearInFlight(sid)
        break
      }

      case 'run.failed': {
        const history = getSessionMsgs(sid)
        const tail = history[history.length - 1]
        const failureText = evt.error ? `Error: ${evt.error}` : 'Run failed'
        if (tail?.isStreaming) {
          // Turn the half-streamed bubble into the error message.
          updateMessage(sid, tail.id, {
            isStreaming: false,
            content: failureText,
            role: 'system',
          })
        } else {
          addMessage(sid, {
            id: uid(),
            role: 'system',
            content: failureText,
            timestamp: Date.now(),
          })
        }
        // Tools that never reported completion are flagged as errored.
        history.forEach((m, i) => {
          if (m.role === 'tool' && m.toolStatus === 'running') {
            history[i] = { ...m, toolStatus: 'error' }
          }
        })
        detach()
        clearInFlight(sid)
        break
      }
    }
  }

  for (const name of eventNames) socket.on(name, dispatch)

  // Ask the server to attach this socket to the session.
  socket.emit('resume', { session_id: sid })

  // Registering a stream state marks the session as streaming for the UI.
  streamStates.value.set(sid, { abort: detach })
}
|
||||
|
||||
function stopStreaming() {
|
||||
const sid = activeSessionId.value
|
||||
if (!sid) return
|
||||
@@ -1151,9 +1187,8 @@ export const useChatStore = defineStore('chat', () => {
|
||||
updateMessage(sid, lastMsg.id, { isStreaming: false })
|
||||
}
|
||||
streamStates.value.delete(sid)
|
||||
clearInFlight(sid)
|
||||
stopPolling(sid)
|
||||
}
|
||||
clearInFlight(sid)
|
||||
}
|
||||
|
||||
// Tab visibility: re-sync when returning to foreground
|
||||
@@ -1161,8 +1196,10 @@ export const useChatStore = defineStore('chat', () => {
|
||||
document.addEventListener('visibilitychange', () => {
|
||||
if (document.visibilityState === 'visible' && activeSessionId.value && !isStreaming.value) {
|
||||
void refreshActiveSession()
|
||||
if (readInFlight(activeSessionId.value)) {
|
||||
startPolling(activeSessionId.value)
|
||||
// Re-subscribe in case Socket.IO reconnected
|
||||
const sid = activeSessionId.value
|
||||
if (sid && !streamStates.value.has(sid)) {
|
||||
resumeInFlightRun(sid)
|
||||
}
|
||||
}
|
||||
})
|
||||
@@ -1237,6 +1274,7 @@ export const useChatStore = defineStore('chat', () => {
|
||||
isStreaming,
|
||||
isRunActive,
|
||||
isSessionLive,
|
||||
compressionState,
|
||||
isLoadingSessions,
|
||||
sessionsLoaded,
|
||||
isLoadingMessages,
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* SQLite-backed compression snapshot store for 1:1 chat sessions.
|
||||
*
|
||||
* Stores the latest compression summary and the index of the last
|
||||
* compressed message, so incremental compression can pick up where
|
||||
* the previous one left off.
|
||||
*/
|
||||
|
||||
import { isSqliteAvailable, ensureTable, getDb } from '../index'
|
||||
|
||||
const TABLE = 'chat_compression_snapshots'
|
||||
|
||||
const SCHEMA: Record<string, string> = {
|
||||
session_id: 'TEXT PRIMARY KEY',
|
||||
summary: 'TEXT NOT NULL DEFAULT \'\'',
|
||||
last_message_index: 'INTEGER NOT NULL DEFAULT 0',
|
||||
message_count_at_time: 'INTEGER NOT NULL DEFAULT 0',
|
||||
updated_at: 'INTEGER NOT NULL',
|
||||
}
|
||||
|
||||
/** Create the compression snapshot table if SQLite is available; otherwise do nothing. */
export function initCompressionSnapshotStore(): void {
  if (!isSqliteAvailable()) return
  ensureTable(TABLE, SCHEMA)
}
|
||||
|
||||
/**
 * Load the stored compression snapshot for a session.
 *
 * @param sessionId - Primary key of the snapshot row.
 * @returns The snapshot with camelCase field names (aliased in the SELECT),
 *   or `null` when SQLite is unavailable, the database handle is missing,
 *   or no row exists for the session.
 */
export function getCompressionSnapshot(sessionId: string): { summary: string; lastMessageIndex: number; messageCountAtTime: number } | null {
  if (!isSqliteAvailable()) return null
  // getDb() is typed as nullable; guard instead of asserting non-null.
  const db = getDb()
  if (!db) return null
  const row = db.prepare(
    `SELECT summary, last_message_index AS lastMessageIndex, message_count_at_time AS messageCountAtTime FROM ${TABLE} WHERE session_id = ?`,
  ).get(sessionId) as { summary: string; lastMessageIndex: number; messageCountAtTime: number } | undefined
  // A missing row comes back nullish; normalise it to null for callers.
  return row ?? null
}
|
||||
|
||||
/**
 * Insert or overwrite the compression snapshot for a session.
 *
 * The row is keyed by `session_id`; on conflict every column is replaced
 * and `updated_at` is set to the current time. No-op when SQLite is
 * unavailable.
 */
export function saveCompressionSnapshot(
  sessionId: string,
  summary: string,
  lastMessageIndex: number,
  messageCountAtTime: number,
): void {
  if (!isSqliteAvailable()) return
  const upsert = getDb()!.prepare(
    `INSERT INTO ${TABLE} (session_id, summary, last_message_index, message_count_at_time, updated_at)
VALUES (?, ?, ?, ?, ?)
ON CONFLICT(session_id) DO UPDATE SET
summary = excluded.summary,
last_message_index = excluded.last_message_index,
message_count_at_time = excluded.message_count_at_time,
updated_at = excluded.updated_at`,
  )
  const updatedAt = Date.now()
  upsert.run(sessionId, summary, lastMessageIndex, messageCountAtTime, updatedAt)
}
|
||||
|
||||
/** Remove the compression snapshot row for a session; no-op when SQLite is unavailable. */
export function deleteCompressionSnapshot(sessionId: string): void {
  if (isSqliteAvailable()) {
    const removal = getDb()!.prepare(`DELETE FROM ${TABLE} WHERE session_id = ?`)
    removal.run(sessionId)
  }
}
|
||||
@@ -242,7 +242,7 @@ function runLiteralContentSearch(
|
||||
${SESSION_SELECT},
|
||||
s.parent_session_id AS parent_session_id
|
||||
FROM sessions s
|
||||
WHERE s.source != 'tool'
|
||||
WHERE s.source != 'tool' AND s.id NOT LIKE 'compress_%'
|
||||
${sourceClause}
|
||||
)
|
||||
SELECT
|
||||
@@ -411,7 +411,7 @@ function loadAllSessions(db: { prepare: (sql: string) => { all: (...params: any[
|
||||
${SESSION_SELECT},
|
||||
s.parent_session_id AS parent_session_id
|
||||
FROM sessions s
|
||||
WHERE s.source != 'tool'
|
||||
WHERE s.source != 'tool' AND s.id NOT LIKE 'compress_%'
|
||||
`).all() as Record<string, unknown>[]
|
||||
const sessions = rows.map(mapInternalSessionRow)
|
||||
const byId = new Map(sessions.map(s => [s.id, s]))
|
||||
@@ -623,7 +623,7 @@ export async function listSessionSummaries(source?: string, limit = 2000): Promi
|
||||
const db = new DatabaseSync(sessionDbPath(), { open: true, readOnly: true })
|
||||
|
||||
try {
|
||||
const clauses = ["s.parent_session_id IS NULL", "s.source != 'tool'"]
|
||||
const clauses = ["s.parent_session_id IS NULL", "s.source != 'tool'", "s.id NOT LIKE 'compress_%'"]
|
||||
const params: any[] = []
|
||||
if (source) {
|
||||
clauses.push('s.source = ?')
|
||||
@@ -689,7 +689,7 @@ export async function searchSessionSummaries(
|
||||
${SESSION_SELECT},
|
||||
s.parent_session_id AS parent_session_id
|
||||
FROM sessions s
|
||||
WHERE s.source != 'tool'
|
||||
WHERE s.source != 'tool' AND s.id NOT LIKE 'compress_%'
|
||||
${sourceClause}
|
||||
`
|
||||
|
||||
|
||||
@@ -3,7 +3,10 @@ import { mkdirSync, readFileSync, writeFileSync, existsSync } from 'fs'
|
||||
import { resolve } from 'path'
|
||||
import { homedir } from 'os'
|
||||
|
||||
const DB_DIR = resolve(homedir(), '.hermes-web-ui')
|
||||
const isDev = process.env.NODE_ENV !== 'production'
|
||||
const DB_DIR = isDev
|
||||
? resolve(process.cwd(), 'packages/server/data')
|
||||
: resolve(homedir(), '.hermes-web-ui')
|
||||
const DB_PATH = resolve(DB_DIR, 'hermes-web-ui.db')
|
||||
const JSON_PATH = resolve(DB_DIR, 'hermes-web-ui.json')
|
||||
|
||||
@@ -27,7 +30,7 @@ export function getDb(): DatabaseSync | null {
|
||||
if (!_db) {
|
||||
mkdirSync(DB_DIR, { recursive: true })
|
||||
_db = new DatabaseSync(DB_PATH)
|
||||
_db.exec('PRAGMA journal_mode=WAL')
|
||||
_db.exec('PRAGMA journal_mode=DELETE')
|
||||
_db.exec('PRAGMA foreign_keys=ON')
|
||||
}
|
||||
return _db
|
||||
|
||||
@@ -15,7 +15,9 @@ import { setupTerminalWebSocket } from './routes/hermes/terminal'
|
||||
import { startVersionCheck } from './routes/health'
|
||||
import { registerRoutes } from './routes'
|
||||
import { setGroupChatServer } from './routes/hermes/group-chat'
|
||||
import { setChatRunServer } from './routes/hermes/chat-run'
|
||||
import { GroupChatServer } from './services/hermes/group-chat'
|
||||
import { ChatRunSocket } from './services/hermes/chat-run-socket'
|
||||
import { logger } from './services/logger'
|
||||
|
||||
// Injected by esbuild at build time; fallback to reading package.json in dev mode
|
||||
@@ -52,6 +54,10 @@ export async function bootstrap() {
|
||||
initUsageStore()
|
||||
console.log('[bootstrap] usage store initialized')
|
||||
|
||||
const { initCompressionSnapshotStore } = await import('./db/hermes/compression-snapshot')
|
||||
initCompressionSnapshotStore()
|
||||
console.log('[bootstrap] compression snapshot store initialized')
|
||||
|
||||
app.use(cors({ origin: config.corsOrigins }))
|
||||
app.use(bodyParser())
|
||||
console.log('[bootstrap] cors + bodyParser registered')
|
||||
@@ -92,6 +98,11 @@ export async function bootstrap() {
|
||||
setGroupChatServer(groupChatServer)
|
||||
groupChatServer.setGatewayManager(getGatewayManagerInstance())
|
||||
|
||||
// Chat run Socket.IO — shares the same Server instance, just adds /chat-run namespace
|
||||
const chatRunServer = new ChatRunSocket(groupChatServer.getIO(), getGatewayManagerInstance())
|
||||
setChatRunServer(chatRunServer)
|
||||
chatRunServer.init()
|
||||
|
||||
// Catch-all: destroy upgrade requests not handled by terminal or Socket.IO
|
||||
server.on('upgrade', (req: any, socket: any) => {
|
||||
const url = new URL(req.url || '', `http://${req.headers.host}`)
|
||||
|
||||
@@ -0,0 +1,592 @@
|
||||
/**
|
||||
* Chat Context Compressor
|
||||
*
|
||||
* Compresses 1:1 chat conversation history before sending to upstream.
|
||||
* Uses the Hermes structured summary prompt for LLM-based compression.
|
||||
*
|
||||
* Algorithm:
|
||||
* 1. If total tokens < trigger threshold → return as-is
|
||||
* 2. Pre-clean: truncate old tool results (no LLM call)
|
||||
* 3. Load snapshot from SQLite for incremental update
|
||||
* 4. Keep last 20 messages verbatim (tail protection by message count)
|
||||
* 5. Summarize everything before the tail
|
||||
* 6. Save snapshot: last_message_index = index where compression ends
|
||||
*/
|
||||
|
||||
import { EventSource } from 'eventsource'
|
||||
import { encodingForModel, getEncoding } from 'js-tiktoken'
|
||||
import { logger } from '../../services/logger'
|
||||
import {
|
||||
getCompressionSnapshot,
|
||||
saveCompressionSnapshot,
|
||||
deleteCompressionSnapshot,
|
||||
} from '../../db/hermes/compression-snapshot'
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────
|
||||
|
||||
export interface ChatMessage {
|
||||
role: string
|
||||
content: string
|
||||
tool_calls?: Array<{ id: string; type: string; function: { name: string; arguments: string } }>
|
||||
tool_call_id?: string
|
||||
name?: string
|
||||
}
|
||||
|
||||
export interface CompressionConfig {
|
||||
/** Token threshold to trigger compression (100_000 in DEFAULT_COMPRESSION_CONFIG; intended as roughly contextLength / 2) */
|
||||
triggerTokens: number
|
||||
/** Summary token target (default: 8000) */
|
||||
summaryBudget: number
|
||||
/** Number of recent messages to keep verbatim (default: 20) */
|
||||
tailMessageCount: number
|
||||
/** Timeout for LLM summarization call (default: 120_000ms) */
|
||||
summarizationTimeoutMs: number
|
||||
}
|
||||
|
||||
export const DEFAULT_COMPRESSION_CONFIG: CompressionConfig = {
|
||||
triggerTokens: 100_000,
|
||||
summaryBudget: 8_000,
|
||||
tailMessageCount: 20,
|
||||
summarizationTimeoutMs: 120_000,
|
||||
}
|
||||
|
||||
export interface CompressedResult {
|
||||
messages: ChatMessage[]
|
||||
meta: {
|
||||
totalMessages: number
|
||||
compressed: boolean
|
||||
/** true = actually called LLM to summarize; false = assembled from existing snapshot or returned as-is */
|
||||
llmCompressed: boolean
|
||||
summaryTokenEstimate: number
|
||||
verbatimCount: number
|
||||
compressedStartIndex: number
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Token counting ─────────────────────────────────────
|
||||
|
||||
// Lazily created cl100k_base encoder, shared by every countTokens() call.
let _encoder: ReturnType<typeof getEncoding> | null = null

/** Return the shared cl100k_base encoder, building it on first use. */
function getEncoder() {
  return (_encoder ??= getEncoding('cl100k_base'))
}
|
||||
|
||||
/**
 * Count the tokens in `text` with the shared encoder.
 *
 * If encoding throws, falls back to a character heuristic: 1.5 tokens per
 * character matched by the CJK/full-width ranges below, a quarter token per
 * remaining character, rounded up.
 */
export function countTokens(text: string): number {
  try {
    const encoded = getEncoder().encode(text)
    return encoded.length
  } catch {
    const wideMatches = text.match(/[\u2e80-\u9fff\uac00-\ud7af\u3000-\u303f\uff00-\uffef]/g)
    const wideCount = wideMatches === null ? 0 : wideMatches.length
    const narrowCount = text.length - wideCount
    return Math.ceil(wideCount * 1.5 + narrowCount / 4)
  }
}
|
||||
|
||||
// One encoder per model name, so repeated counts do not rebuild the tokenizer.
const _modelEncoders = new Map<string, ReturnType<typeof encodingForModel>>()

/**
 * Count tokens in `text` using the tokenizer registered for `model`.
 *
 * @param text - Text to tokenize.
 * @param model - Model name passed to js-tiktoken's `encodingForModel`.
 * @returns The token count; if the model is unknown or encoding throws,
 *   the result of `countTokens(text)` instead.
 */
export function countTokensForModel(text: string, model: string): number {
  try {
    let enc = _modelEncoders.get(model)
    if (!enc) {
      // Only successful lookups are cached; an unknown model throws here.
      enc = encodingForModel(model as Parameters<typeof encodingForModel>[0])
      _modelEncoders.set(model, enc)
    }
    return enc.encode(text).length
  } catch {
    return countTokens(text)
  }
}
|
||||
|
||||
/**
 * Sum the token counts of every message's `content`.
 *
 * Only `content` is counted; `tool_calls` payloads are not included.
 * Missing content is treated as an empty string, matching how the other
 * helpers in this file read `msg.content`.
 */
function estimateMessagesTokens(messages: ChatMessage[]): number {
  let total = 0
  for (const m of messages) {
    total += countTokens(m.content || '')
  }
  return total
}
|
||||
|
||||
// ─── Prompts ────────────────────────────────────────────
|
||||
|
||||
export const SUMMARY_PREFIX = `[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted
|
||||
into the summary below. This is a handoff from a previous context
|
||||
window — treat it as background reference, NOT as active instructions.
|
||||
Do NOT answer questions or fulfill requests mentioned in this summary;
|
||||
they were already addressed.
|
||||
Your current task is identified in the '## Active Task' section of the
|
||||
summary — resume exactly from there.
|
||||
Respond ONLY to the latest user message
|
||||
that appears AFTER this summary. The current session state (files,
|
||||
config, etc.) may reflect work described here — avoid repeating it:`
|
||||
|
||||
const TEMPLATE_SECTIONS = `Use this exact structure:
|
||||
|
||||
## Active Task
|
||||
[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
|
||||
task assignment verbatim — the exact words they used. If multiple tasks
|
||||
were requested and only some are done, list only the ones NOT yet completed.
|
||||
The next assistant must pick up exactly here. Example:
|
||||
"User asked: 'Now refactor the auth module to use JWT instead of sessions'"
|
||||
If no outstanding task exists, write "None."]
|
||||
|
||||
## Goal
|
||||
[What the user is trying to accomplish overall]
|
||||
|
||||
## Constraints & Preferences
|
||||
[User preferences, coding style, constraints, important decisions]
|
||||
|
||||
## Completed Actions
|
||||
[Numbered list of concrete actions taken — include tool used, target, and outcome.
|
||||
Format each as: N. ACTION target — outcome [tool: name]
|
||||
Example:
|
||||
1. READ config.py:45 — found == should be != [tool: read_file]
|
||||
2. PATCH config.py:45 — changed == to != [tool: patch]
|
||||
3. TEST pytest tests/ — 3/50 failed: test_parse, test_validate, test_edge [tool: terminal]
|
||||
Be specific with file paths, commands, line numbers, and results.]
|
||||
|
||||
## Active State
|
||||
[Current working state — include:
|
||||
- Working directory and branch (if applicable)
|
||||
- Modified/created files with brief note on each
|
||||
- Test status (X/Y passing)
|
||||
- Any running processes or servers
|
||||
- Environment details that matter]
|
||||
|
||||
## In Progress
|
||||
[Work currently underway — what was being done when compaction fired]
|
||||
|
||||
## Blocked
|
||||
[Any blockers, errors, or issues not yet resolved. Include exact error messages.]
|
||||
|
||||
## Key Decisions
|
||||
[Important technical decisions and WHY they were made]
|
||||
|
||||
## Resolved Questions
|
||||
[Questions the user asked that were ALREADY answered — include the answer so the next assistant does not re-answer them]
|
||||
|
||||
## Pending User Asks
|
||||
[Questions or requests from the user that have NOT yet been answered or fulfilled. If none, write "None."]
|
||||
|
||||
## Relevant Files
|
||||
[Files read, modified, or created — with brief note on each]
|
||||
|
||||
## Remaining Work
|
||||
[What remains to be done — framed as context, not instructions]
|
||||
|
||||
## Critical Context
|
||||
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]`
|
||||
|
||||
/**
 * Build the prompt for a first-time (non-incremental) summary.
 *
 * Sections are separated by one blank line: role instructions, task
 * description, the serialized turns, the shared section template, the
 * token target, and the closing instruction.
 */
function buildFullPrompt(contentToSummarize: string, summaryBudget: number): string {
  const roleInstructions = [
    'You are a summarization agent creating a context checkpoint.',
    'Your output will be injected as reference material for a DIFFERENT',
    'assistant that continues the conversation.',
    'Do NOT respond to any questions or requests in the conversation —',
    'only output the structured summary.',
    'Do NOT include any preamble, greeting, or prefix.',
  ].join('\n')

  const task = [
    'Create a structured handoff summary for a different assistant that will continue',
    'this conversation after earlier turns are compacted. The next assistant should be',
    'able to understand what happened without re-reading the original turns.',
  ].join('\n')

  const turns = `TURNS TO SUMMARIZE:\n${contentToSummarize}`
  const guidance = `Target ~${summaryBudget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.`
  const closing = 'Write only the summary body. Do not include any preamble or prefix.'

  return [roleInstructions, task, turns, TEMPLATE_SECTIONS, guidance, closing].join('\n\n')
}
|
||||
|
||||
/**
 * Build the prompt that folds new turns into an existing summary.
 *
 * Sections are separated by one blank line: role instructions, the update
 * framing, the previous summary, the new turns, the update rules, the
 * shared section template, the token target, and the closing instruction.
 */
function buildIncrementalPrompt(previousSummary: string, contentToSummarize: string, summaryBudget: number): string {
  const roleInstructions = [
    'You are a summarization agent creating a context checkpoint.',
    'Your output will be injected as reference material for a DIFFERENT',
    'assistant that continues the conversation.',
    'Do NOT respond to any questions or requests in the conversation —',
    'only output the structured summary.',
    'Do NOT include any preamble, greeting, or prefix.',
  ].join('\n')

  const framing = [
    'You are updating a context compaction summary. A previous compaction produced the',
    'summary below. New conversation turns have occurred since then and need to be',
    'incorporated.',
  ].join('\n')

  const previous = `PREVIOUS SUMMARY:\n${previousSummary}`
  const additions = `NEW TURNS TO INCORPORATE:\n${contentToSummarize}`

  const updateRules = [
    'Update the summary using this exact structure. PRESERVE all existing information',
    'that is still relevant. ADD new completed actions to the numbered list',
    '(continue numbering). Move items from "In Progress" to "Completed Actions" when',
    'done. Move answered questions to "Resolved Questions". Update "Active State"',
    'to reflect current state. Remove information only if it is clearly obsolete.',
    `CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled`,
    'request — this is the most important field for task continuity.',
  ].join('\n')

  const guidance = `Target ~${summaryBudget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.`
  const closing = 'Write only the summary body. Do not include any preamble or prefix.'

  return [
    roleInstructions,
    framing,
    previous,
    additions,
    updateRules,
    TEMPLATE_SECTIONS,
    guidance,
    closing,
  ].join('\n\n')
}
|
||||
|
||||
// ─── Pre-cleaning ───────────────────────────────────────
|
||||
|
||||
/**
 * Flattens chat messages into the plain-text transcript handed to the summarizer.
 *
 * - Tool results are labelled `[tool:<name>]`; results longer than 5500
 *   characters keep their first 4000 and last 1500 characters.
 * - Assistant tool calls are rendered as `[tool_call: name(args)]`, with
 *   arguments clipped to 1500 characters, followed by the assistant's own text
 *   when it is not blank.
 *
 * @param messages - Messages to serialize, in conversation order.
 * @returns One entry per message (two for an assistant message that has both
 *          tool calls and text), separated by blank lines.
 */
function serializeForSummary(messages: ChatMessage[]): string {
  const clipArguments = (raw: string): string =>
    raw.length > 1500 ? raw.slice(0, 1500) + '...' : raw

  const entries = messages.flatMap((message): string[] => {
    const isToolResult = message.role === 'tool'
    const label = isToolResult ? `[tool:${message.name || 'unknown'}]` : message.role

    let text = message.content || ''
    if (isToolResult && text.length > 5500) {
      // Keep the head and tail of oversized tool output; drop the middle.
      text = text.slice(0, 4000) + '\n... [truncated]\n...' + text.slice(-1500)
    }

    const toolCalls = message.role === 'assistant' ? message.tool_calls : undefined
    if (!toolCalls?.length) {
      return [`${label}: ${text}`]
    }

    const renderedCalls = toolCalls
      .map(call => `[tool_call: ${call.function.name}(${clipArguments(call.function.arguments)})]`)
      .join('\n')

    const lines = [`${label}: ${renderedCalls}`]
    if (text.trim()) lines.push(`${label}: ${text}`)
    return lines
  })

  return entries.join('\n\n')
}
|
||||
|
||||
/**
 * Replaces the content of older tool results with a short one-line preview,
 * leaving the most recent messages untouched.
 *
 * A pruned tool message keeps all its other fields; its content becomes
 * `[<name or "tool">] <first 100 chars, newlines as spaces>` followed by `...`
 * when the original was longer than 100 characters.
 *
 * @param messages - Full message list, oldest first. Never mutated.
 * @param keepRecentCount - Number of trailing messages to leave verbatim.
 *        Zero or a negative value means no message is protected.
 * @returns `messages` itself when nothing lies outside the protected tail,
 *          otherwise a new array of the same length.
 */
function pruneOldToolResults(messages: ChatMessage[], keepRecentCount: number): ChatMessage[] {
  // Work with an explicit split index rather than slice(-n): slice(-0) is
  // slice(0), which would treat the whole list as the protected tail and
  // silently prune nothing when keepRecentCount is 0.
  const protectedCount = Math.max(0, Math.trunc(keepRecentCount))
  if (messages.length <= protectedCount) return messages

  const firstProtected = messages.length - protectedCount

  return messages.map((msg, index) => {
    if (index >= firstProtected || msg.role !== 'tool') return msg
    const content = msg.content || ''
    const preview = content.slice(0, 100).replace(/\n/g, ' ')
    const ellipsis = content.length > 100 ? '...' : ''
    return { ...msg, content: `[${msg.name || 'tool'}] ${preview}${ellipsis}` }
  })
}
|
||||
|
||||
// ─── LLM Summarization ──────────────────────────────────
|
||||
|
||||
/**
 * Runs one summarization request against the upstream gateway and resolves
 * with the trimmed summary text.
 *
 * The run is started with `POST {upstream}/v1/runs` under a throwaway
 * `compress_…` session id, then its event stream is read until
 * `run.completed` or `run.failed` arrives. The temporary session is deleted
 * (best effort) on every exit path once the run has been accepted.
 *
 * @param upstream - Gateway base URL without a trailing slash.
 * @param apiKey - Optional key, sent as a Bearer header and as the `token`
 *        query parameter of the event stream.
 * @param prompt - Summarization prompt sent as the run input.
 * @param history - Conversation turns supplied as `conversation_history`.
 * @param timeoutMs - Limit applied separately to the start request and to
 *        waiting on the event stream.
 * @param previousSummary - When given, prepended to the history as a
 *        user/assistant exchange.
 * @returns The non-empty summary text.
 * @throws Error when the start request fails, no run id is returned, the run
 *         fails, the stream errors, the output is empty, or the wait times out.
 */
async function callSummarizer(
  upstream: string,
  apiKey: string | undefined,
  prompt: string,
  history: Array<{ role: string; content: string }>,
  timeoutMs: number,
  previousSummary?: string,
): Promise<string> {
  const sessionId = `compress_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`

  const convHistory: Array<{ role: string; content: string }> = previousSummary
    ? [
        { role: 'user', content: `[Previous summary]\n${previousSummary}` },
        { role: 'assistant', content: 'Understood, I will update the summary.' },
        ...history,
      ]
    : [...history]

  const headers: Record<string, string> = { 'Content-Type': 'application/json' }
  if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`

  const res = await fetch(`${upstream}/v1/runs`, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      input: prompt,
      conversation_history: convHistory,
      session_id: sessionId,
    }),
    signal: AbortSignal.timeout(timeoutMs),
  })

  if (!res.ok) {
    throw new Error(`Summarization run failed: ${res.status}`)
  }

  const dropTempSession = (): void => {
    deleteCompressSession(upstream, apiKey, sessionId).catch(() => {})
  }

  const { run_id } = (await res.json()) as { run_id?: string }
  if (!run_id) {
    // Without a run id there is no event stream to follow.
    dropTempSession()
    throw new Error('Summarization run returned no run_id')
  }

  const eventsUrl = new URL(`${upstream}/v1/runs/${run_id}/events`)
  if (apiKey) eventsUrl.searchParams.set('token', apiKey)

  return new Promise<string>((resolve, reject) => {
    let timer: ReturnType<typeof setTimeout> | undefined
    let settled = false

    // The stream is created before the timer so no callback can ever see an
    // uninitialised `source`.
    const source = new EventSource(eventsUrl.toString())

    /** Single exit point: stop the timer, close the stream, drop the temp session. */
    const finish = (settle: () => void): void => {
      if (settled) return
      settled = true
      if (timer !== undefined) clearTimeout(timer)
      source.close()
      dropTempSession()
      settle()
    }

    timer = setTimeout(() => {
      finish(() => reject(new Error('Summarization timed out')))
    }, timeoutMs)

    source.onmessage = (event: MessageEvent) => {
      let parsed: { event?: unknown; output?: unknown; error?: unknown } | null
      try {
        parsed = JSON.parse(event.data)
      } catch {
        return // not JSON — ignore
      }
      if (!parsed || typeof parsed !== 'object') return

      if (parsed.event === 'run.completed') {
        const output = parsed.output
        if (typeof output !== 'string' || output.trim() === '') {
          finish(() => reject(new Error('Empty summarization response')))
          return
        }
        finish(() => resolve(output.trim()))
      } else if (parsed.event === 'run.failed') {
        const reason =
          typeof parsed.error === 'string' && parsed.error ? parsed.error : 'Summarization run failed'
        finish(() => reject(new Error(reason)))
      }
    }

    source.onerror = () => {
      finish(() => reject(new Error('Summarization SSE connection error')))
    }
  })
}
|
||||
|
||||
/**
 * Best-effort removal of a temporary compression session from the gateway.
 *
 * Sends `DELETE {upstream}/api/sessions/{sessionId}` with a 5 second timeout.
 * Any failure is swallowed, so the returned promise never rejects.
 *
 * @param upstream - Gateway base URL without a trailing slash.
 * @param apiKey - Optional key sent as a Bearer token.
 * @param sessionId - Id of the temporary session to delete.
 */
async function deleteCompressSession(upstream: string, apiKey: string | undefined, sessionId: string): Promise<void> {
  const headers: Record<string, string> = apiKey ? { Authorization: `Bearer ${apiKey}` } : {}
  const target = `${upstream}/api/sessions/${sessionId}`

  try {
    await fetch(target, {
      method: 'DELETE',
      headers,
      signal: AbortSignal.timeout(5000),
    })
  } catch {
    /* best-effort */
  }
}
|
||||
|
||||
// ─── Main Compressor ────────────────────────────────────
|
||||
|
||||
/**
 * Keeps a conversation under a token threshold by replacing older turns with
 * an LLM-written summary. The summary and the index of the last summarized
 * message are persisted per session as a compression snapshot so later calls
 * only have to summarize what is new.
 */
export class ChatContextCompressor {
  private config: CompressionConfig

  /**
   * @param opts.config - Overrides merged over `DEFAULT_COMPRESSION_CONFIG`.
   */
  constructor(opts?: {
    config?: Partial<CompressionConfig>
  }) {
    this.config = { ...DEFAULT_COMPRESSION_CONFIG, ...opts?.config }
  }

  /**
   * Assemble and compress conversation history.
   *
   * Flow:
   * 1. Check snapshot → if exists, assemble = summary + new messages after snapshot index
   * 2. If no snapshot → assemble = all messages
   * 3. Count tokens of assembled context
   * 4. Under threshold → return assembled as-is (no LLM call)
   * 5. Over threshold → LLM compress, keep last N messages, save new snapshot
   *
   * The threshold is half of `contextLength` (200 000 when omitted or 0).
   *
   * @param messages - Full conversation history, oldest first.
   * @param upstream - Gateway base URL used for the summarization run.
   * @param apiKey - Optional gateway API key.
   * @param sessionId - Session whose snapshot is read and updated; without it
   *        no snapshot is used or saved.
   * @param contextLength - Context window of the target model, in tokens.
   * @returns The messages to send upstream plus metadata about what was done.
   */
  async compress(
    messages: ChatMessage[],
    upstream: string,
    apiKey: string | undefined,
    sessionId?: string,
    contextLength?: number,
  ): Promise<CompressedResult> {
    const cl = contextLength || 200_000
    const triggerTokens = Math.floor(cl / 2)
    const total = messages.length

    const makeMeta = (opts: Partial<CompressedResult['meta']> = {}): CompressedResult['meta'] => ({
      totalMessages: total,
      compressed: false,
      llmCompressed: false,
      summaryTokenEstimate: 0,
      verbatimCount: total,
      compressedStartIndex: -1,
      ...opts,
    })

    // ── Step 1: Check snapshot first ─────────────────────
    const snapshot = sessionId ? getCompressionSnapshot(sessionId) : null

    if (snapshot && sessionId) {
      const { summary: previousSummary, lastMessageIndex } = snapshot
      const newMessages = messages.slice(lastMessageIndex + 1)
      const summaryTokens = countTokens(SUMMARY_PREFIX + previousSummary)
      const newTokens = estimateMessagesTokens(newMessages)
      const assembledTokens = summaryTokens + newTokens

      logger.info(
        '[context-compressor] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)',
        sessionId, lastMessageIndex, newMessages.length, assembledTokens, triggerTokens,
      )

      // Under threshold → return summary + new messages, no LLM call
      if (assembledTokens <= triggerTokens) {
        return {
          messages: [ChatContextCompressor.summaryMessage(previousSummary), ...newMessages],
          meta: makeMeta({
            compressed: true,
            llmCompressed: false,
            summaryTokenEstimate: summaryTokens,
            verbatimCount: newMessages.length,
            compressedStartIndex: lastMessageIndex,
          }),
        }
      }

      // Over threshold → incremental LLM compress
      return this.incrementalCompress(messages, snapshot, upstream, apiKey, sessionId, makeMeta())
    }

    // ── Step 2: No snapshot — check all messages ──────────
    const totalTokens = estimateMessagesTokens(messages)

    logger.info(
      '[context-compressor] session=%s: no snapshot, %d messages, ~%d tokens (threshold %d)',
      sessionId, total, totalTokens, triggerTokens,
    )

    if (totalTokens <= triggerTokens) {
      return { messages, meta: makeMeta() }
    }

    // Over threshold → full LLM compress
    return this.fullCompress(messages, upstream, apiKey, sessionId, makeMeta())
  }

  /**
   * Folds the messages that arrived after the snapshot into the existing
   * summary, keeping the last `tailMessageCount` of them verbatim.
   *
   * The snapshot is only advanced when the summarizer actually produced a new
   * summary. If it fails, the previous summary is reused for this request and
   * the stored snapshot is left alone so the skipped turns are retried next
   * time instead of being lost.
   */
  private async incrementalCompress(
    messages: ChatMessage[],
    snapshot: { summary: string; lastMessageIndex: number },
    upstream: string,
    apiKey: string | undefined,
    sessionId: string,
    meta: CompressedResult['meta'],
  ): Promise<CompressedResult> {
    const { summary: previousSummary, lastMessageIndex } = snapshot
    const total = messages.length
    const tailCount = this.config.tailMessageCount
    const cleaned = pruneOldToolResults(messages, tailCount)
    const newMessages = cleaned.slice(lastMessageIndex + 1)

    // Keep last N of new messages, compress the rest
    const tailStart = Math.max(0, newMessages.length - tailCount)
    const toCompress = newMessages.slice(0, tailStart)
    const tail = newMessages.slice(tailStart)

    logger.info(
      '[context-compressor] [incremental-llm] compressing %d of %d new messages, keeping %d tail',
      toCompress.length, newMessages.length, tail.length,
    )

    let summary = previousSummary
    let llmCompressed = false

    // Nothing to fold in (everything new sits in the verbatim tail): asking
    // the LLM to "update" the summary with no input would only risk degrading it.
    if (toCompress.length > 0) {
      try {
        const contentToSummarize = serializeForSummary(toCompress)
        const prompt = buildIncrementalPrompt(previousSummary, contentToSummarize, this.config.summaryBudget)
        const history = ChatContextCompressor.toSummarizerHistory(toCompress)

        const t0 = Date.now()
        summary = await callSummarizer(upstream, apiKey, prompt, history, this.config.summarizationTimeoutMs, previousSummary)
        llmCompressed = true
        logger.info('[context-compressor] incremental-llm done in %dms, %d chars', Date.now() - t0, summary.length)
      } catch (err: unknown) {
        logger.warn(
          '[context-compressor] incremental-llm failed: %s — reusing previous summary',
          ChatContextCompressor.describeError(err),
        )
        summary = previousSummary
      }
    }

    const newLastIndex = lastMessageIndex + tailStart
    if (llmCompressed && sessionId) {
      saveCompressionSnapshot(sessionId, summary, newLastIndex, total)
    }

    return {
      messages: [ChatContextCompressor.summaryMessage(summary), ...tail],
      meta: {
        ...meta,
        compressed: true,
        llmCompressed,
        summaryTokenEstimate: countTokens(SUMMARY_PREFIX + summary),
        verbatimCount: tail.length,
        compressedStartIndex: newLastIndex,
      },
    }
  }

  /**
   * Summarizes everything except the last `tailMessageCount` messages when no
   * snapshot exists yet. On summarizer failure only the verbatim tail is
   * returned and no snapshot is written.
   */
  private async fullCompress(
    messages: ChatMessage[],
    upstream: string,
    apiKey: string | undefined,
    sessionId: string | undefined,
    meta: CompressedResult['meta'],
  ): Promise<CompressedResult> {
    const total = messages.length
    const tailCount = this.config.tailMessageCount
    const cleaned = pruneOldToolResults(messages, tailCount)

    if (total <= tailCount) {
      return { messages: cleaned, meta }
    }

    const tailStart = total - tailCount
    const toCompress = cleaned.slice(0, tailStart)
    const tail = cleaned.slice(tailStart)

    logger.info(
      '[context-compressor] [full-llm] compressing messages 0-%d, keeping %d-%d',
      tailStart - 1, tailStart, total - 1,
    )

    const contentToSummarize = serializeForSummary(toCompress)
    const prompt = buildFullPrompt(contentToSummarize, this.config.summaryBudget)
    const history = ChatContextCompressor.toSummarizerHistory(toCompress)

    let summary: string | null = null
    try {
      const t0 = Date.now()
      summary = await callSummarizer(upstream, apiKey, prompt, history, this.config.summarizationTimeoutMs)
      logger.info('[context-compressor] full-llm done in %dms, %d chars', Date.now() - t0, summary.length)
    } catch (err: unknown) {
      logger.warn('[context-compressor] full-llm failed: %s', ChatContextCompressor.describeError(err))
    }

    const result: ChatMessage[] = []

    if (summary) {
      result.push(ChatContextCompressor.summaryMessage(summary))
      if (sessionId) {
        saveCompressionSnapshot(sessionId, summary, tailStart - 1, total)
      }
    }

    result.push(...tail)

    return {
      messages: result,
      meta: {
        ...meta,
        compressed: true,
        llmCompressed: !!summary,
        summaryTokenEstimate: summary ? countTokens(SUMMARY_PREFIX + summary) : 0,
        verbatimCount: tail.length,
        compressedStartIndex: tailStart - 1,
      },
    }
  }

  /** Remove snapshot for a session (e.g. when session is deleted) */
  static invalidateSnapshot(sessionId: string): void {
    deleteCompressionSnapshot(sessionId)
  }

  /**
   * Wraps a summary as the leading history message. The role is 'user' so
   * every code path that injects a summary uses the same role.
   */
  private static summaryMessage(summary: string): ChatMessage {
    return { role: 'user', content: SUMMARY_PREFIX + '\n\n' + summary }
  }

  /** Reduces messages to the user/assistant turns passed to the summarizer as history. */
  private static toSummarizerHistory(messages: ChatMessage[]): Array<{ role: string; content: string }> {
    return messages
      .filter(m => m.role === 'user' || m.role === 'assistant')
      .map(m => ({ role: m.role, content: m.content ?? '' }))
  }

  /** Readable message for a caught value of unknown type. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err)
  }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
import type { ChatRunSocket } from '../../services/hermes/chat-run-socket'
|
||||
|
||||
/** Module-level ChatRunSocket reference; null until setChatRunServer is called. */
let chatRunServer: ChatRunSocket | null = null

/**
 * Stores `server` as the shared ChatRunSocket instance, replacing any
 * previously stored one.
 */
export function setChatRunServer(server: ChatRunSocket): void {
  chatRunServer = server
}

/** Returns the stored ChatRunSocket instance, or null when none has been set. */
export function getChatRunServer(): ChatRunSocket | null {
  return chatRunServer
}
|
||||
@@ -15,7 +15,7 @@ export function setRunSession(runId: string, sessionId: string): void {
|
||||
setTimeout(() => runSessionMap.delete(runId), 30 * 60 * 1000)
|
||||
}
|
||||
|
||||
/**
 * Looks up the session id recorded for a run.
 *
 * @param runId - Run id previously passed to setRunSession.
 * @returns The session id stored in `runSessionMap`, or undefined when the
 *          map holds no entry for `runId`.
 */
export function getSessionForRun(runId: string): string | undefined {
  return runSessionMap.get(runId)
}
|
||||
|
||||
|
||||
@@ -0,0 +1,467 @@
|
||||
/**
|
||||
* Chat run via Socket.IO — namespace /chat-run.
|
||||
*
|
||||
* Replaces HTTP POST + SSE. Socket.IO decouples message handling
|
||||
* from connection lifecycle: the server continues streaming upstream
|
||||
* events even after the client disconnects or refreshes.
|
||||
*
|
||||
* Uses Socket.IO rooms keyed by session_id. On client reconnect,
|
||||
* the client emits 'resume' to rejoin its session room.
|
||||
*/
|
||||
import type { Server, Socket } from 'socket.io'
|
||||
import { EventSource } from 'eventsource'
|
||||
import { setRunSession, getSessionForRun } from '../../routes/hermes/proxy-handler'
|
||||
import { updateUsage } from '../../db/hermes/usage-store'
|
||||
import { getSessionDetailFromDb } from '../../db/hermes/sessions-db'
|
||||
import { getModelContextLength } from './model-context'
|
||||
import { ChatContextCompressor, countTokens, SUMMARY_PREFIX } from '../../lib/context-compressor'
|
||||
import { getCompressionSnapshot } from '../../db/hermes/compression-snapshot'
|
||||
import { logger } from '../logger'
|
||||
|
||||
/** Shared compressor for every chat run in this module, built with the default configuration. */
const compressor = new ChatContextCompressor()
|
||||
|
||||
// --- In-flight run tracking ---
|
||||
|
||||
/** Bookkeeping for a run that is currently streaming; stored per session id. */
interface InFlightRun {
  /** Run id returned by the upstream `/v1/runs` request. */
  runId: string
  /** Controller that is aborted when the client sends an `abort` event for the session. */
  abortController: AbortController
}
|
||||
|
||||
// --- ChatRunSocket ---
|
||||
|
||||
/** One `conversation_history` entry as sent to the upstream gateway. */
type HistoryMessage = {
  role: string
  content: string
  tool_calls?: any[]
  tool_call_id?: string
  name?: string
}

/** Sends one event to whoever is listening for a run (session room or single socket). */
type RunEmitter = (event: string, payload: any) => void

/** Payload of the `run` event sent by clients. */
type RunRequest = { input: string; session_id?: string; model?: string; instructions?: string }

/**
 * Socket.IO server for chat runs on the `/chat-run` namespace.
 *
 * A client emits `run` to start an upstream run; upstream events are relayed
 * to the Socket.IO room `session:<session_id>` so they keep flowing when the
 * originating socket disconnects. A reconnecting client emits `resume` to
 * rejoin the room and receive the state events recorded for the session, and
 * `abort` to stop relaying the active run.
 */
export class ChatRunSocket {
  private nsp: ReturnType<Server['of']>
  private gatewayManager: any
  /** sessionId → InFlightRun */
  private activeRuns = new Map<string, InFlightRun>()
  /** sessionId → accumulated state events for reconnecting clients */
  private sessionStates = new Map<string, Array<{ event: string; data: any }>>()
  /** sessionId → pending timer that will drop the session's replay state */
  private cleanupTimers = new Map<string, ReturnType<typeof setTimeout>>()

  /**
   * @param io - Socket.IO server on which the `/chat-run` namespace is created.
   * @param gatewayManager - Object providing `getApiKey(profile)`.
   */
  constructor(io: Server, gatewayManager: any) {
    this.nsp = io.of('/chat-run')
    this.gatewayManager = gatewayManager
  }

  /** Installs the auth middleware and the connection handler on the namespace. */
  init() {
    this.nsp.use(this.authMiddleware.bind(this))
    this.nsp.on('connection', this.onConnection.bind(this))
    logger.info('[chat-run-socket] Socket.IO ready at /chat-run')
  }

  // --- Auth middleware ---

  /**
   * Rejects the handshake when a server token is configured and the client's
   * `auth.token` does not match it. Any error while resolving the server token
   * also rejects the handshake instead of leaving it pending.
   */
  private async authMiddleware(socket: Socket, next: (err?: Error) => void) {
    let failure: Error | undefined
    try {
      // NOTE(review): any non-empty AUTH_DISABLED value (including "0" or
      // "false") disables the check — same behaviour as the original
      // expression; confirm this is intended.
      const authDisabled = Boolean(process.env.AUTH_DISABLED)
      if (!authDisabled) {
        const token = socket.handshake.auth?.token as string | undefined
        const { getToken } = await import('../auth')
        const serverToken = await getToken()
        if (serverToken && token !== serverToken) {
          failure = new Error('Authentication failed')
        }
      }
    } catch (err: unknown) {
      failure = err instanceof Error ? err : new Error(String(err))
    }
    next(failure)
  }

  // --- Connection handler ---

  /** Registers the `run`, `resume` and `abort` handlers for a new socket. */
  private onConnection(socket: Socket) {
    const profile = (socket.handshake.query?.profile as string) || 'default'

    socket.on('run', async (data: RunRequest) => {
      // Payloads come from the client; a missing one must not throw here.
      if (!data || typeof data !== 'object') {
        socket.emit('run.failed', { event: 'run.failed', error: 'Invalid run payload' })
        return
      }
      await this.handleRun(socket, data, profile)
    })

    socket.on('resume', (data: { session_id?: string }) => {
      const sid = data?.session_id
      if (!sid) return

      socket.join(`session:${sid}`)

      // Replay all accumulated state events for this session
      const states = this.sessionStates.get(sid)
      if (states) {
        for (const state of states) {
          socket.emit(state.event, { ...state.data, session_id: sid })
        }
        logger.info('[chat-run-socket] replayed %d state events for reconnecting client on session %s', states.length, sid)
      }

      logger.info('[chat-run-socket] socket %s resumed session %s (active: %s)', socket.id, sid, this.activeRuns.has(sid))
    })

    socket.on('abort', (data: { session_id?: string }) => {
      if (data?.session_id) {
        this.handleAbort(data.session_id)
      }
    })
  }

  // --- Run handler ---

  /**
   * Starts an upstream run for `data.input` and relays its events.
   *
   * With a session id the socket joins the session room, the stored
   * conversation is loaded (and compressed when over the token threshold) and
   * sent as `conversation_history`. Every failure is reported as a
   * `run.failed` event and recorded for replay.
   */
  private async handleRun(
    socket: Socket,
    data: RunRequest,
    profile: string,
  ) {
    const { input, session_id, model, instructions } = data

    // Join session room — events go to room, survives socket disconnect
    if (session_id) {
      socket.join(`session:${session_id}`)
      // A new run starts from a clean slate: states left over from the
      // previous run must not be replayed as if they belonged to this one.
      this.resetSessionState(session_id)
    }

    const emit = this.createEmitter(socket, session_id)

    const fail = (error: string) => {
      emit('run.failed', { event: 'run.failed', error })
      if (session_id) this.markCompleted(session_id, { event: 'run.failed' })
    }

    try {
      const upstream = (process.env.UPSTREAM || 'http://127.0.0.1:8642').replace(/\/$/, '')
      const apiKey: string | undefined = this.gatewayManager.getApiKey(profile) || undefined

      // Build upstream request body
      const body: Record<string, unknown> = { input }
      if (session_id) body.session_id = session_id
      if (model) body.model = model
      if (instructions) body.instructions = instructions

      // Build conversation_history from DB if session_id is provided
      if (session_id) {
        try {
          const history = await this.buildConversationHistory(session_id, profile, upstream, apiKey, emit)
          if (history) body.conversation_history = history
        } catch (err) {
          logger.warn(err, '[chat-run-socket] failed to load conversation history for session %s', session_id)
        }
      }

      const headers: Record<string, string> = { 'Content-Type': 'application/json' }
      if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`

      const res = await fetch(`${upstream}/v1/runs`, {
        method: 'POST',
        headers,
        body: JSON.stringify(body),
        signal: AbortSignal.timeout(120_000),
      })

      if (!res.ok) {
        const text = await res.text().catch(() => '')
        fail(`Upstream ${res.status}: ${text}`)
        return
      }

      const runData = (await res.json()) as { run_id?: string; status?: unknown }
      const runId = runData.run_id
      if (!runId) {
        fail('No run_id in upstream response')
        return
      }

      if (session_id) {
        setRunSession(runId, session_id)
      }

      const abortController = new AbortController()
      if (session_id) {
        this.activeRuns.set(session_id, { runId, abortController })
      }

      emit('run.started', { event: 'run.started', run_id: runId, status: runData.status })

      // Stream upstream events via EventSource — survives socket disconnect
      const eventsUrl = new URL(`${upstream}/v1/runs/${runId}/events`)
      if (apiKey) eventsUrl.searchParams.set('token', apiKey)

      const source = new EventSource(eventsUrl.toString())

      // Make `abort` effective: closing the stream stops further relaying.
      abortController.signal.addEventListener('abort', () => source.close(), { once: true })

      source.onmessage = (event: MessageEvent) => {
        try {
          const parsed = JSON.parse(event.data as string)

          // Intercept run.completed for usage tracking
          if (parsed.event === 'run.completed' && parsed.usage && parsed.run_id) {
            const sid = getSessionForRun(parsed.run_id)
            if (sid) {
              updateUsage(sid, parsed.usage.input_tokens, parsed.usage.output_tokens)
            }
          }

          emit(parsed.event || 'message', parsed)

          if (parsed.event === 'run.completed' || parsed.event === 'run.failed') {
            source.close()
            if (session_id) this.markCompleted(session_id, { event: parsed.event, run_id: parsed.run_id })
          }
        } catch { /* not JSON, skip */ }
      }

      source.onerror = () => {
        source.close()
        fail('EventSource connection lost')
      }
    } catch (err: unknown) {
      fail(err instanceof Error ? err.message : String(err))
    }
  }

  /**
   * Returns an emitter that tags payloads with the session id and targets the
   * session room, or — without a session id — the originating socket while it
   * is still connected.
   */
  private createEmitter(socket: Socket, sessionId: string | undefined): RunEmitter {
    return (event, payload) => {
      if (sessionId) {
        this.nsp.to(`session:${sessionId}`).emit(event, { ...payload, session_id: sessionId })
      } else if (socket.connected) {
        socket.emit(event, payload)
      }
    }
  }

  /**
   * Loads the stored conversation for a session and shrinks it when needed.
   *
   * - Snapshot present: the context is summary + messages after the snapshot
   *   index; it is LLM-compressed only when over half the model context length.
   * - No snapshot: histories longer than 4 messages are LLM-compressed when
   *   over that same threshold; otherwise the raw history is used.
   *
   * @returns The history to send, or undefined when the session has no messages.
   */
  private async buildConversationHistory(
    sessionId: string,
    profile: string,
    upstream: string,
    apiKey: string | undefined,
    emit: RunEmitter,
  ): Promise<HistoryMessage[] | undefined> {
    const detail = await getSessionDetailFromDb(sessionId)
    if (!detail?.messages?.length) return undefined

    const history: HistoryMessage[] = detail.messages
      .filter(m => (m.role === 'user' || m.role === 'assistant' || m.role === 'tool') && m.content !== undefined)
      .map(m => {
        const msg: any = { role: m.role, content: m.content || '' }
        if (m.tool_calls?.length) msg.tool_calls = m.tool_calls
        if (m.tool_call_id) msg.tool_call_id = m.tool_call_id
        if (m.tool_name) msg.name = m.tool_name
        return msg
      })

    // Context compression with snapshot awareness
    const contextLength = getModelContextLength(profile)
    const triggerTokens = Math.floor(contextLength / 2)

    // Step 1: Check existing snapshot — if present, assemble summary + new messages
    const snapshot = getCompressionSnapshot(sessionId)
    if (snapshot) {
      const newMessages = history.slice(snapshot.lastMessageIndex + 1)
      const summaryTokens = countTokens(SUMMARY_PREFIX + snapshot.summary)
      const assembledTokens = summaryTokens + ChatRunSocket.sumTokens(newMessages)
      logger.info('[context-compress] session=%s: snapshot at %d, %d new messages, assembled ~%d tokens (threshold %d)',
        sessionId, snapshot.lastMessageIndex, newMessages.length, assembledTokens, triggerTokens)

      const assembled: HistoryMessage[] = [
        { role: 'user', content: SUMMARY_PREFIX + '\n\n' + snapshot.summary },
        ...newMessages,
      ]

      // Under threshold — use assembled context directly, no LLM call needed
      if (assembledTokens <= triggerTokens) return assembled

      // Over threshold — needs incremental LLM compression
      return this.compressWithEvents({
        sessionId, history, upstream, apiKey, contextLength, emit,
        messageCount: newMessages.length,
        beforeTokens: assembledTokens,
        fallback: assembled,
        fallbackLabel: 'assembled context',
      })
    }

    if (history.length <= 4) return history

    // No snapshot — check if raw history exceeds threshold
    const beforeTokens = ChatRunSocket.sumTokens(history)
    if (beforeTokens <= triggerTokens) {
      // Under threshold — use raw history as-is
      logger.info('[context-compress] session=%s: %d messages, ~%d tokens — under threshold, skip', sessionId, history.length, beforeTokens)
      return history
    }

    // Over threshold — full LLM compression
    logger.info('[context-compress] BEFORE session=%s: %d messages, ~%d tokens (threshold %d)', sessionId, history.length, beforeTokens, triggerTokens)
    return this.compressWithEvents({
      sessionId, history, upstream, apiKey, contextLength, emit,
      messageCount: history.length,
      beforeTokens,
      fallback: history,
      fallbackLabel: 'raw history',
    })
  }

  /**
   * Runs the compressor while publishing `compression.started` and
   * `compression.completed`, both live and into the session's replay state.
   *
   * @returns The compressed history, or `fallback` when compression throws.
   */
  private async compressWithEvents(args: {
    sessionId: string
    history: HistoryMessage[]
    upstream: string
    apiKey: string | undefined
    contextLength: number
    emit: RunEmitter
    /** Message count reported in the events. */
    messageCount: number
    /** Token estimate before compression. */
    beforeTokens: number
    /** History to use when compression throws. */
    fallback: HistoryMessage[]
    /** Name of the fallback used in the warning log. */
    fallbackLabel: string
  }): Promise<HistoryMessage[]> {
    const { sessionId, history, upstream, apiKey, contextLength, emit, messageCount, beforeTokens, fallback, fallbackLabel } = args

    const started = {
      event: 'compression.started',
      message_count: messageCount,
      token_count: beforeTokens,
    }
    this.pushState(sessionId, 'compression.started', started)
    emit('compression.started', started)

    try {
      const result = await compressor.compress(history, upstream, apiKey, sessionId, contextLength)
      const afterTokens = ChatRunSocket.sumTokens(result.messages)

      const completed = {
        event: 'compression.completed',
        compressed: result.meta.compressed,
        llmCompressed: result.meta.llmCompressed,
        totalMessages: result.meta.totalMessages,
        resultMessages: result.messages.length,
        beforeTokens,
        afterTokens,
        summaryTokens: result.meta.summaryTokenEstimate,
        verbatimCount: result.meta.verbatimCount,
        compressedStartIndex: result.meta.compressedStartIndex,
      }
      this.replaceState(sessionId, 'compression.completed', completed)
      logger.info('[context-compress] AFTER session=%s: %d messages, ~%d tokens (was %d)', sessionId, result.messages.length, afterTokens, beforeTokens)
      emit('compression.completed', completed)

      return result.messages.map(m => ({
        role: m.role,
        content: m.content,
        tool_calls: m.tool_calls,
        tool_call_id: m.tool_call_id,
        name: m.name,
      }))
    } catch (err: unknown) {
      const failed = {
        event: 'compression.completed',
        compressed: false,
        totalMessages: messageCount,
        resultMessages: messageCount,
        beforeTokens,
        afterTokens: beforeTokens,
        summaryTokens: 0,
        verbatimCount: messageCount,
        compressedStartIndex: -1,
        error: err instanceof Error ? err.message : String(err),
      }
      this.replaceState(sessionId, 'compression.completed', failed)
      logger.warn(err, '[chat-run-socket] compression failed for session %s, using %s', sessionId, fallbackLabel)
      emit('compression.completed', failed)
      return fallback
    }
  }

  /** Sum of `countTokens` over the content of each message. */
  private static sumTokens(messages: Array<{ content: string }>): number {
    return messages.reduce((sum, m) => sum + countTokens(m.content), 0)
  }

  // --- Abort handler ---

  /** Aborts the session's active run, if any, and records it as failed for replay. */
  private handleAbort(sessionId: string) {
    const run = this.activeRuns.get(sessionId)
    if (run) {
      run.abortController.abort()
      this.markCompleted(sessionId, { event: 'run.failed', run_id: run.runId })
    }
  }

  /** Mark a session run as completed/failed so reconnecting clients get notified */
  private markCompleted(sessionId: string, info: { event: string; run_id?: string }) {
    this.activeRuns.delete(sessionId)
    this.pushState(sessionId, info.event, { event: info.event, run_id: info.run_id })

    // Auto-cleanup after 30s — enough time for a page refresh. Only one timer
    // is kept per session so an older one cannot fire early.
    const pending = this.cleanupTimers.get(sessionId)
    if (pending) clearTimeout(pending)
    const timer = setTimeout(() => {
      this.sessionStates.delete(sessionId)
      this.cleanupTimers.delete(sessionId)
    }, 30_000)
    this.cleanupTimers.set(sessionId, timer)
  }

  /** Drops the replay state of a session and cancels its pending cleanup timer. */
  private resetSessionState(sessionId: string) {
    const pending = this.cleanupTimers.get(sessionId)
    if (pending) {
      clearTimeout(pending)
      this.cleanupTimers.delete(sessionId)
    }
    this.sessionStates.delete(sessionId)
  }

  /** Append a state event for a session (used for replay on reconnect) */
  private pushState(sessionId: string, event: string, data: any) {
    const states = this.sessionStates.get(sessionId)
    if (states) {
      states.push({ event, data })
    } else {
      this.sessionStates.set(sessionId, [{ event, data }])
    }
  }

  /** Replace the first stored state with the same event name, or append if there is none */
  private replaceState(sessionId: string, event: string, data: any) {
    const states = this.sessionStates.get(sessionId)
    const idx = states ? states.findIndex(s => s.event === event) : -1
    if (states && idx >= 0) {
      states[idx] = { event, data }
      return
    }
    this.pushState(sessionId, event, data)
  }
}
|
||||
Reference in New Issue
Block a user