import { beforeEach, describe, expect, it, vi } from 'vitest' const getSessionDetailMock = vi.fn() const getSessionMock = vi.fn() const getCompressionSnapshotMock = vi.fn() const getModelContextLengthMock = vi.fn() const calcAndUpdateUsageMock = vi.fn() const estimateUsageTokensFromMessagesMock = vi.fn() const updateMessageContextTokenUsageMock = vi.fn((sid: string, state: any, emit: any, messageTokens: number, usage?: { inputTokens: number; outputTokens: number }) => { state.contextTokens = messageTokens emit('usage.updated', { event: 'usage.updated', session_id: sid, inputTokens: usage?.inputTokens ?? state.inputTokens ?? 0, outputTokens: usage?.outputTokens ?? state.outputTokens ?? 0, contextTokens: messageTokens, }) return messageTokens }) const compressorCompressMock = vi.fn() const readConfigYamlForProfileMock = vi.fn() const compressorConstructorMock = vi.fn() vi.mock('../../packages/server/src/db/hermes/session-store', () => ({ getSessionDetail: getSessionDetailMock, getSession: getSessionMock, })) vi.mock('../../packages/server/src/db/hermes/compression-snapshot', () => ({ getCompressionSnapshot: getCompressionSnapshotMock, })) vi.mock('../../packages/server/src/lib/context-compressor', () => ({ SUMMARY_PREFIX: '[Previous context summary]', ChatContextCompressor: class { constructor(opts?: any) { compressorConstructorMock(opts) } compress = compressorCompressMock }, })) vi.mock('../../packages/server/src/services/hermes/model-context', () => ({ getModelContextLength: getModelContextLengthMock, })) vi.mock('../../packages/server/src/services/config-helpers', () => ({ readConfigYamlForProfile: readConfigYamlForProfileMock, })) vi.mock('../../packages/server/src/services/logger', () => ({ logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn(), }, bridgeLogger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn(), }, })) vi.mock('../../packages/server/src/services/hermes/run-chat/usage', () => ({ calcAndUpdateUsage: calcAndUpdateUsageMock, estimateUsageTokensFromMessages: estimateUsageTokensFromMessagesMock, updateMessageContextTokenUsage: updateMessageContextTokenUsageMock, })) vi.mock('../../packages/server/src/services/hermes/run-chat/message-format', () => ({ isAssistantMessageSendable: vi.fn(() => true), })) describe('run chat compression trigger', () => { beforeEach(() => { getSessionDetailMock.mockReset() getSessionMock.mockReset() getCompressionSnapshotMock.mockReset() getModelContextLengthMock.mockReset() calcAndUpdateUsageMock.mockReset() estimateUsageTokensFromMessagesMock.mockReset() updateMessageContextTokenUsageMock.mockClear() compressorCompressMock.mockReset() compressorConstructorMock.mockReset() readConfigYamlForProfileMock.mockReset() getSessionMock.mockReturnValue({ id: 'session-1', profile: 'default' }) getModelContextLengthMock.mockReturnValue(200_000) calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 0 }) estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 0, outputTokens: 0 }) getCompressionSnapshotMock.mockReturnValue(null) readConfigYamlForProfileMock.mockResolvedValue({}) }) it('does not compress long low-token history just because it has more than 150 messages', async () => { const messages = Array.from({ length: 152 }, (_, index) => ({ id: index + 1, session_id: 'session-1', role: index === 151 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', content: `m${index}`, timestamp: index + 1, tool_call_id: null, tool_calls: null, tool_name: null, finish_reason: null, reasoning_content: null, })) getSessionDetailMock.mockReturnValue({ messages }) const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') const history = await buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, vi.fn(), new Map(), ) expect(history).toHaveLength(151) expect(history[0]).toEqual({ role: 'user', content: 'm0' }) expect(history.at(-1)).toEqual({ role: 'user', content: 'm150' }) expect(compressorCompressMock).not.toHaveBeenCalled() }) it('uses configured threshold before triggering compression', async () => { const messages = Array.from({ length: 10 }, (_, index) => ({ id: index + 1, session_id: 'session-1', role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', content: `message ${index}`, timestamp: index + 1, tool_call_id: null, tool_calls: null, tool_name: null, finish_reason: null, reasoning_content: null, })) getSessionDetailMock.mockReturnValue({ messages }) readConfigYamlForProfileMock.mockResolvedValue({ compression: { threshold: 0.25, target_ratio: 0.1, protect_last_n: 7, protect_first_n: 2 }, }) calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 60_000, outputTokens: 0 }) compressorCompressMock.mockResolvedValue({ messages: [{ role: 'user', content: 'compressed' }], meta: { compressed: true, llmCompressed: true, totalMessages: 9, summaryTokenEstimate: 1, verbatimCount: 0, compressedStartIndex: 0, }, }) const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') const history = await buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, vi.fn(), new Map(), ) expect(history).toEqual([{ role: 'user', content: 'compressed' }]) expect(compressorCompressMock).toHaveBeenCalledWith( expect.any(Array), 'http://upstream', undefined, 'session-1', expect.objectContaining({ profile: 'default' }), ) }) it('uses full context estimates for compression threshold decisions', async () => { const messages = Array.from({ length: 10 }, (_, index) => ({ id: index + 1, session_id: 'session-1', role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', content: `message ${index}`, timestamp: index + 1, tool_call_id: null, tool_calls: null, tool_name: null, finish_reason: null, reasoning_content: null, })) getSessionDetailMock.mockReturnValue({ messages }) calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 0 }) compressorCompressMock.mockResolvedValue({ messages: [{ role: 'user', content: 'compressed by full context estimate' }], meta: { compressed: true, llmCompressed: true, totalMessages: 9, summaryTokenEstimate: 1, verbatimCount: 0, compressedStartIndex: 0, }, }) const emit = vi.fn() const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') const history = await buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, emit, new Map(), {}, vi.fn(async () => 120_000), ) expect(history).toEqual([{ role: 'user', content: 'compressed by full context estimate' }]) expect(compressorCompressMock).toHaveBeenCalledTimes(1) expect(updateMessageContextTokenUsageMock).toHaveBeenCalledWith( 'session-1', expect.any(Object), emit, 1_000, { inputTokens: 1_000, outputTokens: 0 }, ) }) it('emits full context token usage when the full estimate is under threshold', async () => { const messages = Array.from({ length: 10 }, (_, index) => ({ id: index + 1, session_id: 'session-1', role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', content: `message ${index}`, timestamp: index + 1, tool_call_id: null, tool_calls: null, tool_name: null, finish_reason: null, reasoning_content: null, })) getSessionDetailMock.mockReturnValue({ messages }) calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 900 }) const emit = vi.fn() const contextTokenEstimator = vi.fn(async () => 19_379) const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') const history = await buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, emit, new Map(), {}, contextTokenEstimator, ) expect(history).toHaveLength(9) expect(contextTokenEstimator).toHaveBeenCalledWith(expect.arrayContaining([{ role: 'user', content: 'message 0' }])) expect(emit).toHaveBeenCalledWith('usage.updated', expect.objectContaining({ event: 'usage.updated', session_id: 'session-1', inputTokens: 1_000, outputTokens: 900, contextTokens: 19_379, })) expect(compressorCompressMock).not.toHaveBeenCalled() }) it('throws when fixed prompt and tool schemas exceed threshold before any history exists', async () => { getSessionDetailMock.mockReturnValue({ messages: [] }) const emit = vi.fn() const { buildCompressedHistory, ContextWindowTooSmallError } = await import('../../packages/server/src/services/hermes/run-chat/compression') await expect(buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, emit, new Map(), {}, vi.fn(async () => 120_000), )).rejects.toBeInstanceOf(ContextWindowTooSmallError) expect(emit).not.toHaveBeenCalledWith('usage.updated', expect.anything()) expect(compressorCompressMock).not.toHaveBeenCalled() }) it('throws instead of compressing when full context is over threshold but history is too short', async () => { const messages = Array.from({ length: 5 }, (_, index) => ({ id: index + 1, session_id: 'session-1', role: index === 4 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', content: `message ${index}`, timestamp: index + 1, tool_call_id: null, tool_calls: null, tool_name: null, finish_reason: null, reasoning_content: null, })) getSessionDetailMock.mockReturnValue({ messages }) calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 1_000, outputTokens: 0 }) const { buildCompressedHistory, ContextWindowTooSmallError } = await import('../../packages/server/src/services/hermes/run-chat/compression') await expect(buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, vi.fn(), new Map(), {}, vi.fn(async () => 120_000), )).rejects.toBeInstanceOf(ContextWindowTooSmallError) expect(compressorCompressMock).not.toHaveBeenCalled() }) it('merges partial compression config with defaults', async () => { const messages = Array.from({ length: 10 }, (_, index) => ({ id: index + 1, session_id: 'session-1', role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', content: `message ${index}`, timestamp: index + 1, tool_call_id: null, tool_calls: null, tool_name: null, finish_reason: null, reasoning_content: null, })) getSessionDetailMock.mockReturnValue({ messages }) readConfigYamlForProfileMock.mockResolvedValue({ compression: { protect_last_n: 5 }, }) calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 120_000, outputTokens: 0 }) compressorCompressMock.mockResolvedValue({ messages: [{ role: 'user', content: 'compressed' }], meta: { compressed: true, llmCompressed: true, totalMessages: 9, summaryTokenEstimate: 1, verbatimCount: 0, compressedStartIndex: 0, }, }) const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') await buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, vi.fn(), new Map(), ) expect(compressorConstructorMock).toHaveBeenCalledWith({ config: { triggerTokens: 100_000, summaryBudget: 40_000, headMessageCount: 3, tailMessageCount: 5, }, }) expect(compressorCompressMock).toHaveBeenCalledTimes(1) }) it('uses stale snapshot summary plus safe tail instead of full history when under threshold', async () => { const messages = Array.from({ length: 10 }, (_, index) => ({ id: index + 1, session_id: 'session-1', role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', content: `message ${index}`, timestamp: index + 1, tool_call_id: null, tool_calls: null, tool_name: null, finish_reason: null, reasoning_content: null, })) getSessionDetailMock.mockReturnValue({ messages }) getCompressionSnapshotMock.mockReturnValue({ summary: 'old summary', lastMessageIndex: 99, messageCountAtTime: 100, }) readConfigYamlForProfileMock.mockResolvedValue({ compression: { protect_first_n: 2, protect_last_n: 3 }, }) estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 1_000, outputTokens: 0 }) const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') const history = await buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, vi.fn(), new Map(), ) expect(history.map(m => m.content)).toEqual([ 'message 0', 'message 1', '[Previous context summary]\n\nold summary', 'message 6', 'message 7', 'message 8', ]) expect(compressorCompressMock).not.toHaveBeenCalled() }) it('compresses stale snapshot safe tail instead of full history when stale assembly exceeds threshold', async () => { const messages = Array.from({ length: 10 }, (_, index) => ({ id: index + 1, session_id: 'session-1', role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', content: `message ${index}`, timestamp: index + 1, tool_call_id: null, tool_calls: null, tool_name: null, finish_reason: null, reasoning_content: null, })) getSessionDetailMock.mockReturnValue({ messages }) getCompressionSnapshotMock.mockReturnValue({ summary: 'old summary', lastMessageIndex: 99, messageCountAtTime: 100, }) readConfigYamlForProfileMock.mockResolvedValue({ compression: { protect_first_n: 2, protect_last_n: 3 }, }) estimateUsageTokensFromMessagesMock.mockReturnValue({ inputTokens: 120_000, outputTokens: 0 }) compressorCompressMock.mockResolvedValue({ messages: [{ role: 'user', content: 'updated stale compressed' }], meta: { compressed: true, llmCompressed: true, totalMessages: 9, summaryTokenEstimate: 1, verbatimCount: 0, compressedStartIndex: 8, }, }) const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') const history = await buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, vi.fn(), new Map(), ) expect(history).toEqual([{ role: 'user', content: 'updated stale compressed' }]) expect(compressorCompressMock).toHaveBeenCalledWith( expect.arrayContaining([{ role: 'user', content: 'message 0' }]), 'http://upstream', undefined, 'session-1', expect.objectContaining({ profile: 'default' }), ) }) it('does not compress when compression is disabled', async () => { const messages = Array.from({ length: 10 }, (_, index) => ({ id: index + 1, session_id: 'session-1', role: index === 9 ? 'user' : index % 2 === 0 ? 'user' : 'assistant', content: `message ${index}`, timestamp: index + 1, tool_call_id: null, tool_calls: null, tool_name: null, finish_reason: null, reasoning_content: null, })) getSessionDetailMock.mockReturnValue({ messages }) readConfigYamlForProfileMock.mockResolvedValue({ compression: { enabled: false, threshold: 0.01 }, }) calcAndUpdateUsageMock.mockResolvedValue({ inputTokens: 180_000, outputTokens: 0 }) const { buildCompressedHistory } = await import('../../packages/server/src/services/hermes/run-chat/compression') const history = await buildCompressedHistory( 'session-1', 'default', 'http://upstream', undefined, vi.fn(), new Map(), ) expect(history).toHaveLength(9) expect(compressorCompressMock).not.toHaveBeenCalled() expect(calcAndUpdateUsageMock).not.toHaveBeenCalled() }) })