mirror of
https://github.com/EKKOLearnAI/hermes-web-ui.git
synced 2026-05-26 22:10:15 +00:00
fix(chat): remove upstream usage values and pre-send inputTokens overwrite
- Remove all evt.usage/parsed.usage references; only use local countTokens.
- Remove the pre-send inputTokens calculation that was overwriting the resume value with the compressed context, causing an incorrect context drop (70k → 40k).
- run.completed now recalculates inputTokens with the current snapshot + full messages, including new ones from this run.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -783,13 +783,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (lastMsg?.isStreaming) {
|
||||
updateMessage(sid, lastMsg.id, { isStreaming: false })
|
||||
}
|
||||
if (evt.usage) {
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
if (target) {
|
||||
target.inputTokens = evt.usage.input_tokens
|
||||
target.outputTokens = evt.usage.output_tokens
|
||||
}
|
||||
}
|
||||
// Server-computed usage (local countTokens, snapshot-aware)
|
||||
if ((evt as any).inputTokens != null) {
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
@@ -1101,13 +1094,6 @@ export const useChatStore = defineStore('chat', () => {
|
||||
if (lastMsg?.isStreaming) {
|
||||
updateMessage(sid, lastMsg.id, { isStreaming: false })
|
||||
}
|
||||
if (evt.usage) {
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
if (target) {
|
||||
target.inputTokens = evt.usage.input_tokens
|
||||
target.outputTokens = evt.usage.output_tokens
|
||||
}
|
||||
}
|
||||
// Server-computed usage (local countTokens, snapshot-aware)
|
||||
if ((evt as any).inputTokens != null) {
|
||||
const target = sessions.value.find(s => s.id === sid)
|
||||
|
||||
@@ -436,15 +436,6 @@ export class ChatRunSocket {
|
||||
const headers: Record<string, string> = { 'Content-Type': 'application/json' }
|
||||
if (apiKey) headers['Authorization'] = `Bearer ${apiKey}`
|
||||
|
||||
// Save input token count after compression (the actual context sent to model)
|
||||
if (session_id && body.conversation_history) {
|
||||
const state = this.sessionMap.get(session_id)
|
||||
if (state) {
|
||||
state.inputTokens = (body.conversation_history as any[]).reduce(
|
||||
(sum, m) => sum + countTokens(m.content || ''), 0)
|
||||
}
|
||||
}
|
||||
|
||||
const res = await fetch(`${upstream}/v1/runs`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
@@ -568,7 +559,7 @@ export class ChatRunSocket {
|
||||
}
|
||||
}
|
||||
|
||||
// Track usage — recalculate with current snapshot + full messages
|
||||
// Track usage — self-calculate with countTokens + snapshot
|
||||
if (parsed.event === 'run.completed') {
|
||||
const sid = session_id
|
||||
if (sid) {
|
||||
|
||||
Reference in New Issue
Block a user