Mirror of https://github.com/nesquena/hermes-webui.git (synced 2026-05-25 11:10:18 +00:00)
fix: context window indicator overflow (#1356)
- api/streaming.py: the SSE payload now falls back to agent.model_metadata.get_model_context_length when the compressor doesn't supply context_length (mirrors the session-save fallback shipped in v0.50.247).
- api/streaming.py: also falls back to s.last_prompt_tokens to avoid using the cumulative input_tokens counter.
- static/ui.js: tracks rawPct separately from pct and shows a '(context exceeded)' tooltip when rawPct > 100 instead of the misleading '100% used (0% left)'.
- static/messages.js: clears the 'Uploading...' composer status after the upload completes.

Co-authored-by: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
This commit is contained in:
@@ -2241,6 +2241,28 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
|
||||
usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0
|
||||
usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0
|
||||
# Fallback: when the compressor is absent or reports context_length=0,
|
||||
# resolve the model's context window from metadata so the UI indicator
|
||||
# shows the correct percentage rather than overflowing against the 128K
|
||||
# JS default. Mirrors the session-save fallback above (lines ~2205-2217).
|
||||
if not usage.get('context_length'):
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length as _get_cl
|
||||
_fb_cl = _get_cl(
|
||||
getattr(agent, 'model', resolved_model or '') or '',
|
||||
getattr(agent, 'base_url', '') or '',
|
||||
)
|
||||
if _fb_cl:
|
||||
usage['context_length'] = _fb_cl
|
||||
except Exception:
|
||||
pass
|
||||
# Fallback: when last_prompt_tokens is missing (no compressor), use the
|
||||
# session-persisted value rather than letting the frontend fall back to
|
||||
# the cumulative input_tokens counter, which overflows for long sessions.
|
||||
if not usage.get('last_prompt_tokens'):
|
||||
_sess_lpt = getattr(s, 'last_prompt_tokens', 0) or 0
|
||||
if _sess_lpt:
|
||||
usage['last_prompt_tokens'] = _sess_lpt
|
||||
# (reasoning trace already attached + saved above, before s.save())
|
||||
# Leftover-steer delivery: if a /steer was queued (via
|
||||
# api/chat/steer) but the agent finished its turn before
|
||||
|
||||
@@ -141,6 +141,10 @@ async function send(){
|
||||
let uploaded=[];
|
||||
try{uploaded=await uploadPendingFiles();}
|
||||
catch(e){if(!text){setComposerStatus(`Upload error: ${e.message}`);return;}}
|
||||
// Clear the uploading status now that upload is done — if we don't clear here
|
||||
// it stays visible for the entire duration of the agent stream, since
|
||||
// setComposerStatus('') is only called in setBusy(false), not setBusy(true).
|
||||
setComposerStatus('');
|
||||
|
||||
const uploadedNames=uploaded.map(u=>u.name||u);
|
||||
const uploadedPaths=uploaded.map(u=>u&&u.is_image?(u.name||u.filename||u):(u.path||u.name||u));
|
||||
|
||||
+4
-2
@@ -864,7 +864,9 @@ function _syncCtxIndicator(usage){
|
||||
}
|
||||
if(wrap) wrap.style.display='';
|
||||
const hasPromptTok=!!promptTok;
|
||||
const pct=hasPromptTok?Math.min(100,Math.round((promptTok/ctxWindow)*100)):0;
|
||||
const rawPct=hasPromptTok?Math.round((promptTok/ctxWindow)*100):0;
|
||||
const pct=Math.min(100,rawPct);
|
||||
const overflowed=rawPct>100;
|
||||
const ring=$('ctxRingValue');
|
||||
const center=$('ctxPercent');
|
||||
const usageLine=$('ctxTooltipUsage');
|
||||
@@ -908,7 +910,7 @@ function _syncCtxIndicator(usage){
|
||||
if(!hasExplicitCtx&&hasPromptTok) label+=' (est. 128K)';
|
||||
if(cost) label+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
|
||||
el.setAttribute('aria-label',label);
|
||||
if(usageLine) usageLine.textContent=hasPromptTok?`${pct}% used (${Math.max(0,100-pct)}% left)`:`${_fmtTokens(totalTok)} tokens used`;
|
||||
if(usageLine) usageLine.textContent=hasPromptTok?(overflowed?`${rawPct}% used (context exceeded)`:`${pct}% used (${100-pct}% left)`):`${_fmtTokens(totalTok)} tokens used`;
|
||||
if(tokensLine) tokensLine.textContent=hasPromptTok?`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)} tokens used`:`In: ${_fmtTokens(usage.input_tokens||0)} \u00b7 Out: ${_fmtTokens(usage.output_tokens||0)}`;
|
||||
const threshold=usage.threshold_tokens||0;
|
||||
if(thresholdLine){
|
||||
|
||||
Reference in New Issue
Block a user