fix: context window indicator overflow (#1356)

- api/streaming.py: the SSE payload now falls back to agent.model_metadata.get_model_context_length when the compressor doesn't supply context_length (mirrors the session-save fallback shipped in v0.50.247).
- api/streaming.py: also falls back to s.last_prompt_tokens to avoid using the cumulative input_tokens counter.
- static/ui.js: tracks rawPct separately from pct and shows a '(context exceeded)' tooltip when rawPct > 100 instead of the misleading '100% used (0% left)'.
- static/messages.js: clears the 'Uploading...' composer status after the upload completes.

Co-authored-by: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
2026-05-25 11:10:18 +00:00 · 2026-04-30 21:32:45 +00:00
parent 9303636dd9
commit bbdacdca5c
3 changed files with 30 additions and 2 deletions
@@ -2241,6 +2241,28 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                usage['context_length'] = getattr(_cc, 'context_length', 0) or 0
                usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0
                usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0
+            # Fallback: when the compressor is absent or reports context_length=0,
+            # resolve the model's context window from metadata so the UI indicator
+            # shows the correct percentage rather than overflowing against the 128K
+            # JS default.  Mirrors the session-save fallback above (lines ~2205-2217).
+            if not usage.get('context_length'):
+                try:
+                    from agent.model_metadata import get_model_context_length as _get_cl
+                    _fb_cl = _get_cl(
+                        getattr(agent, 'model', resolved_model or '') or '',
+                        getattr(agent, 'base_url', '') or '',
+                    )
+                    if _fb_cl:
+                        usage['context_length'] = _fb_cl
+                except Exception:
+                    pass
+            # Fallback: when last_prompt_tokens is missing (no compressor), use the
+            # session-persisted value rather than letting the frontend fall back to
+            # the cumulative input_tokens counter, which overflows for long sessions.
+            if not usage.get('last_prompt_tokens'):
+                _sess_lpt = getattr(s, 'last_prompt_tokens', 0) or 0
+                if _sess_lpt:
+                    usage['last_prompt_tokens'] = _sess_lpt
            # (reasoning trace already attached + saved above, before s.save())
            # Leftover-steer delivery: if a /steer was queued (via
            # api/chat/steer) but the agent finished its turn before
@@ -141,6 +141,10 @@ async function send(){
  let uploaded=[];
  try{uploaded=await uploadPendingFiles();}
  catch(e){if(!text){setComposerStatus(`Upload error: ${e.message}`);return;}}
+  // Clear the uploading status now that upload is done — if we don't clear here
+  // it stays visible for the entire duration of the agent stream, since
+  // setComposerStatus('') is only called in setBusy(false), not setBusy(true).
+  setComposerStatus('');

  const uploadedNames=uploaded.map(u=>u.name||u);
  const uploadedPaths=uploaded.map(u=>u&&u.is_image?(u.name||u.filename||u):(u.path||u.name||u));
@@ -864,7 +864,9 @@ function _syncCtxIndicator(usage){
  }
  if(wrap) wrap.style.display='';
  const hasPromptTok=!!promptTok;
-  const pct=hasPromptTok?Math.min(100,Math.round((promptTok/ctxWindow)*100)):0;
+  const rawPct=hasPromptTok?Math.round((promptTok/ctxWindow)*100):0;
+  const pct=Math.min(100,rawPct);
+  const overflowed=rawPct>100;
  const ring=$('ctxRingValue');
  const center=$('ctxPercent');
  const usageLine=$('ctxTooltipUsage');
@@ -908,7 +910,7 @@ function _syncCtxIndicator(usage){
  if(!hasExplicitCtx&&hasPromptTok) label+=' (est. 128K)';
  if(cost) label+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
  el.setAttribute('aria-label',label);
-  if(usageLine) usageLine.textContent=hasPromptTok?`${pct}% used (${Math.max(0,100-pct)}% left)`:`${_fmtTokens(totalTok)} tokens used`;
+  if(usageLine) usageLine.textContent=hasPromptTok?(overflowed?`${rawPct}% used (context exceeded)`:`${pct}% used (${100-pct}% left)`):`${_fmtTokens(totalTok)} tokens used`;
  if(tokensLine) tokensLine.textContent=hasPromptTok?`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)} tokens used`:`In: ${_fmtTokens(usage.input_tokens||0)} \u00b7 Out: ${_fmtTokens(usage.output_tokens||0)}`;
  const threshold=usage.threshold_tokens||0;
  if(thresholdLine){