From 081e600b33fe32b2b0af745fa4d2fcfb5f4062d9 Mon Sep 17 00:00:00 2001 From: nesquena-hermes <226517000+nesquena-hermes@users.noreply.github.com> Date: Sat, 2 May 2026 01:43:00 +0000 Subject: [PATCH] fix: context-window indicator broken on older sessions (#1436) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix two-layer bug where `/api/session` returned `context_length=0` for sessions that pre-date #1318, then the frontend silently fell back to cumulative `input_tokens` and the 128K JS default, producing nonsense indicators like "100" capped from "890% used (context exceeded), 1.2M / 131.1k tokens used". Empirical impact: 23 of 75 sessions on dev server rendered >100% before this fix. #1356 fixed the same symptom on the live SSE path but missed the GET /api/session load path that older sessions go through. Two-layer fix: 1. Backend (api/routes.py:1295-1313) — resolve context_length via agent.model_metadata.get_model_context_length() when the persisted value is 0. Mirrors api/streaming.py:2333-2342. 2. Frontend (static/ui.js:1269) — drop the cumulative `input_tokens` fallback. When last_prompt_tokens is missing, render "·" + "tokens used" (existing !hasPromptTok branch) instead of computing a percentage from the cumulative total. 10 regression tests in tests/test_issue1436_context_indicator_load_path.py covering both layers + the empty-model edge case (avoids the 256K default-for-unknown-model trap that get_model_context_length('') returns). Verified live: claude-opus-4-7 session with input_tokens=5,226,479 now renders "·" + "5.3M tokens used" instead of "100" + "3987% used". Reported by @AvidFuturist. Closes #1436. --- CHANGELOG.md | 5 + api/routes.py | 19 +- static/ui.js | 9 +- ...t_issue1436_context_indicator_load_path.py | 298 ++++++++++++++++++ 4 files changed, 328 insertions(+), 3 deletions(-) create mode 100644 tests/test_issue1436_context_indicator_load_path.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a2cec01f..abb6d6b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Hermes Web UI -- Changelog +## [Unreleased] + +### Fixed +- **Context-window indicator broken on older sessions ("100" / "890% used")** (#1436, fixes #1436) — `#1356` (closed Apr 30) fixed the same symptom on the **live SSE path** but didn't cover the **GET /api/session load path**, so any session that pre-dates `#1318` (when `context_length` was added to `Session`) returned `context_length=0` from `/api/session`. Combined with two cascading frontend fallbacks (`promptTok = last_prompt_tokens || input_tokens`, `ctxWindow = context_length || 128*1024`), the ring rendered "100" capped from 800-4000% and the tooltip showed "890% used (context exceeded), 1.2M / 131.1k tokens used" — a misleading prompt to compress that the user couldn't address. Empirically: 23 of 75 sessions on the dev server were broken before this fix. **Two-layer fix**: (1) backend `api/routes.py` now resolves `context_length` via `agent.model_metadata.get_model_context_length()` when the persisted value is 0, mirroring the SSE-path fallback in `api/streaming.py:2333-2342`. (2) frontend `static/ui.js:1269` no longer falls back to cumulative `input_tokens` when `last_prompt_tokens` is missing — that fallback divides cumulative input by the context window, producing nonsense percentages. Older sessions without last-prompt data now render "·" + "tokens used" (honest no-data) on the ring instead of a misleading >100% percentage. **10 regression tests** in `tests/test_issue1436_context_indicator_load_path.py` pin: persisted-value pass-through, zero-value fallback, fallback-receives-correct-model, empty-model-skips-fallback (avoids 256K default-for-unknown trap), exception-swallowed-on-import-failure, frontend-no-input_tokens-fallback, frontend-uses-last_prompt_tokens-only, no-data-branch-renders-dot, load-path-imports-the-helper, fix-comment-references-issue-number. Reported by @AvidFuturist. (`api/routes.py`, `static/ui.js`, `tests/test_issue1436_context_indicator_load_path.py`) + ## [v0.50.262] — 2026-05-02 ### Fixed diff --git a/api/routes.py b/api/routes.py index 7fa84a18..4106d07c 100644 --- a/api/routes.py +++ b/api/routes.py @@ -1292,6 +1292,23 @@ def handle_get(handler, parsed) -> bool: _truncated_msgs = _all_msgs else: _truncated_msgs = _all_msgs + # Resolve effective context_length with model-metadata fallback so + # older sessions (pre-#1318) that have context_length=0 persisted + # still render a meaningful indicator on load. Mirrors the + # SSE-path fallback in api/streaming.py:2333-2342. Fixes #1436. + _persisted_cl = getattr(s, "context_length", 0) or 0 + if not _persisted_cl: + _model_for_lookup = ( + getattr(s, "model", "") or effective_model or "" + ).strip() + if _model_for_lookup: + try: + from agent.model_metadata import get_model_context_length as _get_cl + _fb_cl = _get_cl(_model_for_lookup, "") or 0 + if _fb_cl: + _persisted_cl = _fb_cl + except Exception: + pass raw = s.compact() | { "messages": _truncated_msgs, "tool_calls": getattr(s, "tool_calls", []) if load_messages else [], @@ -1299,7 +1316,7 @@ def handle_get(handler, parsed) -> bool: "pending_user_message": getattr(s, "pending_user_message", None), "pending_attachments": getattr(s, "pending_attachments", []) if load_messages else [], "pending_started_at": getattr(s, "pending_started_at", None), - "context_length": getattr(s, "context_length", 0) or 0, + "context_length": _persisted_cl, "threshold_tokens": getattr(s, "threshold_tokens", 0) or 0, "last_prompt_tokens": getattr(s, "last_prompt_tokens", 0) or 0, } diff --git a/static/ui.js b/static/ui.js index e6e6d42b..4d5a5497 100644 --- a/static/ui.js +++ b/static/ui.js @@ -1265,8 +1265,13 @@ function _syncCtxIndicator(usage){ const wrap=$('ctxIndicatorWrap'); const el=$('ctxIndicator'); if(!el)return; - // Use input_tokens as fallback when last_prompt_tokens is not available - const promptTok=usage.last_prompt_tokens||usage.input_tokens||0; + // #1436: Use last_prompt_tokens only — NEVER fall back to cumulative + // input_tokens for the "context window % used" calculation. input_tokens + // is summed across all turns, so dividing it by the context window gives a + // nonsense percentage (often >100%) on long sessions. When we have no + // last-prompt data we render "·" + "tokens used" via the !hasPromptTok + // branch below — honest "no data" instead of misleading "890% used". + const promptTok=usage.last_prompt_tokens||0; const totalTok=(usage.input_tokens||0)+(usage.output_tokens||0); // Default context window to 128K when not provided by backend const DEFAULT_CTX=128*1024; diff --git a/tests/test_issue1436_context_indicator_load_path.py b/tests/test_issue1436_context_indicator_load_path.py new file mode 100644 index 00000000..59a11b0e --- /dev/null +++ b/tests/test_issue1436_context_indicator_load_path.py @@ -0,0 +1,298 @@ +"""Regression test for #1436 — context-window indicator broken on load path. + +#1356 (closed Apr 30 2026) fixed the indicator on the **live SSE path** by adding +a model-metadata fallback when `agent.context_compressor` didn't provide +`context_length`. But the **GET /api/session load path** (used when clicking +older sessions from the sidebar) was missed — it returned `context_length=0` +verbatim from the persisted Session object for any session that pre-dates +#1318 (when the field was added). + +Combined with two cascading frontend fallbacks (`promptTok = last_prompt_tokens +|| input_tokens`, `ctxWindow = context_length || 128*1024`), older sessions +loaded their indicator showing nonsense like "100 / 890% used (context exceeded)" +because: + - `context_length=0` → fallback to 131,072 (128K JS default) + - `last_prompt_tokens=0` → fallback to **cumulative** `input_tokens` + (often 1M+ on long sessions) + - 1.2M / 131K = ~890% → ring caps at 100, tooltip shows "890% used" + +Two-layer fix: + 1. Backend (api/routes.py:1295-1305) — add model_metadata fallback so + loaded sessions get a sane `context_length` even when persisted as 0. + 2. Frontend (static/ui.js:1269) — drop the `input_tokens` fallback for + `promptTok`. Cumulative input is fundamentally wrong for "context window + % used"; better to render "·" + "tokens used" (honest no-data) than a + misleading >100% percentage. + +Reported by @AvidFuturist in Discord (May 1 2026, "the 100 comes up way too +often"). Confirmed live on the dev server: 23 of 75 sessions had +`context_length=0` + `input_tokens > 128K`, all rendering >100%. +""" +import json +from pathlib import Path +from unittest.mock import MagicMock, patch +from urllib.parse import urlparse + +ROUTES = Path(__file__).resolve().parent.parent / "api" / "routes.py" +UI_JS = Path(__file__).resolve().parent.parent / "static" / "ui.js" + + +# ───────────────────────────────────────────────────────────────────────────── +# Backend: GET /api/session load path +# ───────────────────────────────────────────────────────────────────────────── + + +class TestIssue1436BackendFallback: + """The /api/session GET handler must resolve context_length via + agent.model_metadata.get_model_context_length when the persisted value is 0.""" + + def _stub_session(self, *, context_length, model, last_prompt_tokens=0, + input_tokens=0): + """Build a mock Session that mimics the persisted-session shape.""" + s = MagicMock() + s.context_length = context_length + s.threshold_tokens = 0 + s.last_prompt_tokens = last_prompt_tokens + s.input_tokens = input_tokens + s.output_tokens = 0 + s.model = model + s.title = "test-session" + s.session_id = "test-1436" + s.messages = [] + s.tool_calls = [] + s.active_stream_id = None + s.pending_user_message = None + s.pending_attachments = [] + s.pending_started_at = None + # compact() returns a dict that gets merged with the response. + s.compact.return_value = { + "session_id": "test-1436", + "title": "test-session", + "model": model, + "input_tokens": input_tokens, + "output_tokens": 0, + "context_length": context_length, + "threshold_tokens": 0, + "last_prompt_tokens": last_prompt_tokens, + } + return s + + def _invoke_get_session(self, session_obj, *, fallback_returns=0): + """Hit handle_get(/api/session?session_id=...) and capture the JSON response.""" + import api.routes as routes + + captured = {} + + def fake_j(h, data, status=200): + captured["data"] = data + captured["status"] = status + + # Patch get_model_context_length so the test doesn't depend on the + # live agent.model_metadata bundle. + fake_module = MagicMock() + fake_module.get_model_context_length = MagicMock(return_value=fallback_returns) + + handler = MagicMock() + parsed = urlparse("/api/session?session_id=test-1436&messages=0") + + # Patch import so `from agent.model_metadata import ...` resolves to our fake. + with patch("api.routes.get_session", return_value=session_obj), \ + patch("api.routes.j", side_effect=fake_j), \ + patch.dict("sys.modules", {"agent.model_metadata": fake_module}): + routes.handle_get(handler, parsed) + return captured + + def test_persisted_context_length_passed_through_unchanged(self): + """When Session.context_length is non-zero, return it as-is (no fallback).""" + s = self._stub_session(context_length=200_000, model="claude-sonnet-4.6") + result = self._invoke_get_session(s, fallback_returns=999_999) + body = result["data"]["session"] + assert body["context_length"] == 200_000, ( + f"persisted context_length must pass through unchanged, " + f"got {body['context_length']}" + ) + + def test_zero_context_length_falls_back_to_model_metadata(self): + """Pre-#1318 sessions with context_length=0 must resolve via model_metadata.""" + s = self._stub_session(context_length=0, model="claude-opus-4-7", + input_tokens=1_200_000) + result = self._invoke_get_session(s, fallback_returns=1_000_000) + body = result["data"]["session"] + assert body["context_length"] == 1_000_000, ( + f"context_length=0 must resolve to model's 1M window via fallback, " + f"got {body['context_length']}" + ) + + def test_fallback_called_with_persisted_model(self): + """Fallback must be called with Session.model (not empty string).""" + import api.routes as routes + + captured = {} + + def fake_j(h, data, status=200): + captured["data"] = data + + fake_module = MagicMock() + fake_module.get_model_context_length = MagicMock(return_value=400_000) + + s = self._stub_session(context_length=0, model="gpt-5-mini") + handler = MagicMock() + parsed = urlparse("/api/session?session_id=test-1436&messages=0") + + with patch("api.routes.get_session", return_value=s), \ + patch("api.routes.j", side_effect=fake_j), \ + patch.dict("sys.modules", {"agent.model_metadata": fake_module}): + routes.handle_get(handler, parsed) + + # First positional arg should be the model name; second is base_url ("") + called_args = fake_module.get_model_context_length.call_args + assert called_args is not None, "get_model_context_length was never called" + assert called_args.args[0] == "gpt-5-mini", ( + f"fallback called with wrong model: {called_args}" + ) + + def test_empty_model_skips_fallback(self): + """If Session.model is empty AND no effective_model is available, skip + the fallback rather than calling get_model_context_length('').""" + s = self._stub_session(context_length=0, model="") + # resolve_model=0 to skip _resolve_effective_session_model_for_display + import api.routes as routes + captured = {} + + def fake_j(h, data, status=200): + captured["data"] = data + + fake_module = MagicMock() + fake_module.get_model_context_length = MagicMock(return_value=256_000) + + handler = MagicMock() + parsed = urlparse( + "/api/session?session_id=test-1436&messages=0&resolve_model=0" + ) + + with patch("api.routes.get_session", return_value=s), \ + patch("api.routes.j", side_effect=fake_j), \ + patch.dict("sys.modules", {"agent.model_metadata": fake_module}): + routes.handle_get(handler, parsed) + + # When model is empty, fallback either isn't called OR returns no value + # we use. Either way context_length stays 0. + body = captured["data"]["session"] + assert body["context_length"] == 0, ( + f"empty model must NOT trigger fallback (avoids the 256K default-for-unknown trap), " + f"got context_length={body['context_length']}" + ) + + def test_fallback_exception_does_not_break_response(self): + """If the fallback raises (older agent build, missing module), the route + must still return a response — context_length just stays 0.""" + import api.routes as routes + captured = {} + + def fake_j(h, data, status=200): + captured["data"] = data + + # Fallback raises on import + fake_module = MagicMock() + fake_module.get_model_context_length = MagicMock( + side_effect=RuntimeError("simulated agent error") + ) + + s = self._stub_session(context_length=0, model="claude-opus-4-7") + handler = MagicMock() + parsed = urlparse("/api/session?session_id=test-1436&messages=0") + + with patch("api.routes.get_session", return_value=s), \ + patch("api.routes.j", side_effect=fake_j), \ + patch.dict("sys.modules", {"agent.model_metadata": fake_module}): + routes.handle_get(handler, parsed) # must not raise + + body = captured["data"]["session"] + assert body["context_length"] == 0, ( + "fallback exception must be swallowed; field stays 0" + ) + + +# ───────────────────────────────────────────────────────────────────────────── +# Frontend: static/ui.js _syncCtxIndicator +# ───────────────────────────────────────────────────────────────────────────── + + +class TestIssue1436FrontendDefense: + """The frontend must NOT fall back to cumulative input_tokens when + last_prompt_tokens is missing — that produces nonsense percentages.""" + + def test_promptTok_does_not_fall_back_to_input_tokens(self): + """Verify the line `promptTok = usage.last_prompt_tokens||usage.input_tokens||0` + has been removed. The old fallback divides cumulative input by the + context window, producing the >100% bug.""" + src = UI_JS.read_text(encoding="utf-8") + # The bug shape: the `||usage.input_tokens` fragment must NOT appear + # on any line that defines `promptTok`. + for line_num, line in enumerate(src.splitlines(), 1): + stripped = line.strip() + if stripped.startswith("//") or stripped.startswith("*"): + continue + if "promptTok" in line and "=" in line and "usage.last_prompt_tokens" in line: + assert "usage.input_tokens" not in line, ( + f"static/ui.js:{line_num} still falls back to cumulative " + f"input_tokens for promptTok — this produces the >100% indicator " + f"bug from #1436. Line: {line.rstrip()!r}" + ) + + def test_promptTok_assignment_uses_last_prompt_tokens_only(self): + """Verify the new assignment: `promptTok = usage.last_prompt_tokens || 0`.""" + src = UI_JS.read_text(encoding="utf-8") + # Allow whitespace variations. + normalized = "".join(src.split()) + assert "constpromptTok=usage.last_prompt_tokens||0" in normalized, ( + "static/ui.js _syncCtxIndicator must assign " + "`promptTok = usage.last_prompt_tokens || 0` (no input_tokens fallback)" + ) + + def test_no_data_branch_renders_dot(self): + """When promptTok is 0 (no last-prompt data), the `!hasPromptTok` branch + must render '·' (U+00B7) on the ring instead of computing a percentage. + This is the existing behavior; the test pins it so a future refactor + doesn't accidentally re-introduce a numeric fallback.""" + src = UI_JS.read_text(encoding="utf-8") + assert "hasPromptTok=!!promptTok" in src.replace(" ", ""), ( + "hasPromptTok must be a boolean of promptTok" + ) + # The ring center text uses '·' when !hasPromptTok + assert "hasPromptTok?String(pct):'\\u00b7'" in src.replace(" ", ""), ( + "ring center must show '·' (\\u00b7) when no last-prompt data" + ) + + +# ───────────────────────────────────────────────────────────────────────────── +# Static-source assertions (defense in depth — pin the comment markers in place) +# ───────────────────────────────────────────────────────────────────────────── + + +class TestIssue1436SourceMarkers: + """Pin the fix comments + import shape so a future refactor can't silently + drop the fallback.""" + + def test_routes_load_path_imports_get_model_context_length(self): + src = ROUTES.read_text(encoding="utf-8") + # The import must appear inside the GET /api/session load-path block. + start = src.find('if parsed.path == "/api/session":') + end = src.find('if parsed.path == "/api/projects":', start) + block = src[start:end] + assert "from agent.model_metadata import get_model_context_length" in block, ( + "GET /api/session load-path block must lazy-import " + "get_model_context_length for the context_length=0 fallback (#1436)" + ) + + def test_routes_load_path_marks_fix_with_issue_number(self): + """Comment must reference #1436 so future maintainers find this trail.""" + src = ROUTES.read_text(encoding="utf-8") + # Find the load-path block (between "if parsed.path == \"/api/session\":" + # and the next `if parsed.path` after it). + start = src.find('if parsed.path == "/api/session":') + end = src.find('if parsed.path == "/api/projects":', start) + block = src[start:end] + assert "#1436" in block, ( + "GET /api/session load-path block must reference #1436 in a comment" + )