From ab663cdfc8711d8bda5eef996cbf80bac20efb8a Mon Sep 17 00:00:00 2001 From: starship-s <45587122+starship-s@users.noreply.github.com> Date: Wed, 13 May 2026 19:01:47 -0600 Subject: [PATCH] fix(providers): avoid caching transient quota probe failures --- api/providers.py | 25 +++++++---- tests/test_provider_quota_status.py | 65 ++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 12 deletions(-) diff --git a/api/providers.py b/api/providers.py index 9400561a..86a31155 100644 --- a/api/providers.py +++ b/api/providers.py @@ -104,10 +104,12 @@ _ACCOUNT_USAGE_PARENT_DEATHSIG_BOOTSTRAP = ( _account_usage_probe_semaphore: threading.BoundedSemaphore | None = None # Short-lived account-usage cache. The Codex pooled probe may check multiple -# credentials, so cache the sanitized snapshot briefly to avoid re-querying the +# credentials, so cache sanitized snapshots briefly to avoid re-querying the # provider on every Settings repaint/profile-panel refresh. Pool composition # changes can be stale for at most this TTL; that is preferred to hammering the -# provider usage API while the Settings panel is open. +# provider usage API while the Settings panel is open. Transient None probe +# results are intentionally not cached; known exhausted/unavailable states are +# represented as non-None snapshots and remain cacheable. _account_usage_status_cache: dict[tuple[str, str, str], tuple[float, Any]] = {} _account_usage_status_cache_lock = threading.Lock() @@ -461,6 +463,9 @@ def _best_remaining_by_window(rows): "credential_label": label, } current = best.get(window_label.lower()) + # The normalized Codex account-limit payload currently exposes + # percentages, not absolute request/token capacity. If absolute + # remaining capacity becomes available, prefer it here. if current is None or float(remaining) > float(current.get("remaining_percent") or -1): best[window_label.lower()] = candidate return list(best.values()) @@ -713,6 +718,12 @@ def _load_env_file(env_path: Path) -> dict[str, str]: def _decode_jwt_claims_unverified(token: str) -> dict[str, Any]: + """Decode JWT claims for token-shape classification only. + + The signature is intentionally not verified because this helper is not an + authorization decision: it only prevents a Codex OAuth JWT-shaped value from + being treated as a raw OpenAI API key in provider-card detection. + """ if not isinstance(token, str) or token.count(".") != 2: return {} payload = token.split(".", 2)[1] @@ -1185,15 +1196,15 @@ def invalidate_account_usage_status_cache(provider_id: str | None = None) -> Non def _set_cached_account_usage( cache_key: tuple[str, str, str], snapshot: Any, - *, - preserve_non_none: bool = False, ) -> None: now = time.monotonic() with _account_usage_status_cache_lock: - if preserve_non_none and snapshot is None: + if snapshot is None: cached = _account_usage_status_cache.get(cache_key) if cached is not None and cached[1] is not None: return + _account_usage_status_cache.pop(cache_key, None) + return _account_usage_status_cache[cache_key] = (now, snapshot) expired = [ key for key, (fetched_at, _snapshot) in _account_usage_status_cache.items() @@ -1284,11 +1295,11 @@ def _fetch_account_usage_with_profile_context(provider: str, *, refresh: bool = home, api_key=api_key, ) - _set_cached_account_usage(cache_key, snapshot, preserve_non_none=refresh) + _set_cached_account_usage(cache_key, snapshot) return snapshot except Exception: logger.debug("Failed to fetch account usage for %s", provider, exc_info=True) - _set_cached_account_usage(cache_key, None, preserve_non_none=refresh) + _set_cached_account_usage(cache_key, None) return None diff --git a/tests/test_provider_quota_status.py b/tests/test_provider_quota_status.py index 7d0f24e9..02ddc563 100644 --- a/tests/test_provider_quota_status.py +++ b/tests/test_provider_quota_status.py @@ -999,18 +999,30 @@ def test_account_usage_profile_cache_invalidates_with_credential_pool_cache(monk -def test_account_usage_profile_fetch_caches_none_results(monkeypatch, tmp_path): - """Known-empty/unavailable account usage should be cached, not re-probed each refresh.""" +def test_account_usage_profile_fetch_caches_unavailable_snapshots(monkeypatch, tmp_path): + """Known unavailable account snapshots should be cached like available ones.""" import api.providers as providers monkeypatch.setattr(profiles, "get_active_hermes_home", lambda: tmp_path) old_cfg, old_mtime = _with_config(model={"provider": "openai-codex"}) providers._account_usage_status_cache.clear() calls = [] + unavailable_snapshot = SimpleNamespace( + provider="openai-codex", + source="usage_api_pool", + title="Account limits", + plan=None, + windows=(), + details=("0/1 credentials available", "1 exhausted"), + available=False, + unavailable_reason="Credential pool exhausted until 2030-03-17T18:46:40Z.", + fetched_at=datetime(2030, 3, 17, 12, 30, tzinfo=timezone.utc), + pool={"total_credentials": 1, "available_credentials": 0, "exhausted_credentials": 1, "credentials": []}, + ) def fake_fetch(provider, home, api_key=None): calls.append((provider, str(home), api_key)) - return None + return unavailable_snapshot monkeypatch.setattr(providers, "_agent_fetch_account_usage_for_home", fake_fetch) try: @@ -1020,11 +1032,54 @@ def test_account_usage_profile_fetch_caches_none_results(monkeypatch, tmp_path): providers._account_usage_status_cache.clear() _restore_config(old_cfg, old_mtime) - assert first is None - assert second is None + assert first is unavailable_snapshot + assert second is unavailable_snapshot assert calls == [("openai-codex", str(tmp_path), None)] +def test_account_usage_profile_fetch_does_not_cache_transient_none_results(monkeypatch, tmp_path): + """Transient None probe results should not mask the next successful status check.""" + import api.providers as providers + + monkeypatch.setattr(profiles, "get_active_hermes_home", lambda: tmp_path) + old_cfg, old_mtime = _with_config(model={"provider": "openai-codex"}) + providers._account_usage_status_cache.clear() + calls = [] + recovered_snapshot = SimpleNamespace( + provider="openai-codex", + source="usage_api_pool", + title="Account limits", + plan=None, + windows=(), + details=(), + available=True, + unavailable_reason=None, + fetched_at=datetime(2030, 3, 17, 12, 31, tzinfo=timezone.utc), + pool={"total_credentials": 1, "credentials": []}, + ) + + def fake_fetch(provider, home, api_key=None): + calls.append((provider, str(home), api_key)) + return None if len(calls) == 1 else recovered_snapshot + + monkeypatch.setattr(providers, "_agent_fetch_account_usage_for_home", fake_fetch) + try: + first = providers._fetch_account_usage_with_profile_context("openai-codex") + second = providers._fetch_account_usage_with_profile_context("openai-codex") + third = providers._fetch_account_usage_with_profile_context("openai-codex") + finally: + providers._account_usage_status_cache.clear() + _restore_config(old_cfg, old_mtime) + + assert first is None + assert second is recovered_snapshot + assert third is recovered_snapshot + assert calls == [ + ("openai-codex", str(tmp_path), None), + ("openai-codex", str(tmp_path), None), + ] + + def test_account_usage_profile_fetches_can_overlap_for_different_homes(monkeypatch, tmp_path): """Different profile quota fetches should not serialize on cron's global lock.""" import api.providers as providers