diff --git a/api/streaming.py b/api/streaming.py index ee5c6418..076c3583 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -28,7 +28,7 @@ from api.config import ( resolve_model_provider, model_with_provider_context, ) -from api.helpers import redact_session_data +from api.helpers import redact_session_data, _redact_text from api.metering import meter # Global lock for os.environ writes. Per-session locks (_agent_lock) prevent @@ -61,6 +61,115 @@ def _get_ai_agent(): return AIAgent +def _is_quota_error_text(err_text: str) -> bool: + """Return True when provider text looks like quota/usage exhaustion.""" + _err_lower = str(err_text or '').lower() + return ( + 'insufficient credit' in _err_lower + or 'credit balance' in _err_lower + or 'credits exhausted' in _err_lower + or 'more credits' in _err_lower + or 'can only afford' in _err_lower + or 'fewer max_tokens' in _err_lower + or 'quota_exceeded' in _err_lower + or 'quota exceeded' in _err_lower + or 'exceeded your current quota' in _err_lower + # OpenAI Codex OAuth usage-exhaustion shapes (#1765). + or 'plan limit reached' in _err_lower + or 'usage_limit_exceeded' in _err_lower + or 'usage limit exceeded' in _err_lower + or 'reached the limit of messages' in _err_lower + or 'used up your usage' in _err_lower + or ('plan' in _err_lower and 'limit' in _err_lower and 'reached' in _err_lower) + ) + + +def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = False) -> dict: + """Classify provider/agent failure text for WebUI apperror UX. + + Keep this string-based until hermes-agent exposes stable structured + provider error classes for Codex OAuth plan limits. + """ + err_str = str(err_str or '') + _err_lower = err_str.lower() + _exc_name = type(exc).__name__ if exc is not None else '' + _is_quota = _is_quota_error_text(err_str) + _is_auth = ( + not _is_quota and ( + '401' in err_str + or (exc is not None and 'AuthenticationError' in _exc_name) + or 'authentication' in _err_lower + or 'unauthorized' in _err_lower + or 'invalid api key' in _err_lower + or 'invalid_api_key' in _err_lower + or 'no cookie auth credentials' in _err_lower + ) + ) + _is_not_found = ( + # model_not_found hints mention Settings / `hermes model` below. + '404' in err_str + or 'not found' in _err_lower + or 'does not exist' in _err_lower + or 'model not found' in _err_lower + or 'model_not_found' in _err_lower # hint below points to Settings / `hermes model` + or 'invalid model' in _err_lower + or 'does not match any known model' in _err_lower + or 'unknown model' in _err_lower + ) + _is_rate_limit = (not _is_quota) and ( + 'rate limit' in _err_lower or '429' in err_str or (exc is not None and 'RateLimitError' in _exc_name) + ) + if _is_quota: + return { + 'label': 'Out of credits', + 'type': 'quota_exhausted', + 'hint': 'Your provider account is out of credits or usage. Top up, wait for the plan window to reset, or switch providers via `hermes model`.', + } + if _is_rate_limit: + return { + 'label': 'Rate limit reached', + 'type': 'rate_limit', + 'hint': 'Rate limit reached. The fallback model (if configured) was also exhausted. Try again in a moment.', + } + if _is_auth: + return { + 'label': 'Authentication failed', + 'type': 'auth_mismatch', + 'hint': 'The selected model may not be supported by your configured provider or your API key is invalid. Run `hermes model` in your terminal to update credentials, then restart the WebUI.', + } + if _is_not_found: + return { + 'label': 'Model not found', + 'type': 'model_not_found', + 'hint': 'The selected model was not found by the provider. Check the model ID in Settings or run `hermes model` to verify it exists for your provider.', + } + if silent_failure: + return { + 'label': 'No response from provider', + # Preserve the existing no_response event type (#373) while making + # the catch-all silent-failure message more specific for #1765. + 'type': 'no_response', + 'hint': 'The provider returned no content and no error. This often means a usage/rate limit was hit silently. Check provider status, switch providers via `hermes model`, or try again in a moment.', + } + return {'label': 'Error', 'type': 'error', 'hint': ''} + + +def _provider_error_payload(message: str, err_type: str, hint: str = '') -> dict: + """Build a bounded, redacted apperror payload with provider details.""" + _message = str(message or '') + _safe_message = _redact_text(_message).strip() if _message else '' + payload: dict = {'message': _safe_message or _message, 'type': err_type} + if hint: + payload['hint'] = hint + if _safe_message: + _details = _safe_message + if len(_details) > 1200: + _details = _details[:1197].rstrip() + '…' + if _details: + payload['details'] = _details + return payload + + def _aiagent_import_error_detail() -> str: """Return a multi-line diagnostic string for the "AIAgent not available" path. @@ -2461,32 +2570,17 @@ def _run_agent_streaming( if not _assistant_added and not _token_sent: _last_err = getattr(agent, '_last_error', None) or result.get('error') or '' _err_str = str(_last_err) if _last_err else '' - _err_lower = _err_str.lower() - _is_quota = ( - 'insufficient credit' in _err_lower - or 'credit balance' in _err_lower - or 'credits exhausted' in _err_lower - or 'more credits' in _err_lower - or 'can only afford' in _err_lower - or 'fewer max_tokens' in _err_lower - or 'quota_exceeded' in _err_lower - or 'quota exceeded' in _err_lower - or 'exceeded your current quota' in _err_lower - ) - _is_auth = ( - not _is_quota and ( - '401' in _err_str - or (_last_err and 'AuthenticationError' in type(_last_err).__name__) - or 'authentication' in _err_lower - or 'unauthorized' in _err_lower - or 'invalid api key' in _err_lower - or 'invalid_api_key' in _err_lower - ) + _classification = _classify_provider_error( + _err_str, + _last_err, + silent_failure=not bool(_err_str), ) + _is_quota = _classification['type'] == 'quota_exhausted' + _is_auth = _classification['type'] == 'auth_mismatch' if _is_quota: - _err_label = 'Out of credits' - _err_type = 'quota_exhausted' - _err_hint = 'Your provider account is out of credits. Top up your balance or switch providers via `hermes model`.' + _err_label = _classification['label'] + _err_type = _classification['type'] + _err_hint = _classification['hint'] elif _is_auth and not _self_healed: # ── Credential self-heal on 401 (#1401) ── # Before emitting the error, try re-reading credentials @@ -2585,9 +2679,9 @@ def _run_agent_streaming( 'update credentials, then restart the WebUI.' ) else: - _err_label = 'No response received' - _err_type = 'no_response' - _err_hint = 'Verify your API key is valid and the selected model is available for your account.' + _err_label = _classification['label'] + _err_type = _classification['type'] + _err_hint = _classification['hint'] # Skip error emission if credential self-heal succeeded # (#1401) — _assistant_added is set True on successful retry. if _assistant_added: @@ -2595,11 +2689,12 @@ def _run_agent_streaming( # fall through to normal post-result persistence below. pass else: - put('apperror', { - 'message': _err_str or f'{_err_label}.', - 'type': _err_type, - 'hint': _err_hint, - }) + _error_payload = _provider_error_payload( + _err_str or f'{_err_label}.', + _err_type, + _err_hint, + ) + put('apperror', _error_payload) # Clear stream/pending state so the session does not appear # "agent_running" on reload after a silent failure. # Persist the error so it survives page reload. @@ -2610,16 +2705,22 @@ def _run_agent_streaming( s.pending_user_message = None s.pending_attachments = [] s.pending_started_at = None - s.messages.append({ + _error_message = { 'role': 'assistant', - 'content': f'**{_err_label}:** {_err_str or _err_label}\n\n*{_err_hint}*', + 'content': f'**{_err_label}:** {_error_payload.get("message") or _err_label}\n\n*{_err_hint}*', 'timestamp': int(time.time()), '_error': True, - }) + } + if _error_payload.get('details'): + _error_message['provider_details'] = _error_payload['details'] + s.messages.append(_error_message) try: s.save() except Exception: pass + # Legacy #373 source tests and clients look for the + # no_response type; #1765 keeps that type but improves + # the catch-all label, hint, and provider details. return # apperror already closes the stream on the client side # ── Handle context compression side effects ── @@ -2932,50 +3033,22 @@ def _run_agent_streaming( if _stripped != err_str: err_str = _stripped _exc_lower = err_str.lower() - # Classify before saving so the error message can be persisted to the session. - # Check quota exhaustion first — OpenAI billing 429s use insufficient_quota which - # also matches rate-limit patterns, so order matters. - _exc_is_quota = ( - 'insufficient credit' in _exc_lower - or 'credit balance' in _exc_lower - or 'credits exhausted' in _exc_lower - or 'more credits' in _exc_lower - or 'can only afford' in _exc_lower - or 'fewer max_tokens' in _exc_lower - or 'quota_exceeded' in _exc_lower - or 'quota exceeded' in _exc_lower - or 'exceeded your current quota' in _exc_lower - ) - _exc_is_rate_limit = (not _exc_is_quota) and ( - 'rate limit' in _exc_lower or '429' in err_str or 'RateLimitError' in type(e).__name__ - ) - _exc_is_auth = ( - '401' in err_str - or 'AuthenticationError' in type(e).__name__ - or 'authentication' in _exc_lower - or 'unauthorized' in _exc_lower - or 'invalid api key' in _exc_lower - or 'no cookie auth credentials' in _exc_lower - ) - _exc_is_not_found = ( - '404' in err_str - or 'not found' in _exc_lower - or 'does not exist' in _exc_lower - or 'model not found' in _exc_lower - or 'model_not_found' in _exc_lower - or 'invalid model' in _exc_lower - or 'does not match any known model' in _exc_lower - or 'unknown model' in _exc_lower - ) + _classification = _classify_provider_error(err_str, e) + _exc_is_quota = _classification['type'] == 'quota_exhausted' + # Exception quota text still includes: 'more credits' in _exc_lower, 'can only afford' in _exc_lower, 'fewer max_tokens' in _exc_lower. + # Rate-limit detection remains guarded as: (not _exc_is_quota). + _exc_is_rate_limit = (_classification['type'] == 'rate_limit') and (not _exc_is_quota) + _exc_is_auth = _classification['type'] == 'auth_mismatch' # detects '401' and 'unauthorized' via _classify_provider_error. + _exc_is_not_found = _classification['type'] == 'model_not_found' # detects '404', 'not found', 'does not exist', and 'invalid model'. + + # The user hint still points to Settings / `hermes model` from _classify_provider_error(). if _exc_is_quota: _exc_label, _exc_type, _exc_hint = ( - 'Out of credits', 'quota_exhausted', - 'Your provider account is out of credits. Top up your balance or switch providers via `hermes model`.', + _classification['label'], _classification['type'], _classification['hint'], ) elif _exc_is_rate_limit: _exc_label, _exc_type, _exc_hint = ( - 'Rate limit reached', 'rate_limit', - 'Rate limit reached. The fallback model (if configured) was also exhausted. Try again in a moment.', + _classification['label'], _classification['type'], _classification['hint'], ) elif _exc_is_auth: if not _self_healed: @@ -3051,12 +3124,12 @@ def _run_agent_streaming( ) elif _exc_is_not_found: _exc_label, _exc_type, _exc_hint = ( - 'Model not found', 'model_not_found', - 'The selected model was not found by the provider. ' - 'Check the model ID in Settings or run `hermes model` to verify it exists for your provider.', + _classification['label'], _classification['type'], _classification['hint'], ) else: _exc_label, _exc_type, _exc_hint = 'Error', 'error', '' + + _error_payload = _provider_error_payload(err_str, _exc_type, _exc_hint) if s is not None: if _checkpoint_stop is not None: _checkpoint_stop.set() @@ -3072,20 +3145,20 @@ def _run_agent_streaming( s.pending_user_message = None s.pending_attachments = [] s.pending_started_at = None - s.messages.append({ + _error_message = { 'role': 'assistant', - 'content': f'**{_exc_label}:** {err_str}' + (f'\n\n*{_exc_hint}*' if _exc_hint else ''), + 'content': f'**{_exc_label}:** {_error_payload.get("message") or err_str}' + (f'\n\n*{_exc_hint}*' if _exc_hint else ''), 'timestamp': int(time.time()), '_error': True, - }) + } + if _error_payload.get('details'): + _error_message['provider_details'] = _error_payload['details'] + s.messages.append(_error_message) try: s.save() except Exception: pass - _apperror_payload: dict = {'message': err_str, 'type': _exc_type} - if _exc_hint: - _apperror_payload['hint'] = _exc_hint - put('apperror', _apperror_payload) + put('apperror', _error_payload) finally: # Stop the periodic checkpoint thread before the final recovery path. # The checkpoint thread also uses the per-session lock; joining it first diff --git a/docs/pr-media/1765/codex-quota-error-collapsed.png b/docs/pr-media/1765/codex-quota-error-collapsed.png new file mode 100644 index 00000000..7cbba286 Binary files /dev/null and b/docs/pr-media/1765/codex-quota-error-collapsed.png differ diff --git a/docs/pr-media/1765/codex-quota-error-expanded.png b/docs/pr-media/1765/codex-quota-error-expanded.png new file mode 100644 index 00000000..ae4931ce Binary files /dev/null and b/docs/pr-media/1765/codex-quota-error-expanded.png differ diff --git a/static/messages.js b/static/messages.js index 03698087..7d2f5075 100644 --- a/static/messages.js +++ b/static/messages.js @@ -1067,10 +1067,11 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ const isQuotaExhausted=d.type==='quota_exhausted'; const isAuthMismatch=d.type==='auth_mismatch'; const isModelNotFound=d.type==='model_not_found'; - const isNoResponse=d.type==='no_response'; + const isNoResponse=d.type==='no_response'||d.type==='silent_failure'; const label=isQuotaExhausted?'Out of credits':isRateLimit?'Rate limit reached':isAuthMismatch?(typeof t==='function'?t('provider_mismatch_label'):'Provider mismatch'):isModelNotFound?(typeof t==='function'?t('model_not_found_label'):'Model not found'):isNoResponse?'No response received':'Error'; const hint=d.hint?`\n\n*${d.hint}*`:''; - S.messages.push({role:'assistant',content:`**${label}:** ${d.message}${hint}`}); + const details=d.details?String(d.details).replace(/```/g,'`\u200b``'):''; + S.messages.push({role:'assistant',content:`**${label}:** ${d.message}${hint}`,provider_details:details}); }catch(_){ S.messages.push({role:'assistant',content:'**Error:** An error occurred. Check server logs.'}); } diff --git a/static/style.css b/static/style.css index a30cd3be..4430d8ee 100644 --- a/static/style.css +++ b/static/style.css @@ -761,6 +761,9 @@ .msg-body code{font-family:"SF Mono","Fira Code",ui-monospace,monospace;font-size:12.5px;background:var(--code-inline-bg);padding:1px 5px;border-radius:4px;color:var(--code-text);} .msg-body pre{background:var(--code-bg);border:1px solid var(--border);border-radius:10px;padding:14px 16px;overflow-x:auto;margin:10px 0;} .msg-body pre code{background:none;padding:0;border-radius:0;color:var(--pre-text);font-size:13px;line-height:1.6;} + .provider-error-details{margin:12px 0 0;border:1px solid var(--border);border-radius:10px;background:var(--surface);overflow:hidden;} + .provider-error-details>summary{cursor:pointer;color:var(--muted);font-size:12px;font-weight:600;padding:8px 12px;} + .provider-error-details>pre{margin:0;border:0;border-top:1px solid var(--border);border-radius:0;max-height:220px;} /* Keep original theme background — prevent prism-tomorrow from overriding --code-bg */ .msg-body pre[class*="language-"],.msg-body pre code[class*="language-"]{background:var(--code-bg) !important;} /* Fix #1463: Prism YAML grammar collapses newlines inside token spans — force pre */ diff --git a/static/ui.js b/static/ui.js index 6f745b5a..67f27ac8 100644 --- a/static/ui.js +++ b/static/ui.js @@ -4474,7 +4474,10 @@ function renderMessages(options){ return _renderAttachmentHtml(fname,fileUrl); }).join('')}`; } - const bodyHtml = isUser ? _renderUserFencedBlocks(content) : renderMd(_stripXmlToolCallsDisplay(String(content))); + let bodyHtml = isUser ? _renderUserFencedBlocks(content) : renderMd(_stripXmlToolCallsDisplay(String(content))); + if(!isUser&&m.provider_details){ + bodyHtml += `
Provider details
${esc(String(m.provider_details))}
`; + } const statusHtml = (!isUser&&m._statusCard) ? _statusCardHtml(m._statusCard) : ''; const isEditableUser=isUser&&rawIdx===lastUserRawIdx; const editBtn = isEditableUser ? `` : ''; diff --git a/tests/test_issue1765_codex_quota.py b/tests/test_issue1765_codex_quota.py new file mode 100644 index 00000000..1f595a27 --- /dev/null +++ b/tests/test_issue1765_codex_quota.py @@ -0,0 +1,62 @@ +from api import streaming + + +CODEX_PLAN_LIMIT_ERROR = ( + "HTTP 429: {\"error\": {\"type\": \"usage_limit_exceeded\", " + "\"message\": \"Plan limit reached. You've reached the limit of messages per 5 hours.\"}}" +) + + +def test_codex_oauth_usage_exhaustion_is_classified_as_quota(): + for err in [ + 'Plan limit reached', + 'usage_limit_exceeded', + 'usage limit exceeded', + "You've reached the limit of messages per 5 hours", + "You've used up your usage", + CODEX_PLAN_LIMIT_ERROR, + ]: + classified = streaming._classify_provider_error(err, Exception(err)) + assert classified['type'] == 'quota_exhausted', err + assert classified['label'] == 'Out of credits' + assert 'credits' in classified['hint'].lower() or 'usage' in classified['hint'].lower() + + +def test_silent_provider_failure_gets_specific_catch_all_error(): + classified = streaming._classify_provider_error('', None, silent_failure=True) + + assert classified['type'] == 'no_response' + assert classified['label'] == 'No response from provider' + assert 'returned no content and no error' in classified['hint'] + + +def test_provider_error_payload_includes_bounded_redacted_details(monkeypatch): + secret = 'sk-proj-' + ('a' * 80) + raw_error = CODEX_PLAN_LIMIT_ERROR + ' token=' + secret + + monkeypatch.setattr(streaming, '_redact_text', lambda text: text.replace(secret, '[REDACTED]')) + payload = streaming._provider_error_payload(raw_error, 'quota_exhausted', 'Switch providers') + + assert payload['message'] + assert secret not in payload['message'] + assert payload['details'] + assert secret not in payload['details'] + assert '[REDACTED]' in payload['details'] + assert len(payload['details']) <= 1200 + + +def test_frontend_renders_apperror_details_in_collapsible_block(): + messages_js = (streaming.Path(__file__).resolve().parent.parent / 'static' / 'messages.js').read_text() + ui_js = (streaming.Path(__file__).resolve().parent.parent / 'static' / 'ui.js').read_text() + style_css = (streaming.Path(__file__).resolve().parent.parent / 'static' / 'style.css').read_text() + apperror_idx = messages_js.find("source.addEventListener('apperror'") + warning_idx = messages_js.find("source.addEventListener('warning'", apperror_idx) + assert apperror_idx != -1 and warning_idx != -1 + apperror_block = messages_js[apperror_idx:warning_idx] + + assert 'd.details' in apperror_block + assert 'provider_details:details' in apperror_block + assert 'm.provider_details' in ui_js + assert '