mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-25 03:00:23 +00:00
Classify onboarding DNS probe failures consistently
This commit is contained in:
@@ -8,6 +8,10 @@
|
||||
|
||||
- **PR #2165** by @starship-s — Pooled OpenAI Codex quota status surfaced in the Providers panel. Pre-fix, the Providers page presented Codex quota as if there were only one credential/account state, which was misleading when users authenticate through a credential pool with several usable credentials, temporarily exhausted credentials, failed probes, and different reset windows. Now the active provider quota card includes a credential-pool summary (available / exhausted / failed / checked counts), displays the best currently-available pool windows in the collapsed view as "Best of N", and exposes per-credential detail behind an expandable section. Exhausted credentials are intentionally NOT re-probed while their cooldown is active (matches credential-pool selection behavior, avoids generating failed quota calls from a status page). Manual refresh still means "probe now", but transient refresh failures preserve the last known-good snapshot. JWT decode (`_decode_jwt_claims_unverified`) is used only for token-shape classification (Codex OAuth JWT vs raw OpenAI API key), explicitly NOT for authorization — documented in the function docstring. Per-row plan labels are only shown when verified account-limit data is available. Concurrent probing is capped at `min(_CODEX_POOL_MAX_WORKERS=6, len(probe_items))` so page render time stays bounded on large pools. Transient `None` probe results are NOT cached (only known unavailable/exhausted states are cached). A 32-test regression suite covers pool snapshot, concurrent probe, JWT detection, cache invalidation, transient-vs-known cache distinction, and i18n parity across all currently-supported locales. Scoped to OpenAI Codex (the only provider with the credential-pool/account-limit path needed to surface this accurately).
|
||||
|
||||
### Fixed
|
||||
|
||||
- Onboarding provider endpoint probes now classify DNS-style failures more consistently as `dns`, including `getaddrinfo` failures wrapped by `URLError`/`OSError` and network failures against reserved non-resolvable TLDs such as `.invalid`, `.test`, and `.example`.
|
||||
|
||||
## [v0.51.64] — 2026-05-14 — Release AN (stage-357 — 3-PR small batch — docker_init k8s whoami fallback + PWA manifest session routes (closes #2226) + aux title test coverage)
|
||||
|
||||
### Fixed
|
||||
|
||||
+45
-1
@@ -312,6 +312,44 @@ class _NoRedirectHandler(urllib.request.HTTPRedirectHandler):
|
||||
|
||||
|
||||
# Module-level opener used for endpoint probes. Passing a _NoRedirectHandler
# instance makes build_opener() install it in place of urllib's default
# HTTPRedirectHandler (build_opener swaps in subclasses of default handlers).
_PROBE_OPENER = urllib.request.build_opener(_NoRedirectHandler())
|
||||
_DNS_ONLY_TEST_TLDS = frozenset({"invalid", "test", "example"})
|
||||
|
||||
|
||||
def _hostname_uses_reserved_dns_tld(hostname: str | None) -> bool:
|
||||
host = str(hostname or "").strip().rstrip(".").lower()
|
||||
if not host or "." not in host:
|
||||
return False
|
||||
return host.rsplit(".", 1)[-1] in _DNS_ONLY_TEST_TLDS
|
||||
|
||||
|
||||
def _exception_chain_text(exc) -> str:
|
||||
parts: list[str] = []
|
||||
seen: set[int] = set()
|
||||
cur = exc
|
||||
while cur is not None and id(cur) not in seen:
|
||||
seen.add(id(cur))
|
||||
parts.append(str(cur))
|
||||
cur = getattr(cur, "__cause__", None) or getattr(cur, "__context__", None)
|
||||
return " ".join(parts).lower()
|
||||
|
||||
|
||||
def _probe_failure_is_dns(exc, hostname: str | None) -> bool:
|
||||
if isinstance(exc, socket.gaierror):
|
||||
return True
|
||||
text = _exception_chain_text(exc)
|
||||
if any(
|
||||
marker in text
|
||||
for marker in (
|
||||
"getaddrinfo",
|
||||
"gaierror",
|
||||
"name or service not known",
|
||||
"temporary failure in name resolution",
|
||||
"nodename nor servname provided",
|
||||
"no address associated with hostname",
|
||||
)
|
||||
):
|
||||
return True
|
||||
return _hostname_uses_reserved_dns_tld(hostname)
|
||||
|
||||
|
||||
def probe_provider_endpoint(
|
||||
@@ -416,7 +454,7 @@ def probe_provider_endpoint(
|
||||
reason = exc.reason
|
||||
if isinstance(reason, socket.timeout) or "timed out" in str(reason).lower():
|
||||
return {"ok": False, "error": "timeout", "detail": f"connection timed out after {timeout:g}s"}
|
||||
if isinstance(reason, socket.gaierror):
|
||||
if _probe_failure_is_dns(reason, parsed.hostname):
|
||||
return {
|
||||
"ok": False,
|
||||
"error": "dns",
|
||||
@@ -433,6 +471,12 @@ def probe_provider_endpoint(
|
||||
except (TimeoutError, socket.timeout):
|
||||
return {"ok": False, "error": "timeout", "detail": f"connection timed out after {timeout:g}s"}
|
||||
except Exception as exc: # pragma: no cover — defensive net
|
||||
if _probe_failure_is_dns(exc, parsed.hostname):
|
||||
return {
|
||||
"ok": False,
|
||||
"error": "dns",
|
||||
"detail": f"could not resolve host '{parsed.hostname}'",
|
||||
}
|
||||
logger.debug("probe_provider_endpoint unexpected error", exc_info=True)
|
||||
return {"ok": False, "error": "unreachable", "detail": str(exc)[:200]}
|
||||
|
||||
|
||||
@@ -177,6 +177,40 @@ class TestIssue1499OnboardingProbe:
|
||||
assert r["ok"] is False
|
||||
assert r["error"] == "dns", f"Expected dns error, got {r}"
|
||||
|
||||
def test_dns_failure_wrapped_by_urlerror(self, monkeypatch):
    """A resolver failure hidden inside a generic URLError still reports 'dns'.

    The stubbed opener raises ``URLError(OSError("getaddrinfo failed"))``;
    no ``socket.gaierror`` instance is involved, so the classification
    cannot rely on an isinstance check alone.
    """
    from api import onboarding

    class _RaisingOpener:
        # Stand-in for the module's opener: every request fails immediately.
        def open(self, *_args, **_kwargs):
            wrapped = OSError("getaddrinfo failed")
            raise urllib.error.URLError(wrapped)

    monkeypatch.setattr(onboarding, "_PROBE_OPENER", _RaisingOpener())

    base_url = "http://model-server.example:1234/v1"
    r = onboarding.probe_provider_endpoint("lmstudio", base_url, timeout=2.0)

    assert r["ok"] is False
    assert r["error"] == "dns", f"Expected dns error, got {r}"
|
||||
|
||||
def test_reserved_dns_tld_network_failure_classifies_as_dns(self, monkeypatch):
    """A generic network error against a reserved-TLD host still reports 'dns'.

    The stubbed opener raises ``URLError(OSError("network is unreachable"))``,
    which carries no resolver wording; the probed host ends in ``.invalid``.
    """
    from api import onboarding

    class _RaisingOpener:
        # Stand-in for the module's opener: every request fails immediately.
        def open(self, *_args, **_kwargs):
            wrapped = OSError("network is unreachable")
            raise urllib.error.URLError(wrapped)

    monkeypatch.setattr(onboarding, "_PROBE_OPENER", _RaisingOpener())

    base_url = "http://this-host-definitely-does-not-exist-zxq987.invalid:1234/v1"
    r = onboarding.probe_provider_endpoint("lmstudio", base_url, timeout=2.0)

    assert r["ok"] is False
    assert r["error"] == "dns", f"Expected dns error, got {r}"
|
||||
|
||||
def test_connect_refused(self):
|
||||
"""Connecting to a port nobody's listening on → error='connect_refused'."""
|
||||
from api.onboarding import probe_provider_endpoint
|
||||
|
||||
Reference in New Issue
Block a user