From 512c401e8a43f4efb25777c5e6966483af53437f Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Fri, 15 May 2026 12:16:23 -0700 Subject: [PATCH] fix: route endpoint-discovered Ollama models correctly --- CHANGELOG.md | 2 + api/config.py | 31 ++++++++++++++ ...test_resolve_model_provider_free_suffix.py | 41 +++++++++++++++++++ 3 files changed, 74 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 674486f1..ce20b3de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Fixed +- LAN Ollama models selected from endpoint-discovered `custom:<host>-<port>` / `custom:<host>:<port>` picker entries now route through the configured `ollama` provider and base URL instead of surfacing a missing `CUSTOM_*_API_KEY` error. Refs #2271. + - **PR #2279** by @franksong2702 (closes #2262 + refs #2168) — WebUI stream completion recovery gaps closed for both `notify_on_complete` background tasks and the preserved-task-list compression marker UI. Pre-fix, completions held in the agent process registry were never drained by the WebUI gateway session because the gateway session platform was unset. The fix routes the completion queue by process session key before injecting any notification into a WebUI turn. Separately, the preserved-task-list compression marker — an internal sentinel — was sometimes the only assistant text rendered after a context compression turn timed out, leaving a confusing "preserved tasks" message with no actual response. The frontend now suppresses the marker when it's the only assistant content and the run state is terminal. - **PR #2299** by @starship-s — Background workers (title generation, manual session compression, update-summary generation) now correctly inherit profile-scoped configuration when a profile-scoped chat triggers them. Pre-fix, those workers read default-profile configuration instead of the session/request profile, so auxiliary model routing silently used the wrong configured model or failed provider resolution entirely. 
The fix threads the active profile context through `_run_background_title_update`, `_run_background_title_refresh`, and the manual compression and update-summary helpers, with regression tests covering all three paths. diff --git a/api/config.py b/api/config.py index cad66b46..46b377f0 100644 --- a/api/config.py +++ b/api/config.py @@ -925,6 +925,30 @@ def _normalize_base_url_for_match(value: object) -> str: return f"{scheme}://{netloc}{path}" +def _custom_endpoint_slugs_for_base_url(value: object) -> set[str]: + """Return custom provider slugs that WebUI may derive from a base URL. + + Model picker values for endpoint-discovered models have historically used + both ``custom:<host>:<port>`` and ``custom:<host>-<port>`` forms. When the + active config already names a local-server provider such as Ollama for that + same base URL, those endpoint slugs are just UI routing hints and should + resolve back to the configured provider rather than requiring a CUSTOM_* API + key. + """ + url = str(value or "").strip().rstrip("/") + if not url: + return set() + parsed_url = urlparse(url if "://" in url else f"http://{url}") + host = (parsed_url.hostname or "").strip().lower() + if not host: + return set() + port = parsed_url.port + if port is None: + scheme = (parsed_url.scheme or "http").lower() + port = 443 if scheme == "https" else 80 + return {f"custom:{host}:{port}", f"custom:{host}-{port}"} + + def _named_custom_provider_slug_for_base_url( base_url: object, config_obj: dict | None = None, @@ -1681,6 +1705,13 @@ def resolve_model_provider(model_id: str) -> tuple: and provider_hint not in _PROVIDER_DISPLAY and not provider_hint.startswith("custom:")): provider_hint, bare_model = inner.split(":", 1) + if ( + provider_hint.startswith("custom:") + and config_base_url + and _is_local_server_provider(config_provider) + and provider_hint.lower() in _custom_endpoint_slugs_for_base_url(config_base_url) + ): + return bare_model, config_provider, config_base_url return bare_model, provider_hint, 
_get_provider_base_url(provider_hint) if "/" in model_id: diff --git a/tests/test_resolve_model_provider_free_suffix.py b/tests/test_resolve_model_provider_free_suffix.py index 8798b71e..040c58b4 100644 --- a/tests/test_resolve_model_provider_free_suffix.py +++ b/tests/test_resolve_model_provider_free_suffix.py @@ -208,3 +208,44 @@ def test_model_with_provider_context_custom_ipv4_port_roundtrip(): assert model == "Qwen3-235B" finally: cfg_mod.cfg["model"] = old + + +def test_endpoint_custom_slug_matching_ollama_base_url_uses_ollama_provider(): + """Issue #2271: endpoint-derived custom slugs must not force CUSTOM_* keys.""" + import api.config as cfg_mod + + old = dict(cfg_mod.cfg.get("model", {})) + cfg_mod.cfg["model"] = { + "provider": "ollama", + "default": "ministral-3:latest", + "base_url": "http://lan-box.local:11434/v1", + } + try: + model, provider, base_url = resolve_model_provider( + "@custom:lan-box.local-11434:ministral-3:latest" + ) + assert model == "ministral-3:latest" + assert provider == "ollama" + assert base_url == "http://lan-box.local:11434/v1" + finally: + cfg_mod.cfg["model"] = old + + +def test_endpoint_custom_colon_slug_matching_ollama_base_url_uses_ollama_provider(): + import api.config as cfg_mod + + old = dict(cfg_mod.cfg.get("model", {})) + cfg_mod.cfg["model"] = { + "provider": "ollama", + "default": "llama3.2", + "base_url": "http://ollama.internal:11434/v1", + } + try: + model, provider, base_url = resolve_model_provider( + "@custom:ollama.internal:11434:llama3.2" + ) + assert model == "llama3.2" + assert provider == "ollama" + assert base_url == "http://ollama.internal:11434/v1" + finally: + cfg_mod.cfg["model"] = old