From 9b1d7864598b6c4cbd4621525373374c128e341f Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Wed, 13 May 2026 07:05:57 +0000 Subject: [PATCH] fix(config): preserve nvidia/ prefix on NVIDIA NIM (closes #2177) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the `_PORTAL_PROVIDERS` guard in `resolve_model_provider()` to run BEFORE the `prefix == config_provider` strip branch. The guard was added for NVIDIA (along with the Nous portal cases in #854 / #894) but was placed after the strip, so it never fired when `config_provider == "nvidia"` and the model id started with `nvidia/`. For `model_id="nvidia/nemotron-3-super-120b-a12b"`, `config_provider="nvidia"`: - prefix = "nvidia", bare = "nemotron-3-super-120b-a12b" - prefix == config_provider → True → strip branch returned bare name - `_PORTAL_PROVIDERS` guard never reached - bare "nemotron-3-super-120b-a12b" sent to NVIDIA NIM → HTTP 404 NIM requires the full namespaced path. The fix moves the portal guard to run first, so all portal providers (Nous, OpenCode-Zen, OpenCode-Go, NVIDIA NIM) always preserve the full `provider/model` id regardless of whether the prefix happens to equal the provider name. This also closes a latent symmetric bug for the Nous case if a `nous/` id ever existed in the catalog. Test plan: - New `tests/test_issue2177_nvidia_prefix_preservation.py` covers: - nvidia/nemotron-... under nvidia (the reported case) - cross-namespace qwen/ and meta/ under nvidia (regression pin) - every static nvidia model in `_PROVIDER_MODELS` resolves to itself - latent nous/ under nous (structural ordering pin) - non-portal providers (anthropic) still strip — fix doesn't over-correct - Existing portal-routing suites (test_nous_portal_routing.py, test_issue895_894_nous_prefix.py) continue to pass. - Full test suite: 5320 passed, 4 skipped, 3 xpassed. Reported on Discord by @vishnu (Nathan forwarded as #2177). --- CHANGELOG.md | 2 + api/config.py | 21 ++-- ...st_issue2177_nvidia_prefix_preservation.py | 118 ++++++++++++++++++ 3 files changed, 131 insertions(+), 10 deletions(-) create mode 100644 tests/test_issue2177_nvidia_prefix_preservation.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d737b24..f9168513 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Fixed +- **(closes #2177)** — `resolve_model_provider()` no longer strips the `nvidia/` prefix when the model id's prefix matches the configured NVIDIA NIM provider name. Reported on Discord by @vishnu: selecting `nvidia/nemotron-3-super-120b-a12b` with `provider: nvidia` was returning HTTP 404 because the resolver hit the generic `prefix == config_provider` strip branch and sent the bare `nemotron-3-super-120b-a12b` id, but NIM requires the full namespaced path. Same bug class as #854 / #894 for the Nous portal — the `_PORTAL_PROVIDERS` guard at `api/config.py:1658` was added for NVIDIA but was placed *after* the prefix-strip and never fired in the self-prefix case. Fix moves the guard to run before the strip so portal providers (Nous, OpenCode-Zen, OpenCode-Go, NVIDIA NIM) always preserve the full `provider/model` path regardless of whether the prefix happens to equal the provider name. Includes a new `tests/test_issue2177_nvidia_prefix_preservation.py` regression file covering nvidia self-namespace, cross-namespace (qwen/, meta/), the full static nvidia model list, a latent symmetric Nous-self-prefix case, and a regression-pin that non-portal providers (anthropic) still get their existing strip behavior. + - **PR #2136** by @LumenYoung — Stale stream writebacks no longer poison the active session transcript. `cancel_stream()` intentionally clears `active_stream_id` early so the UI can accept a follow-up turn while an old worker is unwinding — but the old worker could still return later from `run_conversation()` and persist its stale result over the newer transcript, causing visible transcript / turn journal / `state.db` to disagree (especially around cancel+retry on compressed continuations). Adds a single-line ownership check `_stream_writeback_is_current(session, stream_id)` (token equality against `session.active_stream_id`) and short-circuits both finalize paths: the success path in `_run_agent_streaming` and the cancel-handler path in `cancel_stream()`. When the stream no longer owns the writeback, both paths log `Skipping stale stream/cancel writeback` and return cleanly without persisting. 89-line regression suite in `tests/test_stale_stream_writeback.py`; companion updates to `tests/test_issue1361_cancel_data_loss.py` and `tests/test_sprint42.py` for the new return-without-persist behavior. ### Added diff --git a/api/config.py b/api/config.py index d44c1039..17f3db5b 100644 --- a/api/config.py +++ b/api/config.py @@ -1644,20 +1644,21 @@ def resolve_model_provider(model_id: str) -> tuple: # anthropic/claude-sonnet-4.6). Never strip the prefix for OpenRouter. if config_provider == "openrouter": return model_id, "openrouter", config_base_url + # Portal providers (Nous, OpenCode, NVIDIA NIM) serve models from multiple + # upstream namespaces — check them BEFORE the prefix-strip branch so that + # a model id whose prefix happens to equal the config_provider (e.g. + # nvidia/nemotron-... on NVIDIA NIM) still keeps the full namespaced path. + # The earlier ordering ran this guard AFTER the prefix-strip, so it never + # fired in the prefix==config_provider case, causing HTTP 404 from the + # portal which requires the full provider/model id (#2177; sibling of + # #854 / #894 for Nous, where this guard was originally added). + _PORTAL_PROVIDERS = {"nous", "opencode-zen", "opencode-go", "nvidia"} + if config_provider in _PORTAL_PROVIDERS: + return model_id, config_provider, config_base_url # If prefix matches config provider exactly, strip it and use that provider directly. # e.g. config=anthropic, model=anthropic/claude-... → bare name to anthropic API if config_provider and prefix == config_provider: return bare, config_provider, config_base_url - # Portal providers (Nous, OpenCode) serve models from multiple upstream - # namespaces — check them BEFORE the config_base_url branch so that a - # Nous user whose config.yaml also has a base_url doesn't accidentally - # fall into the prefix-stripping path (#894: minimax/minimax-m2.7 → bare - # name sent to Nous → 404 because Nous requires the full namespace path). - # NVIDIA NIM also serves models from multiple namespaces (qwen, nvidia, etc.) - # and requires the full model path. - _PORTAL_PROVIDERS = {"nous", "opencode-zen", "opencode-go", "nvidia"} - if config_provider in _PORTAL_PROVIDERS: - return model_id, config_provider, config_base_url # The OpenAI Codex provider uses a real base_url, but its default # ChatGPT endpoint cannot serve OpenRouter-style provider/model IDs. # Keep that narrow exception before the custom endpoint protection so diff --git a/tests/test_issue2177_nvidia_prefix_preservation.py b/tests/test_issue2177_nvidia_prefix_preservation.py new file mode 100644 index 00000000..66018e5a --- /dev/null +++ b/tests/test_issue2177_nvidia_prefix_preservation.py @@ -0,0 +1,118 @@ +"""Regression tests for #2177 — resolve_model_provider() strips nvidia/ prefix on NVIDIA NIM. + +The bug: when ``config_provider == "nvidia"`` and the model id is +``nvidia/nemotron-3-super-120b-a12b``, the resolver hit the +``prefix == config_provider`` strip BEFORE the ``_PORTAL_PROVIDERS`` guard and +returned the bare name ``nemotron-3-super-120b-a12b``. NVIDIA NIM's inference +endpoint then 404'd because it requires the full namespaced model id. + +Same bug class as #854 / #894 for the Nous portal. The fix moves the +``_PORTAL_PROVIDERS`` guard to run BEFORE the prefix-strip so portal providers +always preserve the full ``provider/model`` path regardless of whether the +prefix happens to equal the config_provider name. +""" +import api.config as config + + +def _resolve(model_id: str, provider: str): + """Resolve a model under a synthesized config_provider, isolated from disk state.""" + old = dict(config.cfg) + old_mtime = config._cfg_mtime + config.cfg.clear() + config.cfg["model"] = {"provider": provider} + try: + config._cfg_mtime = config.Path(config._get_config_path()).stat().st_mtime + except Exception: + config._cfg_mtime = 0.0 + try: + return config.resolve_model_provider(model_id) + finally: + config.cfg.clear() + config.cfg.update(old) + config._cfg_mtime = old_mtime + + +class TestNvidiaPortalPrefixPreservation: + """NVIDIA NIM requires the full ``namespace/model`` id — prefix must never be stripped.""" + + def test_nvidia_self_namespace_prefix_preserved(self): + """``nvidia/nemotron-...`` under nvidia provider must keep the nvidia/ prefix. + + This is the exact case in #2177 — prefix == config_provider would otherwise + trigger the strip branch and emit a bare ``nemotron-...`` id that NIM 404s. + """ + model, provider, _ = _resolve("nvidia/nemotron-3-super-120b-a12b", "nvidia") + assert model == "nvidia/nemotron-3-super-120b-a12b", ( + f"prefix was stripped: {model!r} — NVIDIA NIM requires the full namespaced id" + ) + assert provider == "nvidia" + + def test_nvidia_cross_namespace_qwen_prefix_preserved(self): + """``qwen/...`` under nvidia provider keeps the qwen/ prefix (regression pin). + + Cross-namespace already worked because ``prefix != config_provider`` — pinning + the test so a later refactor doesn't regress it. + """ + model, provider, _ = _resolve("qwen/qwen2.5-coder-32b-instruct", "nvidia") + assert model == "qwen/qwen2.5-coder-32b-instruct" + assert provider == "nvidia" + + def test_nvidia_cross_namespace_meta_prefix_preserved(self): + """``meta/llama-...`` under nvidia provider keeps the meta/ prefix.""" + model, provider, _ = _resolve("meta/llama-3.1-70b-instruct", "nvidia") + assert model == "meta/llama-3.1-70b-instruct" + assert provider == "nvidia" + + def test_nvidia_static_models_all_resolve_with_prefix_intact(self): + """Every static nvidia model in ``_PROVIDER_MODELS`` must resolve to itself. + + Pins the contract that the static dropdown list and the resolver agree on + the wire format — same invariant ``test_nous_portal_routing.py`` enforces + for the Nous portal. + """ + nvidia_models = config._PROVIDER_MODELS.get("nvidia", []) + assert nvidia_models, "nvidia must have at least one static model" + for entry in nvidia_models: + mid = entry["id"] + resolved, provider, _ = _resolve(mid, "nvidia") + assert resolved == mid, ( + f"static model {mid!r} resolved to {resolved!r} — " + f"portal must preserve full namespaced id" + ) + assert provider == "nvidia" + + +class TestPortalGuardOrdering: + """The ``_PORTAL_PROVIDERS`` guard must run BEFORE the prefix-strip branch. + + Otherwise, any portal user whose model id starts with the literal provider + name (nvidia/nvidia-..., nous/nous-..., etc.) hits the strip branch and + breaks at the upstream API. This is structural — pinning the ordering so a + future refactor cannot quietly reintroduce the bug. + """ + + def test_hypothetical_nous_self_prefix_preserved(self): + """``nous/`` under nous provider must keep the prefix. + + Latent bug shape — there's no shipped ``nous/`` id today, but if + Nous ever serves a self-prefixed model the resolver must handle it the + same way it handles nvidia/. + """ + model, provider, _ = _resolve("nous/hermes-3-something", "nous") + assert model == "nous/hermes-3-something", ( + f"prefix stripped: {model!r} — portal guard must preserve full id" + ) + assert provider == "nous" + + def test_anthropic_self_prefix_still_strips_for_anthropic(self): + """Non-portal providers (anthropic) keep the existing strip behavior. + + ``anthropic/claude-...`` under the anthropic provider should still + resolve to bare ``claude-...`` — anthropic's API doesn't want the + ``anthropic/`` prefix. Pinning so the fix doesn't over-correct. + """ + model, provider, _ = _resolve("anthropic/claude-opus-4.6", "anthropic") + assert model == "claude-opus-4.6", ( + f"anthropic strip-prefix path broken: got {model!r}, expected 'claude-opus-4.6'" + ) + assert provider == "anthropic"