From 9b1d7864598b6c4cbd4621525373374c128e341f Mon Sep 17 00:00:00 2001
From: nesquena-hermes <nesquena-hermes@users.noreply.github.com>
Date: Wed, 13 May 2026 07:05:57 +0000
Subject: [PATCH] fix(config): preserve nvidia/ prefix on NVIDIA NIM (closes
 #2177)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the `_PORTAL_PROVIDERS` guard in `resolve_model_provider()` to run
BEFORE the `prefix == config_provider` strip branch. The guard was added
for NVIDIA (along with the Nous portal cases in #854 / #894) but was
placed after the strip, so it never fired when `config_provider == "nvidia"`
and the model id started with `nvidia/`.

For `model_id="nvidia/nemotron-3-super-120b-a12b"`,
`config_provider="nvidia"`:
  - prefix = "nvidia", bare = "nemotron-3-super-120b-a12b"
  - prefix == config_provider → True → strip branch returned bare name
  - `_PORTAL_PROVIDERS` guard never reached
  - bare "nemotron-3-super-120b-a12b" sent to NVIDIA NIM → HTTP 404

NIM requires the full namespaced path. The fix moves the portal guard
to run first, so all portal providers (Nous, OpenCode-Zen, OpenCode-Go,
NVIDIA NIM) always preserve the full `provider/model` id regardless of
whether the prefix happens to equal the provider name.

This also closes a latent symmetric bug for the Nous case if a
`nous/<model>` id ever existed in the catalog.

Test plan:
- New `tests/test_issue2177_nvidia_prefix_preservation.py` covers:
  - nvidia/nemotron-... under nvidia (the reported case)
  - cross-namespace qwen/ and meta/ under nvidia (regression pin)
  - every static nvidia model in `_PROVIDER_MODELS` resolves to itself
  - latent nous/<model> under nous (structural ordering pin)
  - non-portal providers (anthropic) still strip — fix doesn't over-correct
- Existing portal-routing suites (test_nous_portal_routing.py,
  test_issue895_894_nous_prefix.py) continue to pass.
- Full test suite: 5320 passed, 4 skipped, 3 xpassed.

Reported on Discord by @vishnu (Nathan forwarded as #2177).
---
 CHANGELOG.md                                  |   2 +
 api/config.py                                 |  21 ++--
 ...st_issue2177_nvidia_prefix_preservation.py | 118 ++++++++++++++++++
 3 files changed, 131 insertions(+), 10 deletions(-)
 create mode 100644 tests/test_issue2177_nvidia_prefix_preservation.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d737b24..f9168513 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
 
 ### Fixed
 
+- **(closes #2177)** — `resolve_model_provider()` no longer strips the `nvidia/` prefix when the model id's prefix matches the configured NVIDIA NIM provider name. Reported on Discord by @vishnu: selecting `nvidia/nemotron-3-super-120b-a12b` with `provider: nvidia` was returning HTTP 404 because the resolver hit the generic `prefix == config_provider` strip branch and sent the bare `nemotron-3-super-120b-a12b` id, but NIM requires the full namespaced path. Same bug class as #854 / #894 for the Nous portal — the `_PORTAL_PROVIDERS` guard at `api/config.py:1658` was added for NVIDIA but was placed *after* the prefix-strip and never fired in the self-prefix case. Fix moves the guard to run before the strip so portal providers (Nous, OpenCode-Zen, OpenCode-Go, NVIDIA NIM) always preserve the full `provider/model` path regardless of whether the prefix happens to equal the provider name. Includes a new `tests/test_issue2177_nvidia_prefix_preservation.py` regression file covering nvidia self-namespace, cross-namespace (qwen/, meta/), the full static nvidia model list, a latent symmetric Nous-self-prefix case, and a regression-pin that non-portal providers (anthropic) still get their existing strip behavior.
+
 - **PR #2136** by @LumenYoung — Stale stream writebacks no longer poison the active session transcript. `cancel_stream()` intentionally clears `active_stream_id` early so the UI can accept a follow-up turn while an old worker is unwinding — but the old worker could still return later from `run_conversation()` and persist its stale result over the newer transcript, causing visible transcript / turn journal / `state.db` to disagree (especially around cancel+retry on compressed continuations). Adds a single-line ownership check `_stream_writeback_is_current(session, stream_id)` (token equality against `session.active_stream_id`) and short-circuits both finalize paths: the success path in `_run_agent_streaming` and the cancel-handler path in `cancel_stream()`. When the stream no longer owns the writeback, both paths log `Skipping stale stream/cancel writeback` and return cleanly without persisting. 89-line regression suite in `tests/test_stale_stream_writeback.py`; companion updates to `tests/test_issue1361_cancel_data_loss.py` and `tests/test_sprint42.py` for the new return-without-persist behavior.
 
 ### Added
diff --git a/api/config.py b/api/config.py
index d44c1039..17f3db5b 100644
--- a/api/config.py
+++ b/api/config.py
@@ -1644,20 +1644,21 @@ def resolve_model_provider(model_id: str) -> tuple:
         # anthropic/claude-sonnet-4.6). Never strip the prefix for OpenRouter.
         if config_provider == "openrouter":
             return model_id, "openrouter", config_base_url
+        # Portal providers (Nous, OpenCode, NVIDIA NIM) serve models from multiple
+        # upstream namespaces — check them BEFORE the prefix-strip branch so that
+        # a model id whose prefix happens to equal the config_provider (e.g.
+        # nvidia/nemotron-... on NVIDIA NIM) still keeps the full namespaced path.
+        # The earlier ordering ran this guard AFTER the prefix-strip, so it never
+        # fired in the prefix==config_provider case, causing HTTP 404 from the
+        # portal which requires the full provider/model id (#2177; sibling of
+        # #854 / #894 for Nous, where this guard was originally added).
+        _PORTAL_PROVIDERS = {"nous", "opencode-zen", "opencode-go", "nvidia"}
+        if config_provider in _PORTAL_PROVIDERS:
+            return model_id, config_provider, config_base_url
         # If prefix matches config provider exactly, strip it and use that provider directly.
         # e.g. config=anthropic, model=anthropic/claude-... → bare name to anthropic API
         if config_provider and prefix == config_provider:
             return bare, config_provider, config_base_url
-        # Portal providers (Nous, OpenCode) serve models from multiple upstream
-        # namespaces — check them BEFORE the config_base_url branch so that a
-        # Nous user whose config.yaml also has a base_url doesn't accidentally
-        # fall into the prefix-stripping path (#894: minimax/minimax-m2.7 → bare
-        # name sent to Nous → 404 because Nous requires the full namespace path).
-        # NVIDIA NIM also serves models from multiple namespaces (qwen, nvidia, etc.)
-        # and requires the full model path.
-        _PORTAL_PROVIDERS = {"nous", "opencode-zen", "opencode-go", "nvidia"}
-        if config_provider in _PORTAL_PROVIDERS:
-            return model_id, config_provider, config_base_url
         # The OpenAI Codex provider uses a real base_url, but its default
         # ChatGPT endpoint cannot serve OpenRouter-style provider/model IDs.
         # Keep that narrow exception before the custom endpoint protection so
diff --git a/tests/test_issue2177_nvidia_prefix_preservation.py b/tests/test_issue2177_nvidia_prefix_preservation.py
new file mode 100644
index 00000000..66018e5a
--- /dev/null
+++ b/tests/test_issue2177_nvidia_prefix_preservation.py
@@ -0,0 +1,118 @@
+"""Regression tests for #2177 — resolve_model_provider() strips nvidia/ prefix on NVIDIA NIM.
+
+The bug: when ``config_provider == "nvidia"`` and the model id is
+``nvidia/nemotron-3-super-120b-a12b``, the resolver hit the
+``prefix == config_provider`` strip BEFORE the ``_PORTAL_PROVIDERS`` guard and
+returned the bare name ``nemotron-3-super-120b-a12b``. NVIDIA NIM's inference
+endpoint then 404'd because it requires the full namespaced model id.
+
+Same bug class as #854 / #894 for the Nous portal. The fix moves the
+``_PORTAL_PROVIDERS`` guard to run BEFORE the prefix-strip so portal providers
+always preserve the full ``provider/model`` path regardless of whether the
+prefix happens to equal the config_provider name.
+"""
+import api.config as config
+
+
+def _resolve(model_id: str, provider: str):
+    """Resolve a model under a synthesized config_provider, isolated from disk state."""
+    old = dict(config.cfg)
+    old_mtime = config._cfg_mtime
+    config.cfg.clear()
+    config.cfg["model"] = {"provider": provider}
+    try:
+        config._cfg_mtime = config.Path(config._get_config_path()).stat().st_mtime
+    except Exception:
+        config._cfg_mtime = 0.0
+    try:
+        return config.resolve_model_provider(model_id)
+    finally:
+        config.cfg.clear()
+        config.cfg.update(old)
+        config._cfg_mtime = old_mtime
+
+
+class TestNvidiaPortalPrefixPreservation:
+    """NVIDIA NIM requires the full ``namespace/model`` id — prefix must never be stripped."""
+
+    def test_nvidia_self_namespace_prefix_preserved(self):
+        """``nvidia/nemotron-...`` under nvidia provider must keep the nvidia/ prefix.
+
+        This is the exact case in #2177 — prefix == config_provider would otherwise
+        trigger the strip branch and emit a bare ``nemotron-...`` id that NIM 404s.
+        """
+        model, provider, _ = _resolve("nvidia/nemotron-3-super-120b-a12b", "nvidia")
+        assert model == "nvidia/nemotron-3-super-120b-a12b", (
+            f"prefix was stripped: {model!r} — NVIDIA NIM requires the full namespaced id"
+        )
+        assert provider == "nvidia"
+
+    def test_nvidia_cross_namespace_qwen_prefix_preserved(self):
+        """``qwen/...`` under nvidia provider keeps the qwen/ prefix (regression pin).
+
+        Cross-namespace already worked because ``prefix != config_provider`` — pinning
+        the test so a later refactor doesn't regress it.
+        """
+        model, provider, _ = _resolve("qwen/qwen2.5-coder-32b-instruct", "nvidia")
+        assert model == "qwen/qwen2.5-coder-32b-instruct"
+        assert provider == "nvidia"
+
+    def test_nvidia_cross_namespace_meta_prefix_preserved(self):
+        """``meta/llama-...`` under nvidia provider keeps the meta/ prefix."""
+        model, provider, _ = _resolve("meta/llama-3.1-70b-instruct", "nvidia")
+        assert model == "meta/llama-3.1-70b-instruct"
+        assert provider == "nvidia"
+
+    def test_nvidia_static_models_all_resolve_with_prefix_intact(self):
+        """Every static nvidia model in ``_PROVIDER_MODELS`` must resolve to itself.
+
+        Pins the contract that the static dropdown list and the resolver agree on
+        the wire format — same invariant ``test_nous_portal_routing.py`` enforces
+        for the Nous portal.
+        """
+        nvidia_models = config._PROVIDER_MODELS.get("nvidia", [])
+        assert nvidia_models, "nvidia must have at least one static model"
+        for entry in nvidia_models:
+            mid = entry["id"]
+            resolved, provider, _ = _resolve(mid, "nvidia")
+            assert resolved == mid, (
+                f"static model {mid!r} resolved to {resolved!r} — "
+                f"portal must preserve full namespaced id"
+            )
+            assert provider == "nvidia"
+
+
+class TestPortalGuardOrdering:
+    """The ``_PORTAL_PROVIDERS`` guard must run BEFORE the prefix-strip branch.
+
+    Otherwise, any portal user whose model id starts with the literal provider
+    name (nvidia/nvidia-..., nous/nous-..., etc.) hits the strip branch and
+    breaks at the upstream API. This is structural — pinning the ordering so a
+    future refactor cannot quietly reintroduce the bug.
+    """
+
+    def test_hypothetical_nous_self_prefix_preserved(self):
+        """``nous/<model>`` under nous provider must keep the prefix.
+
+        Latent bug shape — there's no shipped ``nous/<model>`` id today, but if
+        Nous ever serves a self-prefixed model the resolver must handle it the
+        same way it handles nvidia/.
+        """
+        model, provider, _ = _resolve("nous/hermes-3-something", "nous")
+        assert model == "nous/hermes-3-something", (
+            f"prefix stripped: {model!r} — portal guard must preserve full id"
+        )
+        assert provider == "nous"
+
+    def test_anthropic_self_prefix_still_strips_for_anthropic(self):
+        """Non-portal providers (anthropic) keep the existing strip behavior.
+
+        ``anthropic/claude-...`` under the anthropic provider should still
+        resolve to bare ``claude-...`` — anthropic's API doesn't want the
+        ``anthropic/`` prefix. Pinning so the fix doesn't over-correct.
+        """
+        model, provider, _ = _resolve("anthropic/claude-opus-4.6", "anthropic")
+        assert model == "claude-opus-4.6", (
+            f"anthropic strip-prefix path broken: got {model!r}, expected 'claude-opus-4.6'"
+        )
+        assert provider == "anthropic"