diff --git a/CHANGELOG.md b/CHANGELOG.md index 22c34a5d..a35214c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,8 @@ ### Fixed +- **(closes #2177)** — `resolve_model_provider()` no longer strips the `nvidia/` prefix when the model id's prefix matches the configured NVIDIA NIM provider name. Reported on Discord by @vishnu: selecting `nvidia/nemotron-3-super-120b-a12b` with `provider: nvidia` was returning HTTP 404 because the resolver hit the generic `prefix == config_provider` strip branch and sent the bare `nemotron-3-super-120b-a12b` id, but NIM requires the full namespaced path. Same bug class as #854 / #894 for the Nous portal — the `_PORTAL_PROVIDERS` guard at `api/config.py:1658` was added for NVIDIA but was placed *after* the prefix-strip and never fired in the self-prefix case. Fix moves the guard to run before the strip so portal providers (Nous, OpenCode-Zen, OpenCode-Go, NVIDIA NIM) always preserve the full `provider/model` path regardless of whether the prefix happens to equal the provider name. Includes a new `tests/test_issue2177_nvidia_prefix_preservation.py` regression file covering nvidia self-namespace, cross-namespace (qwen/, meta/), the full static nvidia model list, a latent symmetric Nous-self-prefix case, and a regression-pin that non-portal providers (anthropic) still get their existing strip behavior. + - **PR #2136** by @LumenYoung — Stale stream writebacks no longer poison the active session transcript. `cancel_stream()` intentionally clears `active_stream_id` early so the UI can accept a follow-up turn while an old worker is unwinding — but the old worker could still return later from `run_conversation()` and persist its stale result over the newer transcript, causing visible transcript / turn journal / `state.db` to disagree (especially around cancel+retry on compressed continuations). Adds a single-line ownership check `_stream_writeback_is_current(session, stream_id)` (token equality against `session.active_stream_id`) and short-circuits both finalize paths: the success path in `_run_agent_streaming` and the cancel-handler path in `cancel_stream()`. When the stream no longer owns the writeback, both paths log `Skipping stale stream/cancel writeback` and return cleanly without persisting. 89-line regression suite in `tests/test_stale_stream_writeback.py`; companion updates to `tests/test_issue1361_cancel_data_loss.py` and `tests/test_sprint42.py` for the new return-without-persist behavior. ### Added diff --git a/api/config.py b/api/config.py index 91e50e75..42d63574 100644 --- a/api/config.py +++ b/api/config.py @@ -1644,20 +1644,21 @@ def resolve_model_provider(model_id: str) -> tuple: # anthropic/claude-sonnet-4.6). Never strip the prefix for OpenRouter. if config_provider == "openrouter": return model_id, "openrouter", config_base_url + # Portal providers (Nous, OpenCode, NVIDIA NIM) serve models from multiple + # upstream namespaces — check them BEFORE the prefix-strip branch so that + # a model id whose prefix happens to equal the config_provider (e.g. + # nvidia/nemotron-... on NVIDIA NIM) still keeps the full namespaced path. + # The earlier ordering ran this guard AFTER the prefix-strip, so it never + # fired in the prefix==config_provider case, causing HTTP 404 from the + # portal which requires the full provider/model id (#2177; sibling of + # #854 / #894 for Nous, where this guard was originally added). + _PORTAL_PROVIDERS = {"nous", "opencode-zen", "opencode-go", "nvidia"} + if config_provider in _PORTAL_PROVIDERS: + return model_id, config_provider, config_base_url # If prefix matches config provider exactly, strip it and use that provider directly. # e.g. config=anthropic, model=anthropic/claude-... → bare name to anthropic API if config_provider and prefix == config_provider: return bare, config_provider, config_base_url - # Portal providers (Nous, OpenCode) serve models from multiple upstream - # namespaces — check them BEFORE the config_base_url branch so that a - # Nous user whose config.yaml also has a base_url doesn't accidentally - # fall into the prefix-stripping path (#894: minimax/minimax-m2.7 → bare - # name sent to Nous → 404 because Nous requires the full namespace path). - # NVIDIA NIM also serves models from multiple namespaces (qwen, nvidia, etc.) - # and requires the full model path. - _PORTAL_PROVIDERS = {"nous", "opencode-zen", "opencode-go", "nvidia"} - if config_provider in _PORTAL_PROVIDERS: - return model_id, config_provider, config_base_url # The OpenAI Codex provider uses a real base_url, but its default # ChatGPT endpoint cannot serve OpenRouter-style provider/model IDs. # Keep that narrow exception before the custom endpoint protection so diff --git a/tests/test_issue2177_nvidia_prefix_preservation.py b/tests/test_issue2177_nvidia_prefix_preservation.py new file mode 100644 index 00000000..66018e5a --- /dev/null +++ b/tests/test_issue2177_nvidia_prefix_preservation.py @@ -0,0 +1,118 @@ +"""Regression tests for #2177 — resolve_model_provider() strips nvidia/ prefix on NVIDIA NIM. + +The bug: when ``config_provider == "nvidia"`` and the model id is +``nvidia/nemotron-3-super-120b-a12b``, the resolver hit the +``prefix == config_provider`` strip BEFORE the ``_PORTAL_PROVIDERS`` guard and +returned the bare name ``nemotron-3-super-120b-a12b``. NVIDIA NIM's inference +endpoint then 404'd because it requires the full namespaced model id. + +Same bug class as #854 / #894 for the Nous portal. The fix moves the +``_PORTAL_PROVIDERS`` guard to run BEFORE the prefix-strip so portal providers +always preserve the full ``provider/model`` path regardless of whether the +prefix happens to equal the config_provider name. +""" +import api.config as config + + +def _resolve(model_id: str, provider: str): + """Resolve a model under a synthesized config_provider, isolated from disk state.""" + old = dict(config.cfg) + old_mtime = config._cfg_mtime + config.cfg.clear() + config.cfg["model"] = {"provider": provider} + try: + config._cfg_mtime = config.Path(config._get_config_path()).stat().st_mtime + except Exception: + config._cfg_mtime = 0.0 + try: + return config.resolve_model_provider(model_id) + finally: + config.cfg.clear() + config.cfg.update(old) + config._cfg_mtime = old_mtime + + +class TestNvidiaPortalPrefixPreservation: + """NVIDIA NIM requires the full ``namespace/model`` id — prefix must never be stripped.""" + + def test_nvidia_self_namespace_prefix_preserved(self): + """``nvidia/nemotron-...`` under nvidia provider must keep the nvidia/ prefix. + + This is the exact case in #2177 — prefix == config_provider would otherwise + trigger the strip branch and emit a bare ``nemotron-...`` id that NIM 404s. + """ + model, provider, _ = _resolve("nvidia/nemotron-3-super-120b-a12b", "nvidia") + assert model == "nvidia/nemotron-3-super-120b-a12b", ( + f"prefix was stripped: {model!r} — NVIDIA NIM requires the full namespaced id" + ) + assert provider == "nvidia" + + def test_nvidia_cross_namespace_qwen_prefix_preserved(self): + """``qwen/...`` under nvidia provider keeps the qwen/ prefix (regression pin). + + Cross-namespace already worked because ``prefix != config_provider`` — pinning + the test so a later refactor doesn't regress it. + """ + model, provider, _ = _resolve("qwen/qwen2.5-coder-32b-instruct", "nvidia") + assert model == "qwen/qwen2.5-coder-32b-instruct" + assert provider == "nvidia" + + def test_nvidia_cross_namespace_meta_prefix_preserved(self): + """``meta/llama-...`` under nvidia provider keeps the meta/ prefix.""" + model, provider, _ = _resolve("meta/llama-3.1-70b-instruct", "nvidia") + assert model == "meta/llama-3.1-70b-instruct" + assert provider == "nvidia" + + def test_nvidia_static_models_all_resolve_with_prefix_intact(self): + """Every static nvidia model in ``_PROVIDER_MODELS`` must resolve to itself. + + Pins the contract that the static dropdown list and the resolver agree on + the wire format — same invariant ``test_nous_portal_routing.py`` enforces + for the Nous portal. + """ + nvidia_models = config._PROVIDER_MODELS.get("nvidia", []) + assert nvidia_models, "nvidia must have at least one static model" + for entry in nvidia_models: + mid = entry["id"] + resolved, provider, _ = _resolve(mid, "nvidia") + assert resolved == mid, ( + f"static model {mid!r} resolved to {resolved!r} — " + f"portal must preserve full namespaced id" + ) + assert provider == "nvidia" + + +class TestPortalGuardOrdering: + """The ``_PORTAL_PROVIDERS`` guard must run BEFORE the prefix-strip branch. + + Otherwise, any portal user whose model id starts with the literal provider + name (nvidia/nvidia-..., nous/nous-..., etc.) hits the strip branch and + breaks at the upstream API. This is structural — pinning the ordering so a + future refactor cannot quietly reintroduce the bug. + """ + + def test_hypothetical_nous_self_prefix_preserved(self): + """``nous/`` under nous provider must keep the prefix. + + Latent bug shape — there's no shipped ``nous/`` id today, but if + Nous ever serves a self-prefixed model the resolver must handle it the + same way it handles nvidia/. + """ + model, provider, _ = _resolve("nous/hermes-3-something", "nous") + assert model == "nous/hermes-3-something", ( + f"prefix stripped: {model!r} — portal guard must preserve full id" + ) + assert provider == "nous" + + def test_anthropic_self_prefix_still_strips_for_anthropic(self): + """Non-portal providers (anthropic) keep the existing strip behavior. + + ``anthropic/claude-...`` under the anthropic provider should still + resolve to bare ``claude-...`` — anthropic's API doesn't want the + ``anthropic/`` prefix. Pinning so the fix doesn't over-correct. + """ + model, provider, _ = _resolve("anthropic/claude-opus-4.6", "anthropic") + assert model == "claude-opus-4.6", ( + f"anthropic strip-prefix path broken: got {model!r}, expected 'claude-opus-4.6'" + ) + assert provider == "anthropic"