mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-25 11:10:18 +00:00
Merge pull request #2179 into stage-347
fix(config): preserve nvidia/ prefix on NVIDIA NIM (closes #2177) Self-built. nesquena APPROVED with extensive end-to-end trace including cross-tool agent CLI verification and 12-shape behavioural harness.
This commit is contained in:
@@ -36,6 +36,8 @@
|
||||
|
||||
### Fixed
|
||||
|
||||
- **(closes #2177)** — `resolve_model_provider()` no longer strips the `nvidia/` prefix when the model id's prefix matches the configured NVIDIA NIM provider name. Reported on Discord by @vishnu: selecting `nvidia/nemotron-3-super-120b-a12b` with `provider: nvidia` was returning HTTP 404 because the resolver hit the generic `prefix == config_provider` strip branch and sent the bare `nemotron-3-super-120b-a12b` id, but NIM requires the full namespaced path. Same bug class as #854 / #894 for the Nous portal — the `_PORTAL_PROVIDERS` guard at `api/config.py:1658` was added for NVIDIA but was placed *after* the prefix-strip and never fired in the self-prefix case. Fix moves the guard to run before the strip so portal providers (Nous, OpenCode-Zen, OpenCode-Go, NVIDIA NIM) always preserve the full `provider/model` path regardless of whether the prefix happens to equal the provider name. Includes a new `tests/test_issue2177_nvidia_prefix_preservation.py` regression file covering nvidia self-namespace, cross-namespace (qwen/, meta/), the full static nvidia model list, a latent symmetric Nous-self-prefix case, and a regression-pin that non-portal providers (anthropic) still get their existing strip behavior.
|
||||
|
||||
- **PR #2136** by @LumenYoung — Stale stream writebacks no longer poison the active session transcript. `cancel_stream()` intentionally clears `active_stream_id` early so the UI can accept a follow-up turn while an old worker is unwinding — but the old worker could still return later from `run_conversation()` and persist its stale result over the newer transcript, causing visible transcript / turn journal / `state.db` to disagree (especially around cancel+retry on compressed continuations). Adds a single-line ownership check `_stream_writeback_is_current(session, stream_id)` (token equality against `session.active_stream_id`) and short-circuits both finalize paths: the success path in `_run_agent_streaming` and the cancel-handler path in `cancel_stream()`. When the stream no longer owns the writeback, both paths log `Skipping stale stream/cancel writeback` and return cleanly without persisting. 89-line regression suite in `tests/test_stale_stream_writeback.py`; companion updates to `tests/test_issue1361_cancel_data_loss.py` and `tests/test_sprint42.py` for the new return-without-persist behavior.
|
||||
|
||||
### Added
|
||||
|
||||
+11
-10
@@ -1644,20 +1644,21 @@ def resolve_model_provider(model_id: str) -> tuple:
|
||||
# anthropic/claude-sonnet-4.6). Never strip the prefix for OpenRouter.
|
||||
if config_provider == "openrouter":
|
||||
return model_id, "openrouter", config_base_url
|
||||
# Portal providers (Nous, OpenCode, NVIDIA NIM) serve models from multiple
|
||||
# upstream namespaces — check them BEFORE the prefix-strip branch so that
|
||||
# a model id whose prefix happens to equal the config_provider (e.g.
|
||||
# nvidia/nemotron-... on NVIDIA NIM) still keeps the full namespaced path.
|
||||
# The earlier ordering ran this guard AFTER the prefix-strip, so it never
|
||||
# fired in the prefix==config_provider case, causing HTTP 404 from the
|
||||
# portal which requires the full provider/model id (#2177; sibling of
|
||||
# #854 / #894 for Nous, where this guard was originally added).
|
||||
_PORTAL_PROVIDERS = {"nous", "opencode-zen", "opencode-go", "nvidia"}
|
||||
if config_provider in _PORTAL_PROVIDERS:
|
||||
return model_id, config_provider, config_base_url
|
||||
# If prefix matches config provider exactly, strip it and use that provider directly.
|
||||
# e.g. config=anthropic, model=anthropic/claude-... → bare name to anthropic API
|
||||
if config_provider and prefix == config_provider:
|
||||
return bare, config_provider, config_base_url
|
||||
# Portal providers (Nous, OpenCode) serve models from multiple upstream
|
||||
# namespaces — check them BEFORE the config_base_url branch so that a
|
||||
# Nous user whose config.yaml also has a base_url doesn't accidentally
|
||||
# fall into the prefix-stripping path (#894: minimax/minimax-m2.7 → bare
|
||||
# name sent to Nous → 404 because Nous requires the full namespace path).
|
||||
# NVIDIA NIM also serves models from multiple namespaces (qwen, nvidia, etc.)
|
||||
# and requires the full model path.
|
||||
_PORTAL_PROVIDERS = {"nous", "opencode-zen", "opencode-go", "nvidia"}
|
||||
if config_provider in _PORTAL_PROVIDERS:
|
||||
return model_id, config_provider, config_base_url
|
||||
# The OpenAI Codex provider uses a real base_url, but its default
|
||||
# ChatGPT endpoint cannot serve OpenRouter-style provider/model IDs.
|
||||
# Keep that narrow exception before the custom endpoint protection so
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
"""Regression tests for #2177 — resolve_model_provider() strips nvidia/ prefix on NVIDIA NIM.
|
||||
|
||||
The bug: when ``config_provider == "nvidia"`` and the model id is
|
||||
``nvidia/nemotron-3-super-120b-a12b``, the resolver hit the
|
||||
``prefix == config_provider`` strip BEFORE the ``_PORTAL_PROVIDERS`` guard and
|
||||
returned the bare name ``nemotron-3-super-120b-a12b``. NVIDIA NIM's inference
|
||||
endpoint then 404'd because it requires the full namespaced model id.
|
||||
|
||||
Same bug class as #854 / #894 for the Nous portal. The fix moves the
|
||||
``_PORTAL_PROVIDERS`` guard to run BEFORE the prefix-strip so portal providers
|
||||
always preserve the full ``provider/model`` path regardless of whether the
|
||||
prefix happens to equal the config_provider name.
|
||||
"""
|
||||
import api.config as config
|
||||
|
||||
|
||||
def _resolve(model_id: str, provider: str):
|
||||
"""Resolve a model under a synthesized config_provider, isolated from disk state."""
|
||||
old = dict(config.cfg)
|
||||
old_mtime = config._cfg_mtime
|
||||
config.cfg.clear()
|
||||
config.cfg["model"] = {"provider": provider}
|
||||
try:
|
||||
config._cfg_mtime = config.Path(config._get_config_path()).stat().st_mtime
|
||||
except Exception:
|
||||
config._cfg_mtime = 0.0
|
||||
try:
|
||||
return config.resolve_model_provider(model_id)
|
||||
finally:
|
||||
config.cfg.clear()
|
||||
config.cfg.update(old)
|
||||
config._cfg_mtime = old_mtime
|
||||
|
||||
|
||||
class TestNvidiaPortalPrefixPreservation:
|
||||
"""NVIDIA NIM requires the full ``namespace/model`` id — prefix must never be stripped."""
|
||||
|
||||
def test_nvidia_self_namespace_prefix_preserved(self):
|
||||
"""``nvidia/nemotron-...`` under nvidia provider must keep the nvidia/ prefix.
|
||||
|
||||
This is the exact case in #2177 — prefix == config_provider would otherwise
|
||||
trigger the strip branch and emit a bare ``nemotron-...`` id that NIM 404s.
|
||||
"""
|
||||
model, provider, _ = _resolve("nvidia/nemotron-3-super-120b-a12b", "nvidia")
|
||||
assert model == "nvidia/nemotron-3-super-120b-a12b", (
|
||||
f"prefix was stripped: {model!r} — NVIDIA NIM requires the full namespaced id"
|
||||
)
|
||||
assert provider == "nvidia"
|
||||
|
||||
def test_nvidia_cross_namespace_qwen_prefix_preserved(self):
|
||||
"""``qwen/...`` under nvidia provider keeps the qwen/ prefix (regression pin).
|
||||
|
||||
Cross-namespace already worked because ``prefix != config_provider`` — pinning
|
||||
the test so a later refactor doesn't regress it.
|
||||
"""
|
||||
model, provider, _ = _resolve("qwen/qwen2.5-coder-32b-instruct", "nvidia")
|
||||
assert model == "qwen/qwen2.5-coder-32b-instruct"
|
||||
assert provider == "nvidia"
|
||||
|
||||
def test_nvidia_cross_namespace_meta_prefix_preserved(self):
|
||||
"""``meta/llama-...`` under nvidia provider keeps the meta/ prefix."""
|
||||
model, provider, _ = _resolve("meta/llama-3.1-70b-instruct", "nvidia")
|
||||
assert model == "meta/llama-3.1-70b-instruct"
|
||||
assert provider == "nvidia"
|
||||
|
||||
def test_nvidia_static_models_all_resolve_with_prefix_intact(self):
|
||||
"""Every static nvidia model in ``_PROVIDER_MODELS`` must resolve to itself.
|
||||
|
||||
Pins the contract that the static dropdown list and the resolver agree on
|
||||
the wire format — same invariant ``test_nous_portal_routing.py`` enforces
|
||||
for the Nous portal.
|
||||
"""
|
||||
nvidia_models = config._PROVIDER_MODELS.get("nvidia", [])
|
||||
assert nvidia_models, "nvidia must have at least one static model"
|
||||
for entry in nvidia_models:
|
||||
mid = entry["id"]
|
||||
resolved, provider, _ = _resolve(mid, "nvidia")
|
||||
assert resolved == mid, (
|
||||
f"static model {mid!r} resolved to {resolved!r} — "
|
||||
f"portal must preserve full namespaced id"
|
||||
)
|
||||
assert provider == "nvidia"
|
||||
|
||||
|
||||
class TestPortalGuardOrdering:
|
||||
"""The ``_PORTAL_PROVIDERS`` guard must run BEFORE the prefix-strip branch.
|
||||
|
||||
Otherwise, any portal user whose model id starts with the literal provider
|
||||
name (nvidia/nvidia-..., nous/nous-..., etc.) hits the strip branch and
|
||||
breaks at the upstream API. This is structural — pinning the ordering so a
|
||||
future refactor cannot quietly reintroduce the bug.
|
||||
"""
|
||||
|
||||
def test_hypothetical_nous_self_prefix_preserved(self):
|
||||
"""``nous/<model>`` under nous provider must keep the prefix.
|
||||
|
||||
Latent bug shape — there's no shipped ``nous/<model>`` id today, but if
|
||||
Nous ever serves a self-prefixed model the resolver must handle it the
|
||||
same way it handles nvidia/.
|
||||
"""
|
||||
model, provider, _ = _resolve("nous/hermes-3-something", "nous")
|
||||
assert model == "nous/hermes-3-something", (
|
||||
f"prefix stripped: {model!r} — portal guard must preserve full id"
|
||||
)
|
||||
assert provider == "nous"
|
||||
|
||||
def test_anthropic_self_prefix_still_strips_for_anthropic(self):
|
||||
"""Non-portal providers (anthropic) keep the existing strip behavior.
|
||||
|
||||
``anthropic/claude-...`` under the anthropic provider should still
|
||||
resolve to bare ``claude-...`` — anthropic's API doesn't want the
|
||||
``anthropic/`` prefix. Pinning so the fix doesn't over-correct.
|
||||
"""
|
||||
model, provider, _ = _resolve("anthropic/claude-opus-4.6", "anthropic")
|
||||
assert model == "claude-opus-4.6", (
|
||||
f"anthropic strip-prefix path broken: got {model!r}, expected 'claude-opus-4.6'"
|
||||
)
|
||||
assert provider == "anthropic"
|
||||
Reference in New Issue
Block a user