From bdc328d0346f563ba68a8bc7e2a6c3b31c1b3783 Mon Sep 17 00:00:00 2001 From: starship-s <45587122+starship-s@users.noreply.github.com> Date: Thu, 30 Apr 2026 23:23:47 -0600 Subject: [PATCH 1/8] fix: preserve webui model provider context Persist session model_provider separately from model IDs so active/default provider selections like gpt-5.5 remain bare while routing through OpenAI Codex. Keep @provider:model for picker disambiguation and runtime bridging, and preserve explicit OpenRouter plus custom/proxy base_url routing. --- api/config.py | 50 ++++++ api/models.py | 8 +- api/routes.py | 277 +++++++++++++++++++++++++----- api/streaming.py | 23 ++- static/boot.js | 39 +++-- static/messages.js | 18 +- static/panels.js | 12 +- static/sessions.js | 24 ++- static/ui.js | 87 ++++++++-- tests/test_provider_mismatch.py | 292 ++++++++++++++++++++++++++++++++ 10 files changed, 757 insertions(+), 73 deletions(-) diff --git a/api/config.py b/api/config.py index 45694f40..eaf9cd59 100644 --- a/api/config.py +++ b/api/config.py @@ -1018,6 +1018,20 @@ def resolve_model_provider(model_id: str) -> tuple: _PORTAL_PROVIDERS = {"nous", "opencode-zen", "opencode-go", "nvidia"} if config_provider in _PORTAL_PROVIDERS: return model_id, config_provider, config_base_url + # The OpenAI Codex provider uses a real base_url, but its default + # ChatGPT endpoint cannot serve OpenRouter-style provider/model IDs. + # Keep that narrow exception before the custom endpoint protection so + # selecting openai/gpt-5.5 from OpenRouter under active Codex still + # routes through OpenRouter. Other base_url-backed real providers may be + # custom/proxy endpoints, so they must fall through to the branch below. + if ( + config_provider == "openai-codex" + and str(config_base_url or "").strip().rstrip("/") + == "https://chatgpt.com/backend-api/codex" + and prefix in _PROVIDER_MODELS + and prefix != config_provider + ): + return model_id, "openrouter", None # If a custom endpoint base_url is configured, don't reroute through OpenRouter # just because the model name contains a slash (e.g. google/gemma-4-26b-a4b). # The user has explicitly pointed at a base_url, so trust their routing config. @@ -1040,6 +1054,42 @@ def resolve_model_provider(model_id: str) -> tuple: return model_id, config_provider, config_base_url +def model_with_provider_context(model_id: str, model_provider: str | None = None) -> str: + """Return the model string to pass to ``resolve_model_provider()``. + + Session persistence keeps the user's selected provider in ``model_provider`` + instead of forcing every selected model into ``@provider:model`` form. At + runtime, however, ``resolve_model_provider()`` still understands that + internal disambiguation form, so use it only when the provider context is + needed to route away from the current default provider. + """ + model = str(model_id or "").strip() + provider = str(model_provider or "").strip().lower() + if not model or not provider or provider == "default" or model.startswith("@"): + return model + + model_cfg = cfg.get("model", {}) + config_provider = None + if isinstance(model_cfg, dict): + config_provider = str(model_cfg.get("provider") or "").strip().lower() + + # If the selected provider is already the configured provider, leaving the + # model bare preserves provider-specific base_url/proxy settings. + if provider == config_provider: + return model + + # OpenRouter selections with slash IDs are explicit provider/model paths. + if provider == "openrouter": + return f"@{provider}:{model}" + + # For non-OpenRouter slash IDs, keep the ID intact so existing custom/proxy + # base_url routing and portal-provider handling remain in charge. + if "/" in model: + return model + + return f"@{provider}:{model}" + + def get_effective_default_model(config_data: dict | None = None) -> str: """Resolve the effective Hermes default model from config, then env overrides.""" active_cfg = config_data if config_data is not None else cfg diff --git a/api/models.py b/api/models.py index 8e8d604d..d6fe0679 100644 --- a/api/models.py +++ b/api/models.py @@ -306,6 +306,7 @@ def _lookup_index_message_count(session_id): class Session: def __init__(self, session_id: str=None, title: str='Untitled', workspace=str(DEFAULT_WORKSPACE), model=DEFAULT_MODEL, + model_provider=None, messages=None, created_at=None, updated_at=None, tool_calls=None, pinned: bool=False, archived: bool=False, project_id: str=None, profile=None, @@ -325,6 +326,7 @@ class Session: self.title = title self.workspace = str(Path(workspace).expanduser().resolve()) self.model = model + self.model_provider = str(model_provider).strip().lower() if model_provider else None self.messages = messages or [] self.tool_calls = tool_calls or [] self.created_at = created_at or time.time() @@ -364,7 +366,7 @@ class Session: # without parsing the full messages array (which may be 400KB+). # Fields are listed in the order they should appear in the JSON file. METADATA_FIELDS = [ - 'session_id', 'title', 'workspace', 'model', 'created_at', 'updated_at', + 'session_id', 'title', 'workspace', 'model', 'model_provider', 'created_at', 'updated_at', 'pinned', 'archived', 'project_id', 'profile', 'input_tokens', 'output_tokens', 'estimated_cost', 'personality', 'active_stream_id', @@ -445,6 +447,7 @@ class Session: 'title': self.title, 'workspace': self.workspace, 'model': self.model, + 'model_provider': self.model_provider, 'message_count': ( self._metadata_message_count if self._metadata_message_count is not None @@ -696,7 +699,7 @@ def get_session(sid, metadata_only=False): return s raise KeyError(sid) -def new_session(workspace=None, model=None, profile=None): +def new_session(workspace=None, model=None, profile=None, model_provider=None): """Create a new in-memory session. The session lives in the SESSIONS dict only — no disk write happens until @@ -730,6 +733,7 @@ def new_session(workspace=None, model=None, profile=None): s = Session( workspace=workspace or get_last_workspace(), model=effective_model, + model_provider=model_provider, profile=profile, ) with LOCK: diff --git a/api/routes.py b/api/routes.py index 03ed0c62..8d5bb386 100644 --- a/api/routes.py +++ b/api/routes.py @@ -174,6 +174,7 @@ from api.config import ( load_settings, save_settings, set_hermes_default_model, + model_with_provider_context, get_reasoning_status, set_reasoning_display, set_reasoning_effort, @@ -356,20 +357,78 @@ def _model_matches_active_provider_family( return False -def _resolve_compatible_session_model(model_id: str | None) -> tuple[str, bool]: - """Return (effective_model, was_normalized) for persisted session models. +def _catalog_model_id_matches(candidate: str, model: str) -> bool: + candidate = str(candidate or "").strip() + if candidate.startswith("@") and ":" in candidate: + candidate = candidate.rsplit(":", 1)[1] + if "/" in candidate: + candidate = candidate.split("/", 1)[1] + return candidate.replace("-", ".").lower() == model.replace("-", ".").lower() + + +def _clean_session_model_provider(value: str | None) -> str | None: + provider = str(value or "").strip().lower() + if not provider or provider == "default": + return None + if provider.startswith("@"): + provider = provider[1:] + return provider or None + + +def _split_provider_qualified_model(model: str) -> tuple[str, str | None]: + model = str(model or "").strip() + if model.startswith("@") and ":" in model: + provider_hint, bare_model = model[1:].rsplit(":", 1) + provider = _clean_session_model_provider(provider_hint) + bare = bare_model.strip() + if provider and bare: + return bare, provider + return model, None + + +def _should_attach_codex_provider_context(model: str, raw_active_provider: str, catalog: dict) -> bool: + """Return True when a bare Codex model needs separate provider context. + + OpenAI, OpenAI Codex, Copilot, and OpenRouter can all expose GPT-looking + bare names. If a session stores only ``gpt-...`` while Codex is active, a + later provider-list/default-model round trip can lose the user's Codex + choice. Store the provider separately instead of converting the persisted + model to ``@openai-codex:model``. + """ + if raw_active_provider != "openai-codex": + return False + if not model.lower().startswith("gpt"): + return False + for group in catalog.get("groups") or []: + if str(group.get("provider_id") or "").strip().lower() != "openai-codex": + continue + return any( + _catalog_model_id_matches(entry.get("id"), model) + for entry in group.get("models", []) + if isinstance(entry, dict) + ) + return False + + +def _resolve_compatible_session_model_state( + model_id: str | None, + model_provider: str | None = None, +) -> tuple[str, str | None, bool]: + """Return (effective_model, effective_provider, model_was_normalized). Sessions can outlive provider changes. When an older session still points at a different provider namespace (for example `gemini/...` after switching the agent to OpenAI Codex), reusing that stale model causes chat startup to hit - the wrong backend and fail. Normalize only obvious cross-provider mismatches; - preserve bare model IDs and OpenRouter/custom setups. + the wrong backend and fail. Normalize only obvious cross-provider mismatches. + When a model has an explicit provider context, keep the model string itself + in its picker/API shape and carry the provider as separate state. """ catalog = get_available_models() default_model = str(catalog.get("default_model") or DEFAULT_MODEL or "").strip() model = str(model_id or "").strip() + requested_provider = _clean_session_model_provider(model_provider) if not model: - return default_model, bool(default_model) + return default_model, requested_provider, bool(default_model) active_provider = _normalize_provider_id(catalog.get("active_provider")) # Also keep the raw active_provider slug for cross-provider detection with @@ -379,15 +438,19 @@ def _resolve_compatible_session_model(model_id: str | None) -> tuple[str, bool]: # is stale relative to this unknown active provider. (#1023) raw_active_provider = str(catalog.get("active_provider") or "").strip().lower() if not active_provider and not raw_active_provider: - return model, False + bare_model, explicit_provider = _split_provider_qualified_model(model) + return model, explicit_provider or requested_provider, False + + bare_for_context, explicit_provider = _split_provider_qualified_model(model) + if requested_provider and not explicit_provider: + return model, requested_provider, False if model.startswith("@") and ":" in model: - provider_hint, bare_model = model[1:].split(":", 1) - provider_raw = provider_hint.strip().lower() + provider_raw = explicit_provider or "" provider_normalized = _normalize_provider_id(provider_raw) - bare_model = bare_model.strip() + bare_model = bare_for_context.strip() if not provider_raw or not bare_model: - return model, False + return model, requested_provider, False raw_provider_ids, normalized_provider_ids = _catalog_provider_id_sets(catalog) hint_matches_active = ( @@ -403,7 +466,7 @@ def _resolve_compatible_session_model(model_id: str | None) -> tuple[str, bool]: # here would collapse duplicate model IDs from different providers back to the # bare ID, causing the first matching provider to win on the next UI render # and the wrong provider to be used for the agent run. (#1253) - return model, False + return model, provider_raw, False if _catalog_has_provider( provider_raw, @@ -411,13 +474,23 @@ def _resolve_compatible_session_model(model_id: str | None) -> tuple[str, bool]: raw_provider_ids, normalized_provider_ids, ): - return model, False + return model, provider_raw, False if _model_matches_active_provider_family(bare_model, active_provider): - return bare_model, True + provider_context = ( + raw_active_provider + if _should_attach_codex_provider_context(bare_model, raw_active_provider, catalog) + else None + ) + return bare_model, provider_context, True if default_model: - return default_model, True - return model, False + provider_context = ( + raw_active_provider + if _should_attach_codex_provider_context(default_model, raw_active_provider, catalog) + else None + ) + return default_model, provider_context, True + return model, provider_raw, False slash = model.find("/") if slash < 0: @@ -426,9 +499,19 @@ def _resolve_compatible_session_model(model_id: str | None) -> tuple[str, bool]: if model_lower.startswith(bare_prefix): model_provider = _normalize_provider_id(bare_prefix) if model_provider and model_provider != active_provider and default_model: - return default_model, True - return model, False - return model, False + provider_context = ( + raw_active_provider + if _should_attach_codex_provider_context(default_model, raw_active_provider, catalog) + else None + ) + return default_model, provider_context, True + provider_context = ( + raw_active_provider + if _should_attach_codex_provider_context(model, raw_active_provider, catalog) + else requested_provider + ) + return model, provider_context, False + return model, requested_provider, False model_provider = _normalize_provider_id(model[:slash]) @@ -440,7 +523,7 @@ def _resolve_compatible_session_model(model_id: str | None) -> tuple[str, bool]: if active_provider in {"custom", "openrouter"}: # These namespaces are always routable as-is — preserve them. if model_provider in {"", "custom", "openrouter"}: - return model, False + return model, requested_provider, False # Check if any catalog group can actually route this model's prefix. groups = catalog.get("groups") or [] routable_provider_ids = { @@ -451,11 +534,11 @@ def _resolve_compatible_session_model(model_id: str | None) -> tuple[str, bool]: (g.get("provider_id") or "") == "openrouter" for g in groups ) if model_provider in routable_provider_ids or has_openrouter_group: - return model, False + return model, requested_provider, False # Model prefix is not routable — stale cross-provider reference, clear it. if default_model: - return default_model, True - return model, False + return default_model, requested_provider, True + return model, requested_provider, False # Skip normalization for models on custom/openrouter namespaces — these are # user-controlled and should never be silently replaced. @@ -465,18 +548,35 @@ def _resolve_compatible_session_model(model_id: str | None) -> tuple[str, bool]: # active provider name, the session model is stale. (#1023) _active_for_compare = active_provider or raw_active_provider if model_provider and model_provider not in {"", "custom", "openrouter"} and model_provider != _active_for_compare and default_model: - return default_model, True - return model, False + return default_model, requested_provider, True + return model, requested_provider, False + + +def _resolve_compatible_session_model(model_id: str | None) -> tuple[str, bool]: + """Return (effective_model, model_was_normalized) for legacy callers.""" + effective_model, _provider, changed = _resolve_compatible_session_model_state(model_id) + return effective_model, changed def _normalize_session_model_in_place(session) -> str: original_model = getattr(session, "model", None) or "" - effective_model, changed = _resolve_compatible_session_model(original_model or None) + original_provider = _clean_session_model_provider( + getattr(session, "model_provider", None) + ) + effective_model, effective_provider, changed = _resolve_compatible_session_model_state( + original_model or None, + original_provider, + ) + provider_changed = effective_provider != original_provider # Only persist the correction if the session had an explicit model that needed changing. # Sessions with no model stored (empty/None) get the effective default returned without # a disk write — no need to rebuild the index for a fill-in-blank operation. - if changed and effective_model and original_model and original_model != effective_model: - session.model = effective_model + if original_model and effective_model and ( + (changed and original_model != effective_model) or provider_changed + ): + if changed and original_model != effective_model: + session.model = effective_model + session.model_provider = effective_provider session.save(touch_updated_at=False) return effective_model @@ -489,10 +589,46 @@ def _resolve_effective_session_model_for_display(session) -> str: effective model for the response payload only and leave disk state alone. """ original_model = getattr(session, "model", None) or "" - effective_model, _changed = _resolve_compatible_session_model(original_model or None) + effective_model, _provider, _changed = _resolve_compatible_session_model_state( + original_model or None, + getattr(session, "model_provider", None), + ) return effective_model or original_model +def _resolve_effective_session_model_provider_for_display(session) -> str | None: + original_model = getattr(session, "model", None) or "" + _model, provider, _changed = _resolve_compatible_session_model_state( + original_model or None, + getattr(session, "model_provider", None), + ) + return provider + + +def _session_model_state_from_request( + model: str | None, + requested_provider: str | None, + current_provider: str | None = None, +) -> tuple[str | None, str | None]: + model_value = str(model).strip() if model is not None else None + provider = ( + _clean_session_model_provider(requested_provider) + if requested_provider is not None + else None + ) + if model_value: + _bare, explicit_provider = _split_provider_qualified_model(model_value) + if explicit_provider: + provider = explicit_provider + elif requested_provider is None: + provider = _clean_session_model_provider(current_provider) + model_value, provider, _changed = _resolve_compatible_session_model_state( + model_value, + provider, + ) + return model_value, provider + + from api.models import ( Session, get_session, @@ -992,6 +1128,11 @@ def handle_get(handler, parsed) -> bool: if resolve_model else None ) + effective_provider = ( + _resolve_effective_session_model_provider_for_display(s) + if resolve_model + else None + ) _t3 = _time.monotonic() _all_msgs = s.messages if load_messages else [] if load_messages: @@ -1039,6 +1180,8 @@ def handle_get(handler, parsed) -> bool: _t4 = _time.monotonic() if effective_model: raw["model"] = effective_model + if effective_provider: + raw["model_provider"] = effective_provider redact = redact_session_data(raw) _t5 = _time.monotonic() resp = j(handler, {"session": redact}) @@ -1414,9 +1557,18 @@ def handle_post(handler, parsed) -> bool: workspace = str(resolve_trusted_workspace(body.get("workspace"))) if body.get("workspace") else None except ValueError as e: return bad(handler, str(e)) + model, model_provider = _session_model_state_from_request( + body.get("model"), + body.get("model_provider"), + ) # Use the profile sent by the client tab (if any) so that two tabs on # different profiles never clobber each other via the process-level global. - s = new_session(workspace=workspace, model=body.get("model"), profile=body.get("profile") or None) + s = new_session( + workspace=workspace, + model=model, + model_provider=model_provider, + profile=body.get("profile") or None, + ) return j(handler, {"session": s.compact() | {"messages": s.messages}}) if parsed.path == "/api/default-model": @@ -1569,7 +1721,15 @@ def handle_post(handler, parsed) -> bool: return bad(handler, str(e)) with _get_session_agent_lock(body["session_id"]): s.workspace = new_ws - s.model = body.get("model", s.model) + if "model" in body or "model_provider" in body: + model, provider = _session_model_state_from_request( + body.get("model", s.model), + body.get("model_provider") if "model_provider" in body else None, + getattr(s, "model_provider", None), + ) + if model is not None: + s.model = model + s.model_provider = provider s.save() if str(old_ws or "") != str(new_ws or ""): try: @@ -3348,7 +3508,13 @@ def _handle_btw(handler, body): s.active_stream_id = None # Create ephemeral hidden session inheriting context from api.models import new_session as _new_session - ephemeral = _new_session(workspace=s.workspace, model=s.model, profile=getattr(s, 'profile', None)) + model_provider = getattr(s, 'model_provider', None) + ephemeral = _new_session( + workspace=s.workspace, + model=s.model, + model_provider=model_provider, + profile=getattr(s, 'profile', None), + ) # Copy conversation history for context (agent reads from messages) ephemeral.messages = list(s.messages or []) ephemeral.title = f"btw: {question[:60]}" @@ -3364,7 +3530,7 @@ def _handle_btw(handler, body): thr = threading.Thread( target=_run_agent_streaming, args=(ephemeral.session_id, question, s.model, s.workspace, stream_id, None), - kwargs={"ephemeral": True}, + kwargs={"ephemeral": True, "model_provider": model_provider}, daemon=True, ) thr.start() @@ -3390,7 +3556,13 @@ def _handle_background(handler, body): if not prompt: return bad(handler, "prompt is required") from api.models import new_session as _new_session - bg = _new_session(workspace=s.workspace, model=s.model, profile=getattr(s, 'profile', None)) + model_provider = getattr(s, 'model_provider', None) + bg = _new_session( + workspace=s.workspace, + model=s.model, + model_provider=model_provider, + profile=getattr(s, 'profile', None), + ) bg.title = f"bg: {prompt[:60]}" bg.save() stream_id = uuid.uuid4().hex @@ -3412,7 +3584,15 @@ def _handle_background(handler, body): `get_results()` would see a forever-`running` task and return nothing. """ try: - _run_agent_streaming(bg_sid, prompt, s.model, s.workspace, stream_id, None) + _run_agent_streaming( + bg_sid, + prompt, + s.model, + s.workspace, + stream_id, + None, + model_provider=model_provider, + ) # Reload the bg session from disk and extract the final assistant reply. try: from api.models import Session as _Session @@ -3466,7 +3646,15 @@ def _handle_chat_start(handler, body): except ValueError as e: return bad(handler, str(e)) requested_model = body.get("model") or s.model - model, normalized_model = _resolve_compatible_session_model(requested_model) + requested_provider = ( + body.get("model_provider") + if "model_provider" in body + else getattr(s, "model_provider", None) + ) + model, model_provider, normalized_model = _resolve_compatible_session_model_state( + requested_model, + requested_provider, + ) # Prevent duplicate runs in the same session while a stream is still active. # This commonly happens after page refresh/reconnect races and can produce # duplicated clarify cards for what appears to be a single user request. @@ -3489,6 +3677,7 @@ def _handle_chat_start(handler, body): with _get_session_agent_lock(s.session_id): s.workspace = workspace s.model = model + s.model_provider = model_provider s.active_stream_id = stream_id s.pending_user_message = msg s.pending_attachments = attachments @@ -3501,12 +3690,15 @@ def _handle_chat_start(handler, body): thr = threading.Thread( target=_run_agent_streaming, args=(s.session_id, msg, model, workspace, stream_id, attachments), + kwargs={"model_provider": model_provider}, daemon=True, ) thr.start() response = {"stream_id": stream_id, "session_id": s.session_id} if normalized_model: response["effective_model"] = model + if model_provider: + response["effective_model_provider"] = model_provider return j(handler, response) @@ -3552,7 +3744,12 @@ def _handle_chat_sync(handler, body): return bad(handler, str(e)) with _get_session_agent_lock(s.session_id): s.workspace = workspace - s.model = body.get("model") or s.model + model, model_provider = _resolve_compatible_session_model_state( + body.get("model") or s.model, + body.get("model_provider") if "model_provider" in body else getattr(s, "model_provider", None), + )[:2] + s.model = model + s.model_provider = model_provider from api.streaming import _ENV_LOCK with _ENV_LOCK: @@ -3568,7 +3765,9 @@ def _handle_chat_sync(handler, body): with CHAT_LOCK: from api.config import resolve_model_provider - _model, _provider, _base_url = resolve_model_provider(s.model) + _model, _provider, _base_url = resolve_model_provider( + model_with_provider_context(s.model, getattr(s, "model_provider", None)) + ) # Resolve API key via Hermes runtime provider (matches gateway behaviour) _api_key = None try: @@ -4231,7 +4430,9 @@ def _handle_session_compress(handler, body): import hermes_cli.runtime_provider as _runtime_provider import run_agent as _run_agent - resolved_model, resolved_provider, resolved_base_url = _cfg.resolve_model_provider(s.model) + resolved_model, resolved_provider, resolved_base_url = _cfg.resolve_model_provider( + _cfg.model_with_provider_context(s.model, getattr(s, "model_provider", None)) + ) resolved_api_key = None try: diff --git a/api/streaming.py b/api/streaming.py index 4ed12b83..1abbea64 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -26,6 +26,7 @@ from api.config import ( _get_session_agent_lock, _set_thread_env, _clear_thread_env, SESSION_AGENT_LOCKS, SESSION_AGENT_LOCKS_LOCK, resolve_model_provider, + model_with_provider_context, ) from api.helpers import redact_session_data from api.metering import meter @@ -1342,7 +1343,17 @@ def _last_resort_sync_from_core(session, stream_id, agent_lock): ) -def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, attachments=None, *, ephemeral=False): +def _run_agent_streaming( + session_id, + msg_text, + model, + workspace, + stream_id, + attachments=None, + *, + ephemeral=False, + model_provider=None, +): """Run agent in background thread, writing SSE events to STREAMS[stream_id]. When ephemeral=True, session mutations are skipped — used by /btw to get @@ -1418,6 +1429,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta s = get_session(session_id) s.workspace = str(Path(workspace).expanduser().resolve()) s.model = model + provider_context = ( + str(model_provider).strip().lower() + if model_provider is not None + else getattr(s, "model_provider", None) + ) + s.model_provider = provider_context or None _agent_lock = _get_session_agent_lock(session_id) # TD1: set thread-local env context so concurrent sessions don't clobber globals @@ -1701,7 +1718,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta _session_db = SessionDB() except Exception as _db_err: print(f"[webui] WARNING: SessionDB init failed — session_search will be unavailable: {_db_err}", flush=True) - resolved_model, resolved_provider, resolved_base_url = resolve_model_provider(model) + resolved_model, resolved_provider, resolved_base_url = resolve_model_provider( + model_with_provider_context(model, provider_context) + ) # Resolve API key via Hermes runtime provider (matches gateway behaviour). # Pass the resolved provider so non-default providers get their own credentials. diff --git a/static/boot.js b/static/boot.js index f3cf8ea6..4bf1be25 100644 --- a/static/boot.js +++ b/static/boot.js @@ -463,21 +463,31 @@ $('btnClearPreview').onclick=handleWorkspaceClose; $('modelSelect').onchange=async()=>{ if(!S.session)return; const selectedModel=$('modelSelect').value; + const modelState=(typeof _modelStateForSelect==='function') + ? _modelStateForSelect($('modelSelect'),selectedModel) + : {model:selectedModel,model_provider:null}; if(typeof closeModelDropdown==='function') closeModelDropdown(); - localStorage.setItem('hermes-webui-model', selectedModel); - await api('/api/session/update',{method:'POST',body:JSON.stringify({session_id:S.session.session_id,workspace:S.session.workspace,model:selectedModel})}); - S.session.model=selectedModel; + if(typeof _writePersistedModelState==='function') _writePersistedModelState(modelState.model,modelState.model_provider); + else localStorage.setItem('hermes-webui-model', modelState.model); + await api('/api/session/update',{method:'POST',body:JSON.stringify({ + session_id:S.session.session_id, + workspace:S.session.workspace, + model:modelState.model, + model_provider:modelState.model_provider||null, + })}); + S.session.model=modelState.model; + S.session.model_provider=modelState.model_provider||null; if(typeof syncModelChip==='function') syncModelChip(); syncTopbar(); + // Clarify scope: composer model changes are session-local, not the global default. + if(typeof showToast==='function'){ + showToast(t('model_scope_toast')||'Applies to this conversation from your next message.', 3000); + } // Warn if selected model belongs to a different provider than what Hermes is configured for if(typeof _checkProviderMismatch==='function'){ const warn=_checkProviderMismatch(selectedModel); if(warn&&typeof showToast==='function') showToast(warn,4000); } - // Clarify scope: composer model changes are session-local, not the global default. - if(typeof showToast==='function'){ - showToast(t('model_scope_toast')||'Applies to this conversation from your next message.', 3000); - } }; $('msg').addEventListener('input',()=>{ autoResize(); @@ -913,11 +923,20 @@ function applyBotName(){ // options are enough for first paint; the dynamic provider list can settle // after the saved session is visible. const _modelDropdownReady=populateModelDropdown().then(()=>{ - const savedModel=localStorage.getItem('hermes-webui-model'); + const savedState=(typeof _readPersistedModelState==='function') + ? _readPersistedModelState() + : (localStorage.getItem('hermes-webui-model')?{model:localStorage.getItem('hermes-webui-model'),model_provider:null}:null); + const savedModel=savedState&&savedState.model; if(savedModel && $('modelSelect')){ - $('modelSelect').value=savedModel; + const applied=(typeof _applyModelToDropdown==='function') + ? _applyModelToDropdown(savedModel,$('modelSelect'),savedState.model_provider||null) + : null; + if(!applied) $('modelSelect').value=savedModel; // If the value didn't take (model not in list), clear the bad pref - if($('modelSelect').value!==savedModel) localStorage.removeItem('hermes-webui-model'); + if(!applied&&$('modelSelect').value!==savedModel){ + if(typeof _clearPersistedModelState==='function') _clearPersistedModelState(); + else localStorage.removeItem('hermes-webui-model'); + } else if(typeof syncModelChip==='function') syncModelChip(); } if(S.session) syncTopbar(); diff --git a/static/messages.js b/static/messages.js index 7bbf8c19..8974b4b5 100644 --- a/static/messages.js +++ b/static/messages.js @@ -82,7 +82,7 @@ async function send(){ S.pendingFiles=[];renderTray(); } else if(busyMode==='interrupt'){ // Queue the message, then cancel so drain re-sends it. - queueSessionMessage(S.session.session_id,{text,files:[...S.pendingFiles],model:S.session&&S.session.model||($('modelSelect')&&$('modelSelect').value)||'',profile:S.activeProfile||'default'}); + queueSessionMessage(S.session.session_id,{text,files:[...S.pendingFiles],model:S.session&&S.session.model||($('modelSelect')&&$('modelSelect').value)||'',model_provider:S.session&&S.session.model_provider||null,profile:S.activeProfile||'default'}); updateQueueBadge(S.session.session_id); $('msg').value='';autoResize(); S.pendingFiles=[];renderTray(); @@ -95,7 +95,7 @@ async function send(){ } else { // Default: queue mode (current behavior). Also the fallback for // 'steer' mode when no stream is active or _trySteer is unavailable. - queueSessionMessage(S.session.session_id,{text,files:[...S.pendingFiles],model:S.session&&S.session.model||($('modelSelect')&&$('modelSelect').value)||'',profile:S.activeProfile||'default'}); + queueSessionMessage(S.session.session_id,{text,files:[...S.pendingFiles],model:S.session&&S.session.model||($('modelSelect')&&$('modelSelect').value)||'',model_provider:S.session&&S.session.model_provider||null,profile:S.activeProfile||'default'}); $('msg').value='';autoResize(); S.pendingFiles=[];renderTray(); updateQueueBadge(S.session.session_id); @@ -190,12 +190,21 @@ async function send(){ const startData=await api('/api/chat/start',{method:'POST',body:JSON.stringify({ session_id:activeSid,message:msgText, model:S.session.model||$('modelSelect').value,workspace:S.session.workspace, + model_provider:S.session.model_provider||null, attachments:uploaded.length?uploaded:undefined })}); if(startData.effective_model && S.session){ S.session.model=startData.effective_model; + S.session.model_provider=startData.effective_model_provider||S.session.model_provider||null; localStorage.setItem('hermes-webui-model', startData.effective_model); - if($('modelSelect')) _applyModelToDropdown(startData.effective_model, $('modelSelect')); + if(typeof _writePersistedModelState==='function') _writePersistedModelState(startData.effective_model,S.session.model_provider||null); + if($('modelSelect')) _applyModelToDropdown(startData.effective_model, $('modelSelect'),S.session.model_provider||null); + if(typeof syncTopbar==='function') syncTopbar(); + }else if(startData.effective_model_provider && S.session){ + S.session.model_provider=startData.effective_model_provider; + if(typeof _writePersistedModelState==='function') _writePersistedModelState(S.session.model||'',S.session.model_provider||null); + if($('modelSelect')&&typeof _applyModelToDropdown==='function') _applyModelToDropdown(S.session.model||'', $('modelSelect'), S.session.model_provider||null); + if(typeof syncModelChip==='function') syncModelChip(); if(typeof syncTopbar==='function') syncTopbar(); } streamId=startData.stream_id; @@ -221,7 +230,7 @@ async function send(){ stopApprovalPolling(); stopClarifyPolling(); // Keep the user's attempted turn by queueing it for after the current run. - queueSessionMessage(activeSid,{text:msgText,files:[],model:S.session&&S.session.model||($('modelSelect')&&$('modelSelect').value)||'',profile:S.activeProfile||'default'}); + queueSessionMessage(activeSid,{text:msgText,files:[],model:S.session&&S.session.model||($('modelSelect')&&$('modelSelect').value)||'',model_provider:S.session&&S.session.model_provider||null,profile:S.activeProfile||'default'}); updateQueueBadge(activeSid); showToast('Current session is still running. Reconnected and queued your message.',2600); try{ @@ -886,6 +895,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ queueSessionMessage(sid,{ text:txt,files:[], model:S.session&&S.session.model||'', + model_provider:S.session&&S.session.model_provider||null, profile:S.activeProfile||'default', }); if(typeof updateQueueBadge==='function') updateQueueBadge(sid); diff --git a/static/panels.js b/static/panels.js index 2a1bea70..621985e7 100644 --- a/static/panels.js +++ b/static/panels.js @@ -1983,7 +1983,7 @@ async function switchToWorkspace(path,name){ try{ closeWsDropdown(); await api('/api/session/update',{method:'POST',body:JSON.stringify({ - session_id:S.session.session_id, workspace:path, model:S.session.model + session_id:S.session.session_id, workspace:path, model:S.session.model, model_provider:S.session.model_provider||null })}); S.session.workspace=path; // Explicit workspace switch = user overriding any pending profile-switch default. @@ -2236,7 +2236,8 @@ async function switchToProfile(name) { // ── Model + Workspace (parallelized) ─────────────────────────────────── // populateModelDropdown hits /api/models; loadWorkspaceList hits /api/workspaces. // They are fully independent — run both simultaneously to cut switch time ~50%. - localStorage.removeItem('hermes-webui-model'); + if(typeof _clearPersistedModelState==='function') _clearPersistedModelState(); + else localStorage.removeItem('hermes-webui-model'); _skillsData = null; _workspaceList = null; await Promise.all([populateModelDropdown(), loadWorkspaceList()]); @@ -2246,10 +2247,15 @@ async function switchToProfile(name) { const sel = $('modelSelect'); const resolved = _applyModelToDropdown(data.default_model, sel, window._activeProvider||null); const modelToUse = resolved || data.default_model; + const modelState = (typeof _modelStateForSelect==='function') + ? _modelStateForSelect(sel, modelToUse) + : {model:modelToUse,model_provider:null}; S._pendingProfileModel = modelToUse; + S._pendingProfileModelProvider = modelState.model_provider||null; // Only patch the in-memory session model if we're NOT about to replace the session if (S.session && !sessionInProgress) { S.session.model = modelToUse; + S.session.model_provider = modelState.model_provider||null; } } @@ -2269,6 +2275,7 @@ async function switchToProfile(name) { session_id: S.session.session_id, workspace: data.default_workspace, model: S.session.model, + model_provider: S.session.model_provider||null, })}); S.session.workspace = data.default_workspace; } catch (_) {} @@ -2289,6 +2296,7 @@ async function switchToProfile(name) { session_id: S.session.session_id, workspace: S._profileDefaultWorkspace, model: S.session.model, + model_provider: S.session.model_provider||null, })}); S.session.workspace = S._profileDefaultWorkspace; } catch (_) {} diff --git a/static/sessions.js b/static/sessions.js index a75cb38e..bebeb27d 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -276,8 +276,22 @@ async function newSession(flash){ // default_model (from Settings) takes priority over the chat-header dropdown // value, which reflects the *previous* session's model. Fall back to the // dropdown value only when no default_model is configured. - const newModel=window._defaultModel||$('modelSelect').value; - const data=await api('/api/session/new',{method:'POST',body:JSON.stringify({model:newModel,workspace:inheritWs,profile:S.activeProfile||'default'})}); + const modelSel=$('modelSelect'); + const selectedDefaultModel=window._defaultModel||(modelSel&&modelSel.value)||''; + let defaultApplied=false; + if(window._defaultModel&&modelSel&&typeof _applyModelToDropdown==='function'){ + defaultApplied=!!_applyModelToDropdown(window._defaultModel,modelSel,window._activeProvider||null); + } + const canQualify=!window._defaultModel||defaultApplied||(modelSel&&modelSel.value===selectedDefaultModel); + const newModelState=(canQualify&&typeof _modelStateForSelect==='function') + ? _modelStateForSelect(modelSel,selectedDefaultModel) + : {model:selectedDefaultModel,model_provider:null}; + const data=await api('/api/session/new',{method:'POST',body:JSON.stringify({ + model:newModelState.model, + model_provider:newModelState.model_provider||null, + workspace:inheritWs, + profile:S.activeProfile||'default', + })}); S.session=data.session;S.messages=data.session.messages||[]; S.lastUsage={...(data.session.last_usage||{})}; if(flash)S.session._flash=true; @@ -286,7 +300,7 @@ async function newSession(flash){ // Sync chat-header dropdown to the session's model so the UI reflects // the default model the server actually used (#872). if(S.session.model && S.session.model!==$('modelSelect').value && typeof _applyModelToDropdown==='function'){ - _applyModelToDropdown(S.session.model,$('modelSelect')); + _applyModelToDropdown(S.session.model,$('modelSelect'),S.session.model_provider||null); if(typeof syncModelChip==='function') syncModelChip(); } // Reset per-session visual state: a fresh chat is idle even if another @@ -513,8 +527,10 @@ function _resolveSessionModelForDisplaySoon(sid){ try{ const data=await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=0&resolve_model=1`); const model=data&&data.session&&data.session.model; + const provider=data&&data.session&&data.session.model_provider; if(!model||!S.session||S.session.session_id!==sid) return; S.session.model=model; + S.session.model_provider=provider||null; S.session._modelResolutionDeferred=false; syncTopbar(); }catch(_){ @@ -860,7 +876,7 @@ function _openSessionActionMenu(session, anchorEl){ async()=>{ closeSessionActionMenu(); try{ - const res=await api('/api/session/new',{method:'POST',body:JSON.stringify({workspace:session.workspace,model:session.model})}); + const res=await api('/api/session/new',{method:'POST',body:JSON.stringify({workspace:session.workspace,model:session.model,model_provider:session.model_provider||null})}); if(res.session){ await api('/api/session/rename',{method:'POST',body:JSON.stringify({session_id:res.session.session_id,title:(session.title||'Untitled')+' (copy)'})}); await loadSession(res.session.session_id); diff --git a/static/ui.js b/static/ui.js index 06d44933..f274a279 100644 --- a/static/ui.js +++ b/static/ui.js @@ -225,6 +225,7 @@ setTimeout(_initMediaPlaybackObserver,0); // Dynamic model labels -- populated by populateModelDropdown(), fallback to static map let _dynamicModelLabels={}; window._configuredModelBadges=window._configuredModelBadges||{}; +const MODEL_STATE_KEY='hermes-webui-model-state'; // ── Smart model resolver ──────────────────────────────────────────────────── // Finds the best matching option value in a + + + + + + +
@@ -359,6 +378,10 @@
+ +
+
+
Usage Analytics
+
+
+
Loading...
+
+
@@ -767,6 +808,13 @@
Group thinking and tool calls into one collapsed activity section per assistant turn.
+
+ +
Self-hosted users can disable for transparency (not recommended for shared instances).
+