diff --git a/api/config.py b/api/config.py
index 2ebabc5f..6da83dab 100644
--- a/api/config.py
+++ b/api/config.py
@@ -1585,6 +1585,7 @@ def set_hermes_default_model(model_id: str) -> dict:
 # ── TTL cache for get_available_models() ─────────────────────────────────────
 _available_models_cache: dict | None = None
 _available_models_cache_ts: float = 0.0
+_available_models_cache_source_fingerprint: dict | None = None  # config.yaml/auth.json fingerprint recorded alongside the in-memory cache
 _AVAILABLE_MODELS_CACHE_TTL: float = 86400.0  # 24 hours
 _available_models_cache_lock = threading.RLock()  # must be RLock: cold path refactoring moved slow work inside this lock, requiring re-entry
 _cache_build_cv = threading.Condition(_available_models_cache_lock)  # shares underlying RLock so notify_all() is safe inside with _available_models_cache_lock
@@ -1641,12 +1642,48 @@ def _current_webui_version() -> str | None:
 # guarantees that even if a future release accidentally reuses the same
 # WebUI version string (or a debug build doesn't have a version), a structural
 # change still invalidates the cache.
-_MODELS_CACHE_SCHEMA_VERSION = 2
+_MODELS_CACHE_SCHEMA_VERSION = 3  # v3: on-disk payload now carries "_source_fingerprint"
 
 
 _models_cache_path = STATE_DIR / "models_cache.json"
 
 
+def _get_auth_store_path() -> Path:
+    """Locate ``auth.json`` inside the active Hermes profile's home directory."""
+    try:
+        from api.profiles import get_active_hermes_home
+        hermes_home = get_active_hermes_home()
+    except ImportError:
+        hermes_home = HOME / ".hermes"  # api.profiles not importable: use the default home
+    return hermes_home / "auth.json"
+
+
+def _models_cache_file_fingerprint(path: Path) -> dict:
+    """Return non-secret identity metadata for a cache dependency file.
+
+    The /api/models response depends on config.yaml (model/provider defaults)
+    and auth.json (active_provider + credential_pool).  Only cheap stat() data
+    (mtime_ns, size, or missing=True on OSError) is kept; never file contents.
+    """
+    resolved = Path(path).expanduser()  # expand "~" once; stat the same path we record
+    fingerprint = {"path": str(resolved)}
+    try:
+        st = resolved.stat()
+    except OSError:
+        return {**fingerprint, "missing": True}
+    fingerprint["mtime_ns"] = st.st_mtime_ns
+    fingerprint["size"] = st.st_size
+    return fingerprint
+
+
+def _models_cache_source_fingerprint() -> dict:
+    """Fingerprint the two files the /api/models cache is derived from."""
+    # config.yaml is fingerprinted first, then auth.json, matching the key order.
+    config_fp = _models_cache_file_fingerprint(_get_config_path())
+    auth_fp = _models_cache_file_fingerprint(_get_auth_store_path())
+    return {"config_yaml": config_fp, "auth_json": auth_fp}
+
+
 def _delete_models_cache_on_disk() -> None:
     try:
         os.unlink(str(_models_cache_path))
@@ -1717,6 +1754,15 @@ def _is_loadable_disk_cache(cache: object) -> bool:
                 cached_version, runtime_version,
             )
             return False
+    cached_sources = cache.get("_source_fingerprint")
+    runtime_sources = _models_cache_source_fingerprint()
+    if cached_sources != runtime_sources:
+        logger.debug(
+            "models cache rejected: source_fingerprint=%r vs runtime=%r",
+            cached_sources,
+            runtime_sources,
+        )
+        return False
     return True
 
 
@@ -1772,6 +1818,7 @@ def _save_models_cache_to_disk(cache: dict) -> None:
             return
         payload = {
             "_schema_version": _MODELS_CACHE_SCHEMA_VERSION,
+            "_source_fingerprint": _models_cache_source_fingerprint(),
             "active_provider": cache["active_provider"],
             "default_model": cache["default_model"],
             "configured_model_badges": cache["configured_model_badges"],
@@ -1790,15 +1837,27 @@ def _save_models_cache_to_disk(cache: dict) -> None:
 
 def _get_fresh_memory_models_cache(now: float) -> dict | None:
     """Return a valid fresh in-memory /api/models cache, or clear stale shapes."""
-    global _available_models_cache, _available_models_cache_ts
+    global _available_models_cache, _available_models_cache_ts, _available_models_cache_source_fingerprint
     if _available_models_cache is None:
         return None
     if (now - _available_models_cache_ts) >= _AVAILABLE_MODELS_CACHE_TTL:
         return None
+    current_sources = _models_cache_source_fingerprint()  # re-stats config.yaml and auth.json on every lookup
+    if _available_models_cache_source_fingerprint != current_sources:  # a source file changed, or no fingerprint was recorded
+        logger.debug(
+            "models memory cache rejected: source_fingerprint=%r vs runtime=%r",
+            _available_models_cache_source_fingerprint,
+            current_sources,
+        )
+        _available_models_cache = None  # drop the stale entry together with its timestamp and fingerprint
+        _available_models_cache_ts = 0.0
+        _available_models_cache_source_fingerprint = None
+        return None
     if _is_valid_models_cache(_available_models_cache):
         return copy.deepcopy(_available_models_cache)
     _available_models_cache = None
     _available_models_cache_ts = 0.0
+    _available_models_cache_source_fingerprint = None  # keep the fingerprint in step with the cleared cache
     return None
 
 
@@ -1816,10 +1875,11 @@ def invalidate_models_cache():
     result from the disk cache because the disk hit is checked before the memory
     cache rebuild runs.
     """
-    global _cache_build_in_progress, _available_models_cache, _available_models_cache_ts, _cache_build_cv
+    global _cache_build_in_progress, _available_models_cache, _available_models_cache_ts, _available_models_cache_source_fingerprint, _cache_build_cv
     with _available_models_cache_lock:
         _available_models_cache = None
         _available_models_cache_ts = 0.0
+        _available_models_cache_source_fingerprint = None
         _cache_build_in_progress = False
         _cache_build_cv.notify_all()
         # Clear the credential pool cache too. The cache key is provider_id
@@ -1856,10 +1916,11 @@ def invalidate_provider_models_cache(provider_id: str):
     Args:
         provider_id: canonical provider id (e.g. 'openai', 'anthropic', 'custom:my-key')
     """
-    global _available_models_cache, _available_models_cache_ts, _CREDENTIAL_POOL_CACHE
+    global _available_models_cache, _available_models_cache_ts, _available_models_cache_source_fingerprint, _CREDENTIAL_POOL_CACHE
     with _available_models_cache_lock:
         _available_models_cache = None
         _available_models_cache_ts = 0.0
+        _available_models_cache_source_fingerprint = None
         _provider_models_invalidated_ts[provider_id] = time.time()
         # Also evict the credential pool so the next cold path re-loads it.
         # Must evict both the original key and its canonical form (load_pool
@@ -1918,7 +1979,7 @@ def get_available_models() -> dict:
         'groups': [{'provider': str, 'models': [{'id': str, 'label': str}]}]
     }
     """
-    global _cache_build_in_progress, _available_models_cache, _available_models_cache_ts, _cache_build_cv
+    global _cache_build_in_progress, _available_models_cache, _available_models_cache_ts, _available_models_cache_source_fingerprint, _cache_build_cv
     # Config mtime check — must come before any config reads.
     # (Test #585 verifies _current_mtime appears before active_provider = None)
     try:
@@ -2053,12 +2114,7 @@ def get_available_models() -> dict:
 
         # 2. Read auth store (active_provider fallback + credential_pool inspection)
         auth_store = {}
-        try:
-            from api.profiles import get_active_hermes_home as _gah
-
-            auth_store_path = _gah() / "auth.json"
-        except ImportError:
-            auth_store_path = HOME / ".hermes" / "auth.json"
+        auth_store_path = _get_auth_store_path()
         if auth_store_path.exists():
             try:
                 import json as _j
@@ -2939,6 +2995,7 @@ def get_available_models() -> dict:
             reload_config()
             _available_models_cache = None
             _available_models_cache_ts = 0.0
+            _available_models_cache_source_fingerprint = None
             disk_groups = None
 
         # Serve from memory cache if fresh
@@ -2951,6 +3008,7 @@ def get_available_models() -> dict:
         if disk_groups is not None:
             _available_models_cache = disk_groups
             _available_models_cache_ts = now
+            _available_models_cache_source_fingerprint = _models_cache_source_fingerprint()
             _save_models_cache_to_disk(disk_groups)
             return copy.deepcopy(disk_groups)
 
@@ -2968,6 +3026,7 @@ def get_available_models() -> dict:
         with _cache_build_cv:
             _available_models_cache = result
             _available_models_cache_ts = time.monotonic()
+            _available_models_cache_source_fingerprint = _models_cache_source_fingerprint()
             _cache_build_in_progress = False
             _cache_build_cv.notify_all()
         _save_models_cache_to_disk(result)
diff --git a/docs/pr-media/1699/model-cache-auth-store-refresh.png b/docs/pr-media/1699/model-cache-auth-store-refresh.png
new file mode 100644
index 00000000..beb552f6
Binary files /dev/null and b/docs/pr-media/1699/model-cache-auth-store-refresh.png differ
diff --git a/tests/test_issue1633_models_cache_version_stamp.py b/tests/test_issue1633_models_cache_version_stamp.py
index b06c0294..772ed127 100644
--- a/tests/test_issue1633_models_cache_version_stamp.py
+++ b/tests/test_issue1633_models_cache_version_stamp.py
@@ -214,6 +214,7 @@ def test_load_skips_version_check_when_runtime_unknown(isolated_cache, monkeypat
     # Write a cache that's correct except has no _webui_version
     cache = {
         "_schema_version": config._MODELS_CACHE_SCHEMA_VERSION,
+        "_source_fingerprint": config._models_cache_source_fingerprint(),
         # no _webui_version
         **_shape_cache(),
     }
@@ -268,6 +269,7 @@ def test_is_loadable_disk_cache_checks_versions(with_runtime_version):
     good = {
         "_schema_version": config._MODELS_CACHE_SCHEMA_VERSION,
         "_webui_version": "v0.50.293",
+        "_source_fingerprint": config._models_cache_source_fingerprint(),
         **_shape_cache(),
     }
     assert config._is_loadable_disk_cache(good) is True
diff --git a/tests/test_issue1699_model_cache_source_fingerprint.py b/tests/test_issue1699_model_cache_source_fingerprint.py
new file mode 100644
index 00000000..034b62fb
--- /dev/null
+++ b/tests/test_issue1699_model_cache_source_fingerprint.py
@@ -0,0 +1,138 @@
+"""Regression tests for #1699: /api/models cache must track external auth/config changes.
+
+The bug: WebUI caches /api/models for 24h in memory and on disk. When a user
+runs `hermes setup` in a terminal and the Hermes auth store switches the active
+provider outside WebUI, the browser can keep seeing the previous provider's
+PRIMARY badge until the cache is manually cleared or expires.
+"""
+
+import json
+import time
+
+import api.config as config
+
+
+def _reset_memory_cache() -> None:
+    """Clear api.config's in-memory models cache state and notify build waiters."""
+    with config._available_models_cache_lock:
+        config._available_models_cache, config._available_models_cache_ts = None, 0.0
+        if hasattr(config, "_available_models_cache_source_fingerprint"):
+            config._available_models_cache_source_fingerprint = None
+        config._cache_build_in_progress = False
+        config._cache_build_cv.notify_all()
+
+
+def _valid_models_cache(provider_id: str, model_id: str) -> dict:
+    """Build a minimal one-provider, one-model /api/models cache payload.
+
+    *model_id* is badged as the primary model of *provider_id*.
+    """
+    display_name = config._PROVIDER_DISPLAY.get(provider_id, provider_id.title())
+    primary_badge = {"role": "primary", "label": "Primary", "provider": provider_id}
+    model_entry = {"id": model_id, "label": model_id}
+    group = {"provider": display_name, "provider_id": provider_id, "models": [model_entry]}
+    return {
+        "active_provider": provider_id,
+        "default_model": model_id,
+        "configured_model_badges": {model_id: primary_badge},
+        "groups": [group],
+    }
+
+
+def _write_auth_store(hermes_home, provider_id: str) -> None:
+    """Write an auth.json under *hermes_home* naming *provider_id* as active."""
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    auth_payload = {"active_provider": provider_id, "credential_pool": {}}
+    auth_file = hermes_home / "auth.json"
+    auth_file.write_text(json.dumps(auth_payload), encoding="utf-8")
+
+
+def _configure_isolated_sources(tmp_path, monkeypatch, provider_id: str) -> None:
+    """Point api.config at throwaway config/auth/cache files under *tmp_path*.
+
+    auth.json names *provider_id* as active_provider; host provider detection
+    is stubbed, then the config is reloaded and the memory cache is cleared.
+    """
+    hermes_home, state_dir = tmp_path / "hermes-home", tmp_path / "state"
+    for sandbox_dir in (state_dir, hermes_home):
+        sandbox_dir.mkdir(parents=True, exist_ok=True)
+    # Leave model.provider unset so get_available_models() must honor the auth
+    # store's active_provider fallback, matching CLI setup/auth-store drift.
+    config_file = hermes_home / "config.yaml"
+    config_file.write_text("model:\n  default: glm-5.1\n", encoding="utf-8")
+    monkeypatch.setenv("HERMES_CONFIG_PATH", str(config_file))
+
+    import api.profiles as profiles
+
+    monkeypatch.setattr(profiles, "get_active_hermes_home", lambda: hermes_home)
+    monkeypatch.setattr(config, "_models_cache_path", state_dir / "models_cache.json")
+
+    # Keep the test hermetic: do not let real host credentials/providers leak
+    # into provider detection while exercising the auth-store active_provider path.
+    import hermes_cli.auth as hermes_auth
+    import hermes_cli.models as hermes_models
+
+    def _never_logged_in(provider_id):
+        return {"logged_in": False, "key_source": ""}
+
+    monkeypatch.setattr(hermes_models, "list_available_providers", lambda: [])
+    monkeypatch.setattr(hermes_auth, "get_auth_status", _never_logged_in)
+    _write_auth_store(hermes_home, provider_id)
+    config.reload_config()
+    _reset_memory_cache()
+
+
+def test_memory_models_cache_invalidates_when_auth_store_active_provider_changes(
+    tmp_path, monkeypatch
+):
+    """A memory cache stamped with an outdated source fingerprint is not served."""
+    _configure_isolated_sources(tmp_path, monkeypatch, "opencode-go")
+    # Simulate a cache populated before the external CLI auth-store write.
+    outdated_fingerprint = {
+        "auth_json": {"path": "old-auth.json", "mtime_ns": 1, "size": 10},
+        "config_yaml": {"path": "old-config.yaml", "mtime_ns": 1, "size": 10},
+    }
+    with config._available_models_cache_lock:
+        config._available_models_cache = _valid_models_cache("openrouter", "minimax-m2.7")
+        config._available_models_cache_ts = time.monotonic()
+        if hasattr(config, "_available_models_cache_source_fingerprint"):
+            config._available_models_cache_source_fingerprint = outdated_fingerprint
+
+    result = config.get_available_models()
+    assert result["active_provider"] == "opencode-go"
+    provider_ids = [group.get("provider_id") for group in result["groups"]]
+    assert "openrouter" not in provider_ids
+    assert "opencode-go" in provider_ids
+
+
+def test_disk_models_cache_invalidates_when_auth_store_active_provider_changes(
+    tmp_path, monkeypatch
+):
+    """A disk cache saved before auth.json changed must not be served afterwards."""
+    _configure_isolated_sources(tmp_path, monkeypatch, "openrouter")
+    config._save_models_cache_to_disk(_valid_models_cache("openrouter", "minimax-m2.7"))
+    assert config._models_cache_path.exists()
+
+    # External terminal `hermes setup` changes auth.json, not WebUI's in-process cache.
+    _write_auth_store(tmp_path / "hermes-home", "opencode-go")
+    _reset_memory_cache()
+
+    result = config.get_available_models()
+
+    assert result["active_provider"] == "opencode-go"
+    provider_ids = [group.get("provider_id") for group in result["groups"]]
+    assert "openrouter" not in provider_ids
+    assert "opencode-go" in provider_ids
+
+
+def test_disk_models_cache_still_loads_when_auth_and_config_sources_are_unchanged(
+    tmp_path, monkeypatch
+):
+    """With config.yaml and auth.json untouched, the saved disk cache is returned."""
+    _configure_isolated_sources(tmp_path, monkeypatch, "opencode-go")
+    expected = _valid_models_cache("opencode-go", "glm-5.1")
+    config._save_models_cache_to_disk(expected)
+    _reset_memory_cache()
+
+    result = config.get_available_models()
+    assert result == expected