feat(encoders): platform-aware default encoder for new albums

lstein · claude · lstein · commit 9c5100e90cc7 · 2026-06-07T13:13:22.000-04:00
OpenCLIP ViT-L-14 is impractically slow to index/search on CPU-only
Linux/Windows hosts. New albums on those hosts now default to the
lightweight OpenAI CLIP ViT-B/32 instead, while CUDA hosts and macOS
(untested for the lighter path) keep the high-quality ViT-L-14 default.

- encoders.py: add CPU_FALLBACK_ENCODER_SPEC + default_encoder_spec()
  resolver (CUDA/macOS -> ViT-L-14, CPU Linux/Windows -> ViT-B/32)
- config.py: Album.encoder_spec uses default_factory=default_encoder_spec
- routers/album.py: GET /default_encoder/ exposes the host-resolved default
- album-manager.js: new-album dropdown pre-selects the server default
  (cached fetch, falls back to recommended option on failure)

Existing albums keep their stored encoder_spec; only the default for
newly created albums changes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
diff --git a/photomap/backend/config.py b/photomap/backend/config.py
@@ -15,7 +15,7 @@
 from platformdirs import user_config_dir
 from pydantic import BaseModel, Field, field_validator, model_validator
 
-from .encoders import DEFAULT_ENCODER_SPEC, LEGACY_ENCODER_SPEC
+from .encoders import LEGACY_ENCODER_SPEC, default_encoder_spec
 from .util import atomic_write_text
 
 logger = logging.getLogger(__name__)
@@ -33,10 +33,12 @@ class Album(BaseModel):
     umap_eps: float = Field(default=0.2, description="UMAP epsilon parameter")
     description: str = Field(default="", description="Album description")
     encoder_spec: str = Field(
-        default=DEFAULT_ENCODER_SPEC,
+        # Resolved per-host: OpenCLIP ViT-L-14 on CUDA/macOS, lighter OpenAI CLIP
+        # ViT-B/32 on CPU-only Linux/Windows. See encoders.default_encoder_spec.
+        default_factory=default_encoder_spec,
         description=(
             "Image/text encoder spec. Format: '<backend>:<model>'. "
-            "Examples: 'openai-clip:ViT-B/32' (default, legacy), "
+            "Examples: 'openai-clip:ViT-B/32' (legacy, CPU default), "
             "'open-clip:ViT-L-14/dfn2b', 'siglip:google/siglip2-large-patch16-256'. "
             "Changing this requires re-indexing the album."
         ),
@@ -124,8 +126,9 @@ def from_dict(cls, key: str, data: dict[str, Any]) -> "Album":
             description=data.get("description", ""),
             # Legacy YAML albums predate the encoder_spec field; their indexes
             # were built with the original CLIP, so fall back to that to stay
-            # cache-compatible. New albums get DEFAULT_ENCODER_SPEC via the
-            # Album field default when the frontend creates them.
+            # cache-compatible. New albums get the host-resolved default
+            # (encoders.default_encoder_spec) via the Album field default when
+            # the frontend creates them.
             encoder_spec=data.get("encoder_spec", LEGACY_ENCODER_SPEC),
             min_search_score=data.get("min_search_score"),
             max_search_results=data.get("max_search_results", 100),
diff --git a/photomap/backend/encoders.py b/photomap/backend/encoders.py
@@ -14,6 +14,7 @@
 
 import logging
 import math
+import sys
 import threading
 import time
 from abc import ABC, abstractmethod
@@ -39,6 +40,29 @@
 # a compatibility marker, not a tunable.
 LEGACY_ENCODER_SPEC = "openai-clip:ViT-B/32"
 
+# Default encoder for *new* albums on Linux/Windows hosts without CUDA. The
+# OpenCLIP ViT-L-14 DEFAULT_ENCODER_SPEC is impractically slow to index/search
+# on CPU there, so new albums fall back to the much lighter OpenAI CLIP
+# ViT-B/32 (weaker recall, far faster). This happens to be the same spec string
+# as LEGACY_ENCODER_SPEC, but it's a distinct constant on purpose: this one is
+# a tunable CPU default, not the frozen legacy-cache compatibility marker.
+CPU_FALLBACK_ENCODER_SPEC = "openai-clip:ViT-B/32"
+
+
+def default_encoder_spec() -> str:
+    """Pick the encoder spec that *new* albums start with on this host.
+
+    ``DEFAULT_ENCODER_SPEC`` is kept whenever CUDA is available, and also on
+    macOS, where the lighter CPU path is untested. Any other host without
+    CUDA (i.e. CPU-only Linux/Windows) gets ``CPU_FALLBACK_ENCODER_SPEC``,
+    because OpenCLIP ViT-L-14 is far too slow to run on CPU there.
+    """
+    # CUDA is probed first, so a GPU host never depends on the OS check.
+    keeps_high_quality = torch.cuda.is_available() or sys.platform == "darwin"
+    if keeps_high_quality:
+        return DEFAULT_ENCODER_SPEC
+    return CPU_FALLBACK_ENCODER_SPEC
+
 # When True, SigLIP's encode_text wraps each query in every entry of
 # SIGLIP_PROMPT_TEMPLATES, encodes them all, L2-normalizes each per-template
 # embedding, mean-pools across templates, and re-normalizes. Intended to make
diff --git a/photomap/backend/routers/album.py b/photomap/backend/routers/album.py
@@ -9,6 +9,7 @@
 
 from ..config import Album, create_album, get_config_manager
 from ..embeddings import Embeddings
+from ..encoders import default_encoder_spec
 
 
 class UmapEpsSetRequest(BaseModel):
@@ -201,6 +202,17 @@ async def get_available_albums() -> list[dict[str, Any]]:
         return []
 
 
+@album_router.get("/default_encoder/", tags=["Albums"])
+async def get_default_encoder() -> dict[str, str]:
+    """Report the encoder spec that new albums default to on this host.
+
+    The value is platform-aware (see ``default_encoder_spec``), so the
+    frontend asks the server rather than hardcoding a dropdown default.
+    """
+    resolved_spec = default_encoder_spec()
+    return {"encoder_spec": resolved_spec}
+
+
 @album_router.get("/album/{album_key}/", tags=["Albums"])
 async def get_album(album: AlbumDep) -> Album:
     """Get details of a specific album."""
diff --git a/photomap/frontend/static/javascript/album-manager.js b/photomap/frontend/static/javascript/album-manager.js
@@ -8,8 +8,9 @@ import { fetchJson, hideSpinner, showSpinner } from "./utils.js";
 
 // Encoder backends offered in the album manager dropdown. Values must match
 // the spec format consumed by photomap.backend.encoders.build_encoder.
-// The first entry is the default selection for new albums and must stay in
-// sync with backend ``DEFAULT_ENCODER_SPEC`` (encoders.py).
+// The pre-selected default for new albums is fetched from the server
+// (getServerDefaultEncoderSpec) since it's platform-aware; ENCODER_OPTIONS[0]
+// is only the client-side fallback when that request fails.
 export const ENCODER_OPTIONS = [
   {
     value: "open-clip:ViT-L-14/dfn2b_s39b",
@@ -26,6 +27,21 @@ export const ENCODER_OPTIONS = [
 ];
 const DEFAULT_ENCODER_SPEC = ENCODER_OPTIONS[0].value;
 
+// Default encoder for *new* albums: platform-aware (CPU-only Linux/Windows
+// get a lighter one than CUDA/macOS), so it is fetched from the server.
+let serverDefaultEncoderPromise = null;
+function getServerDefaultEncoderSpec() {
+  if (!serverDefaultEncoderPromise) {
+    serverDefaultEncoderPromise = fetchJson("default_encoder/")
+      .then((data) => data?.encoder_spec || DEFAULT_ENCODER_SPEC)
+      .catch(() => {
+        serverDefaultEncoderPromise = null; // don't cache a failure: retry next call
+        return DEFAULT_ENCODER_SPEC; // meanwhile fall back to the recommended option
+      });
+  }
+  return serverDefaultEncoderPromise;
+}
+
 function populateEncoderSelect(selectEl, currentValue) {
   if (!selectEl) {
     return;
@@ -273,8 +289,8 @@ export class AlbumManager {
       this.elements.newAlbumPathsContainer.innerHTML = "";
     }
 
-    // Reset encoder dropdown to the default
-    populateEncoderSelect(this.elements.newAlbumEncoder, DEFAULT_ENCODER_SPEC);
+    // Reset encoder dropdown to the host-resolved default
+    getServerDefaultEncoderSpec().then((spec) => populateEncoderSelect(this.elements.newAlbumEncoder, spec));
   }
 
   // Form management
@@ -286,8 +302,8 @@ export class AlbumManager {
     // Initialize path fields for the add album form
     this.initializeNewAlbumPathFields();
 
-    // Initialize encoder dropdown
-    populateEncoderSelect(this.elements.newAlbumEncoder, DEFAULT_ENCODER_SPEC);
+    // Initialize encoder dropdown to the host-resolved default
+    getServerDefaultEncoderSpec().then((spec) => populateEncoderSelect(this.elements.newAlbumEncoder, spec));
 
     // Focus on the first input field
     this.elements.newAlbumKey.focus();
diff --git a/tests/backend/test_albums.py b/tests/backend/test_albums.py
@@ -169,6 +169,31 @@ def test_album_routes(client):
     assert len(albums) == 0
 
 
+def test_default_encoder_endpoint(client, monkeypatch):
+    """/default_encoder/ relays the resolver's answer, which is the value the
+    frontend pre-selects in the new-album dropdown."""
+    from photomap.backend.encoders import (
+        CPU_FALLBACK_ENCODER_SPEC,
+        DEFAULT_ENCODER_SPEC,
+    )
+
+    # The router imports the resolver by name, so the stub has to be installed
+    # on the router module for the endpoint to see it.
+    resolver_path = "photomap.backend.routers.album.default_encoder_spec"
+
+    def served_spec_when_resolver_returns(spec):
+        """Stub the resolver with *spec* and return the endpoint's JSON."""
+        monkeypatch.setattr(resolver_path, lambda: spec)
+        return client.get("/default_encoder/").json()
+
+    assert served_spec_when_resolver_returns(CPU_FALLBACK_ENCODER_SPEC) == {
+        "encoder_spec": CPU_FALLBACK_ENCODER_SPEC
+    }
+    assert served_spec_when_resolver_returns(DEFAULT_ENCODER_SPEC) == {
+        "encoder_spec": DEFAULT_ENCODER_SPEC
+    }
+
+
 def test_encoder_spec_round_trips_through_available_albums(client, tmp_path):
     """Regression: /available_albums/ used to strip encoder_spec, which
     caused the album-manager edit form to always show the default encoder
diff --git a/tests/backend/test_encoders.py b/tests/backend/test_encoders.py
@@ -14,6 +14,7 @@
 
 from photomap.backend import encoders as encoders_module
 from photomap.backend.encoders import (
+    CPU_FALLBACK_ENCODER_SPEC,
     DEFAULT_ENCODER_SPEC,
     LEGACY_ENCODER_SPEC,
     EmbeddingCacheMismatch,
@@ -23,6 +24,7 @@
     SiglipEncoder,
     build_encoder,
     clear_encoder_cache,
+    default_encoder_spec,
     get_cached_encoder,
 )
 
@@ -32,6 +34,33 @@ def test_default_spec_for_new_albums():
     assert DEFAULT_ENCODER_SPEC == "open-clip:ViT-L-14/dfn2b_s39b"
 
 
+def test_cpu_fallback_spec_is_light_clip():
+    """Pin CPU_FALLBACK_ENCODER_SPEC, the new-album default on CPU-only Linux/Windows."""
+    assert CPU_FALLBACK_ENCODER_SPEC == "openai-clip:ViT-B/32"
+
+
+def test_default_encoder_spec_cuda_uses_high_quality(monkeypatch):
+    """With CUDA available, even a non-macOS host keeps the high-quality default."""
+    monkeypatch.setattr(encoders_module.sys, "platform", "win32")
+    monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: True)
+    assert DEFAULT_ENCODER_SPEC == default_encoder_spec()
+
+
+def test_default_encoder_spec_macos_uses_high_quality(monkeypatch):
+    """A macOS host without CUDA still resolves to the high-quality default."""
+    monkeypatch.setattr(encoders_module.sys, "platform", "darwin")
+    monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: False)
+    assert DEFAULT_ENCODER_SPEC == default_encoder_spec()
+
+
+@pytest.mark.parametrize("platform", ["linux", "win32"])
+def test_default_encoder_spec_cpu_linux_windows_falls_back(monkeypatch, platform):
+    """Without CUDA, Linux and Windows hosts resolve to the lighter encoder."""
+    monkeypatch.setattr(encoders_module.sys, "platform", platform)
+    monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: False)
+    assert CPU_FALLBACK_ENCODER_SPEC == default_encoder_spec()
+
+
 def test_legacy_spec_unchanged():
     """LEGACY_ENCODER_SPEC is a compatibility marker for caches that predate
     the encoder swap layer. Don't change this — it's pinned to the original