Skip to content

Commit 9c5100e

Browse files
lstein and claude committed
feat(encoders): platform-aware default encoder for new albums
OpenCLIP ViT-L-14 is impractically slow to index/search on CPU-only Linux/Windows hosts. New albums on those hosts now default to the lightweight OpenAI CLIP ViT-B/32 instead, while CUDA hosts and macOS (untested for the lighter path) keep the high-quality ViT-L-14 default.

- encoders.py: add CPU_FALLBACK_ENCODER_SPEC + default_encoder_spec() resolver (CUDA/macOS -> ViT-L-14, CPU Linux/Windows -> ViT-B/32)
- config.py: Album.encoder_spec uses default_factory=default_encoder_spec
- routers/album.py: GET /default_encoder/ exposes the host-resolved default
- album-manager.js: new-album dropdown pre-selects the server default (cached fetch, falls back to recommended option on failure)

Existing albums keep their stored encoder_spec; only the default for newly created albums changes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent cf60669 commit 9c5100e

6 files changed

Lines changed: 120 additions & 11 deletions

File tree

photomap/backend/config.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from platformdirs import user_config_dir
1616
from pydantic import BaseModel, Field, field_validator, model_validator
1717

18-
from .encoders import DEFAULT_ENCODER_SPEC, LEGACY_ENCODER_SPEC
18+
from .encoders import LEGACY_ENCODER_SPEC, default_encoder_spec
1919
from .util import atomic_write_text
2020

2121
logger = logging.getLogger(__name__)
@@ -33,10 +33,12 @@ class Album(BaseModel):
3333
umap_eps: float = Field(default=0.2, description="UMAP epsilon parameter")
3434
description: str = Field(default="", description="Album description")
3535
encoder_spec: str = Field(
36-
default=DEFAULT_ENCODER_SPEC,
36+
# Resolved per-host: OpenCLIP ViT-L-14 on CUDA/macOS, lighter OpenAI CLIP
37+
# ViT-B/32 on CPU-only Linux/Windows. See encoders.default_encoder_spec.
38+
default_factory=default_encoder_spec,
3739
description=(
3840
"Image/text encoder spec. Format: '<backend>:<model>'. "
39-
"Examples: 'openai-clip:ViT-B/32' (default, legacy), "
41+
"Examples: 'openai-clip:ViT-B/32' (legacy, CPU default), "
4042
"'open-clip:ViT-L-14/dfn2b', 'siglip:google/siglip2-large-patch16-256'. "
4143
"Changing this requires re-indexing the album."
4244
),
@@ -124,8 +126,9 @@ def from_dict(cls, key: str, data: dict[str, Any]) -> "Album":
124126
description=data.get("description", ""),
125127
# Legacy YAML albums predate the encoder_spec field; their indexes
126128
# were built with the original CLIP, so fall back to that to stay
127-
# cache-compatible. New albums get DEFAULT_ENCODER_SPEC via the
128-
# Album field default when the frontend creates them.
129+
# cache-compatible. New albums get the host-resolved default
130+
# (encoders.default_encoder_spec) via the Album field default when
131+
# the frontend creates them.
129132
encoder_spec=data.get("encoder_spec", LEGACY_ENCODER_SPEC),
130133
min_search_score=data.get("min_search_score"),
131134
max_search_results=data.get("max_search_results", 100),

photomap/backend/encoders.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import logging
1616
import math
17+
import sys
1718
import threading
1819
import time
1920
from abc import ABC, abstractmethod
@@ -39,6 +40,29 @@
3940
# a compatibility marker, not a tunable.
4041
LEGACY_ENCODER_SPEC = "openai-clip:ViT-B/32"
4142

43+
# Default encoder for *new* albums on Linux/Windows hosts without CUDA. The
44+
# OpenCLIP ViT-L-14 DEFAULT_ENCODER_SPEC is impractically slow to index/search
45+
# on CPU there, so new albums fall back to the much lighter OpenAI CLIP
46+
# ViT-B/32 (weaker recall, far faster). This happens to be the same spec string
47+
# as LEGACY_ENCODER_SPEC, but it's a distinct constant on purpose: this one is
48+
# a tunable CPU default, not the frozen legacy-cache compatibility marker.
49+
CPU_FALLBACK_ENCODER_SPEC = "openai-clip:ViT-B/32"
50+
51+
52+
def default_encoder_spec() -> str:
    """Pick the encoder spec that newly created albums start with on this host.

    Returns:
        ``DEFAULT_ENCODER_SPEC`` when CUDA is available (on any OS) or when
        running on macOS; ``CPU_FALLBACK_ENCODER_SPEC`` on every other host,
        i.e. Linux/Windows machines without CUDA.
    """
    # macOS is deliberately grouped with CUDA hosts: the lighter CPU path is
    # untested there, so it stays on the high-quality default.
    keeps_high_quality_default = (
        torch.cuda.is_available() or sys.platform == "darwin"
    )
    return (
        DEFAULT_ENCODER_SPEC
        if keeps_high_quality_default
        else CPU_FALLBACK_ENCODER_SPEC
    )
65+
4266
# When True, SigLIP's encode_text wraps each query in every entry of
4367
# SIGLIP_PROMPT_TEMPLATES, encodes them all, L2-normalizes each per-template
4468
# embedding, mean-pools across templates, and re-normalizes. Intended to make

photomap/backend/routers/album.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from ..config import Album, create_album, get_config_manager
1111
from ..embeddings import Embeddings
12+
from ..encoders import default_encoder_spec
1213

1314

1415
class UmapEpsSetRequest(BaseModel):
@@ -201,6 +202,17 @@ async def get_available_albums() -> list[dict[str, Any]]:
201202
return []
202203

203204

205+
@album_router.get("/default_encoder/", tags=["Albums"])
async def get_default_encoder() -> dict[str, str]:
    """Report the encoder spec that new albums default to on this host.

    The spec is resolved by ``default_encoder_spec()`` on every request, so
    the frontend can pre-select the host-appropriate option instead of
    hardcoding one default in its dropdown.

    Returns:
        A single-key mapping: ``{"encoder_spec": <resolved spec>}``.
    """
    resolved_spec = default_encoder_spec()
    return {"encoder_spec": resolved_spec}
214+
215+
204216
@album_router.get("/album/{album_key}/", tags=["Albums"])
205217
async def get_album(album: AlbumDep) -> Album:
206218
"""Get details of a specific album."""

photomap/frontend/static/javascript/album-manager.js

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ import { fetchJson, hideSpinner, showSpinner } from "./utils.js";
88

99
// Encoder backends offered in the album manager dropdown. Values must match
1010
// the spec format consumed by photomap.backend.encoders.build_encoder.
11-
// The first entry is the default selection for new albums and must stay in
12-
// sync with backend ``DEFAULT_ENCODER_SPEC`` (encoders.py).
11+
// The pre-selected default for new albums is fetched from the server
12+
// (getServerDefaultEncoderSpec) since it's platform-aware; ENCODER_OPTIONS[0]
13+
// is only the client-side fallback when that request fails.
1314
export const ENCODER_OPTIONS = [
1415
{
1516
value: "open-clip:ViT-L-14/dfn2b_s39b",
@@ -26,6 +27,21 @@ export const ENCODER_OPTIONS = [
2627
];
2728
const DEFAULT_ENCODER_SPEC = ENCODER_OPTIONS[0].value;
2829

30+
// The default encoder for *new* albums is resolved by the server: it's
31+
// platform-aware (CPU-only Linux/Windows hosts get a lighter encoder than
32+
// CUDA/macOS hosts), so we can't hardcode ENCODER_OPTIONS[0] here. Fetch it
33+
// once, cache the promise, and fall back to the recommended option if the
34+
// request fails.
35+
// Memoized lookup of the server's default encoder spec. Holds the pending or
// resolved promise; reset to null when a request fails so a later call retries.
let serverDefaultEncoderPromise = null;

/**
 * Resolve the encoder spec to pre-select for a new album.
 *
 * Asks the server's "default_encoder/" endpoint and reuses that promise for
 * later calls. If the request fails, the returned promise still resolves (to
 * the client-side DEFAULT_ENCODER_SPEC), but the failure is not cached: a
 * transient error must not pin the client-side fallback for the rest of the
 * page session, so the next call issues a fresh request.
 *
 * @returns {Promise<string>} the encoder spec to pre-select; never rejects.
 */
function getServerDefaultEncoderSpec() {
  if (!serverDefaultEncoderPromise) {
    serverDefaultEncoderPromise = fetchJson("default_encoder/")
      .then((data) => data?.encoder_spec || DEFAULT_ENCODER_SPEC)
      .catch(() => {
        // Drop the cached promise so the next caller retries the request;
        // callers already waiting on this one get the client-side fallback.
        serverDefaultEncoderPromise = null;
        return DEFAULT_ENCODER_SPEC;
      });
  }
  return serverDefaultEncoderPromise;
}
44+
2945
function populateEncoderSelect(selectEl, currentValue) {
3046
if (!selectEl) {
3147
return;
@@ -273,8 +289,8 @@ export class AlbumManager {
273289
this.elements.newAlbumPathsContainer.innerHTML = "";
274290
}
275291

276-
// Reset encoder dropdown to the default
277-
populateEncoderSelect(this.elements.newAlbumEncoder, DEFAULT_ENCODER_SPEC);
292+
// Reset encoder dropdown to the host-resolved default
293+
getServerDefaultEncoderSpec().then((spec) => populateEncoderSelect(this.elements.newAlbumEncoder, spec));
278294
}
279295

280296
// Form management
@@ -286,8 +302,8 @@ export class AlbumManager {
286302
// Initialize path fields for the add album form
287303
this.initializeNewAlbumPathFields();
288304

289-
// Initialize encoder dropdown
290-
populateEncoderSelect(this.elements.newAlbumEncoder, DEFAULT_ENCODER_SPEC);
305+
// Initialize encoder dropdown to the host-resolved default
306+
getServerDefaultEncoderSpec().then((spec) => populateEncoderSelect(this.elements.newAlbumEncoder, spec));
291307

292308
// Focus on the first input field
293309
this.elements.newAlbumKey.focus();

tests/backend/test_albums.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,31 @@ def test_album_routes(client):
169169
assert len(albums) == 0
170170

171171

172+
def test_default_encoder_endpoint(client, monkeypatch):
    """GET /default_encoder/ echoes whatever the router's resolver returns.

    The resolver is patched where the router looks it up, first to the CPU
    fallback spec and then to the high-quality default, and the endpoint must
    report each one under the ``encoder_spec`` key.
    """
    from photomap.backend import encoders

    resolver_path = "photomap.backend.routers.album.default_encoder_spec"
    forced_specs = (
        encoders.CPU_FALLBACK_ENCODER_SPEC,
        encoders.DEFAULT_ENCODER_SPEC,
    )
    for forced_spec in forced_specs:
        # Bind the spec as a default argument so each lambda returns its own value.
        monkeypatch.setattr(resolver_path, lambda spec=forced_spec: spec)
        payload = client.get("/default_encoder/").json()
        assert payload == {"encoder_spec": forced_spec}
195+
196+
172197
def test_encoder_spec_round_trips_through_available_albums(client, tmp_path):
173198
"""Regression: /available_albums/ used to strip encoder_spec, which
174199
caused the album-manager edit form to always show the default encoder

tests/backend/test_encoders.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from photomap.backend import encoders as encoders_module
1616
from photomap.backend.encoders import (
17+
CPU_FALLBACK_ENCODER_SPEC,
1718
DEFAULT_ENCODER_SPEC,
1819
LEGACY_ENCODER_SPEC,
1920
EmbeddingCacheMismatch,
@@ -23,6 +24,7 @@
2324
SiglipEncoder,
2425
build_encoder,
2526
clear_encoder_cache,
27+
default_encoder_spec,
2628
get_cached_encoder,
2729
)
2830

@@ -32,6 +34,33 @@ def test_default_spec_for_new_albums():
3234
assert DEFAULT_ENCODER_SPEC == "open-clip:ViT-L-14/dfn2b_s39b"
3335

3436

37+
def test_cpu_fallback_spec_is_light_clip():
    """The CPU fallback constant is pinned to the light OpenAI CLIP ViT-B/32."""
    expected_spec = "openai-clip:ViT-B/32"
    assert CPU_FALLBACK_ENCODER_SPEC == expected_spec
40+
41+
42+
@pytest.mark.parametrize("platform", ["linux", "win32", "darwin"])
def test_default_encoder_spec_cuda_uses_high_quality(monkeypatch, platform):
    """Any host with CUDA gets the high-quality default regardless of OS.

    Parametrized over every platform the resolver distinguishes, so the
    "regardless of OS" claim is actually exercised rather than checked on
    Windows only.
    """
    monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: True)
    monkeypatch.setattr(encoders_module.sys, "platform", platform)
    assert default_encoder_spec() == DEFAULT_ENCODER_SPEC
47+
48+
49+
def test_default_encoder_spec_macos_uses_high_quality(monkeypatch):
    """Without CUDA, a macOS host still resolves to the high-quality default."""
    monkeypatch.setattr(encoders_module.sys, "platform", "darwin")
    monkeypatch.setattr(encoders_module.torch.cuda, "is_available", lambda: False)
    resolved_spec = default_encoder_spec()
    assert resolved_spec == DEFAULT_ENCODER_SPEC
54+
55+
56+
@pytest.mark.parametrize("platform", ["linux", "win32"])
def test_default_encoder_spec_cpu_linux_windows_falls_back(monkeypatch, platform):
    """Linux and Windows hosts without CUDA resolve to the lighter fallback."""
    host_overrides = (
        (encoders_module.torch.cuda, "is_available", lambda: False),
        (encoders_module.sys, "platform", platform),
    )
    for target, attribute, replacement in host_overrides:
        monkeypatch.setattr(target, attribute, replacement)
    assert default_encoder_spec() == CPU_FALLBACK_ENCODER_SPEC
62+
63+
3564
def test_legacy_spec_unchanged():
3665
"""LEGACY_ENCODER_SPEC is a compatibility marker for caches that predate
3766
the encoder swap layer. Don't change this — it's pinned to the original

0 commit comments

Comments
 (0)