---
# ATLAS docker-compose override for the Vulkan universal backend (#114).
#
# Bring up the stack with both files:
#   docker compose -f docker-compose.yml -f docker-compose.vulkan.yml up -d
#
# Only llama-server differs between backends (it's the only GPU-using
# service). geometric-lens, v3-service, sandbox, and atlas-proxy are
# CPU-only and unchanged.
#
# Vulkan is the safety-net backend — slower than native CUDA/ROCm/Metal
# but one image covers ALL of:
#   * AMD GPUs via Mesa RADV ICD (no rocm install needed)
#   * Intel Arc / Xe iGPU via Mesa ANV ICD
#   * NVIDIA GPUs via nvidia-container-toolkit (Vulkan ICD ships with
#     the host's NVIDIA driver, mounted via the toolkit when
#     NVIDIA_DRIVER_CAPABILITIES includes `graphics`)
#   * Snapdragon Adreno via the user-space driver on the host
#   * Apple Silicon via MoltenVK + qemu (the slow path; native macOS
#     install is the fast path — separate ticket #32)
#   * Pure CPU via Mesa lavapipe software rasterizer (very slow but
#     functional fallback for hardware nothing else supports)
#
# Hardware prereqs on the host:
#   * For AMD/Intel: /dev/dri/* render nodes accessible to docker
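#   * For NVIDIA: nvidia-container-toolkit installed (same as the
#     default CUDA stack). Note that this overlay resets the base
#     file's GPU device reservation, so NVIDIA users must re-add it and
#     set NVIDIA_DRIVER_CAPABILITIES=graphics,compute,utility in an
#     additional override file (see the comments under llama-server)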
#   * For Snapdragon: vendor driver in user space, /dev/dri devices
#   * For CPU lavapipe: nothing extra; the lavapipe ICD ships in
#     mesa-vulkan-drivers inside the image
#
# Verify with: docker compose -f docker-compose.yml -f docker-compose.vulkan.yml \
#                exec llama-server vulkaninfo --summary

services:
  llama-server:
    # Vulkan build of the llama-server image, used instead of the
    # CUDA/ROCm image. Registry owner and tag are taken from
    # ATLAS_GHCR_OWNER / ATLAS_IMAGE_TAG and default to itigges22 / latest.
    image: "ghcr.io/${ATLAS_GHCR_OWNER:-itigges22}/atlas-llama-vulkan:${ATLAS_IMAGE_TAG:-latest}"
    build:
      context: ./inference
      dockerfile: Dockerfile.vulkan
      args:
        # Defaults to Ubuntu 24.04: 22.04 ships vulkan-headers 1.3.204,
        # which is older than the 1.3.246+ APIs used by current llama.cpp
        # Vulkan code. The ARG UBUNTU_TAG comment in
        # inference/Dockerfile.vulkan has the full rationale. Set
        # ATLAS_UBUNTU_TAG=22.04 to override if needed.
        UBUNTU_TAG: "${ATLAS_UBUNTU_TAG:-24.04}"
    # Clear the NVIDIA device reservation inherited from the base file so
    # the service can start on AMD / Intel / Snapdragon / CPU hosts.
    # `!reset` is the Compose merge tag for dropping a value defined in an
    # earlier file; the ROCm overlay uses the same pattern for the same
    # reason. NVIDIA users should keep the base resource block (restore it
    # via docker-compose.override.yml, or just use docker-compose.yml's
    # CUDA image).
    # See: https://docs.docker.com/reference/compose-file/merge/#reset-value
    deploy:
      resources:
        reservations:
          devices: !reset []
    devices:
      # DRM render nodes under /dev/dri, used by Mesa RADV (AMD), Mesa ANV
      # (Intel), and Adreno (Snapdragon). The lavapipe CPU ICD does not
      # need them; when no GPU ICD is usable, Vulkan picks lavapipe.
      # NVIDIA hosts going through the NVIDIA Vulkan ICD via the toolkit
      # need this as well; it is harmless if the toolkit already mounts
      # libGLX_nvidia separately.
      # NOTE(review): this is only harmless on GPU-less hosts that still
      # expose /dev/dri. Docker normally refuses to start a container
      # whose mapped host device path is missing — confirm on a host
      # without /dev/dri (it may need to drop this entry in an override).
      - /dev/dri
    # Supplementary groups that grant access to the /dev/dri nodes.
    # `video` and `render` are standard on Linux distros and are already
    # required by the ROCm overlay. Without effective membership,
    # llama-server fails to enumerate GPU ICDs and Vulkan silently falls
    # back to lavapipe (CPU).
    # NOTE(review): Docker resolves group names against the container's
    # /etc/group, so the image must define both groups, and their GIDs
    # must match the host's for device access to work — confirm against
    # inference/Dockerfile.vulkan; numeric GIDs are the usual workaround.
    group_add:
      - video
      - render
    environment:
      # Select the vulkan branch of the entrypoint even when .env was
      # generated by an older `atlas init` that predates this backend.
      - "ATLAS_BACKEND=vulkan"
      # Falls back to 0 when ATLAS_GPU_INDEX is unset or empty.
      - "ATLAS_GPU_INDEX=${ATLAS_GPU_INDEX:-0}"
      # Optional pass-through for MESA_VK_DEVICE_SELECT, for setups where
      # GGML_VK_VISIBLE_DEVICES is not granular enough (several Intel Arc
      # cards, or mixed iGPU+dGPU systems where the dGPU is wanted).
      # Format: "vendorID:deviceID" in hex, or a device name substring.
      # Empty by default.
      - "ATLAS_VK_DEVICE_SELECT=${ATLAS_VK_DEVICE_SELECT:-}"
      # NVIDIA users routing through the NVIDIA Vulkan ICD: add the
      # variable below to this service's environment in your
      # docker-compose.override.yml so the toolkit exposes the graphics
      # capability (the Vulkan ICD lives under `graphics`, not
      # `compute`):
      #   NVIDIA_DRIVER_CAPABILITIES: graphics,compute,utility

  # geometric-lens, v3-service, sandbox, atlas-proxy: no override
  # needed — all CPU-only services. The override merge leaves them alone.