From 00617d8fb051edf0b3dd3b1cb74d2067ca830c79 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Wed, 11 Mar 2026 19:37:04 +0100 Subject: [PATCH 1/5] Refactor `Dockerfile-cuda-all` into compute-cap stages --- Dockerfile-cuda-all | 83 ++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index 68a609012..44e8f6d43 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -30,9 +30,9 @@ COPY router router COPY Cargo.toml ./ COPY Cargo.lock ./ -RUN cargo chef prepare --recipe-path recipe.json +RUN cargo chef prepare --recipe-path recipe.json -FROM base-builder AS builder +FROM base-builder AS builder-base ARG GIT_SHA ARG DOCKER_LABEL @@ -48,66 +48,81 @@ ARG CARGO_BUILD_INCREMENTAL WORKDIR /usr/src COPY --from=planner /usr/src/recipe.json recipe.json +COPY backends backends +COPY core core +COPY router router +COPY Cargo.toml ./ +COPY Cargo.lock ./ -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - cargo chef cook --release --recipe-path recipe.json && sccache -s; +FROM builder-base AS builder-75 -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; +RUN nvprune --generate-code code=sm_75 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; + CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --features static-linking --no-default-features --recipe-path recipe.json && sccache -s RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; + CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F static-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 && \ + sccache -s + +FROM builder-base AS builder-80 + +RUN nvprune --generate-code code=sm_80 --generate-code code=sm_80 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; + CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; + CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 && \ + sccache -s -COPY backends backends -COPY core core -COPY router router -COPY Cargo.toml ./ -COPY Cargo.lock ./ +FROM builder-base AS builder-90 + +RUN nvprune --generate-code code=sm_90 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; - -RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 + CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; + CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 && \ + sccache -s -RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 +FROM builder-base AS builder-100 + +RUN nvprune --generate-code code=sm_100 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; - -RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 + CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; + CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-100 && \ + sccache -s + +FROM builder-base AS builder-120 -RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-100 +RUN nvprune --generate-code code=sm_120 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; + CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s -RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-120 +RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-120 && \ + sccache -s FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS base @@ -125,11 +140,11 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins cuda-compat-12-9 \ && rm -rf /var/lib/apt/lists/* -COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 -COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 -COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 -COPY --from=builder /usr/src/target/release/text-embeddings-router-100 /usr/local/bin/text-embeddings-router-100 -COPY --from=builder /usr/src/target/release/text-embeddings-router-120 /usr/local/bin/text-embeddings-router-120 +COPY --from=builder-75 /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 +COPY --from=builder-80 /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 +COPY --from=builder-90 /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 +COPY --from=builder-100 /usr/src/target/release/text-embeddings-router-100 /usr/local/bin/text-embeddings-router-100 +COPY --from=builder-120 /usr/src/target/release/text-embeddings-router-120 /usr/local/bin/text-embeddings-router-120 COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh From eff2ee83b35ee01eaca1d4951b98e931940e01b7 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Wed, 11 Mar 2026 19:48:50 +0100 Subject: [PATCH 2/5] Update `COPY` to be explicit --- Dockerfile | 10 +++++----- Dockerfile-cuda | 10 +++++----- Dockerfile-cuda-all | 9 +++++---- Dockerfile-intel | 10 +++++----- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8a5832a25..c176fa77f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,10 +13,10 @@ FROM chef AS planner COPY backends backends COPY core core COPY router router -COPY Cargo.toml ./ -COPY Cargo.lock ./ +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock -RUN cargo chef prepare --recipe-path recipe.json +RUN cargo chef prepare --recipe-path recipe.json FROM chef AS builder @@ -48,8 +48,8 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ COPY backends backends COPY core core COPY router router -COPY Cargo.toml ./ -COPY Cargo.lock ./ +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock FROM builder AS http-builder diff --git a/Dockerfile-cuda b/Dockerfile-cuda index 489f0f444..198eea285 100644 --- a/Dockerfile-cuda +++ b/Dockerfile-cuda @@ -27,10 +27,10 @@ WORKDIR /usr/src COPY backends backends COPY core core COPY router router -COPY Cargo.toml ./ -COPY Cargo.lock ./ +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock -RUN cargo chef prepare --recipe-path recipe.json +RUN cargo chef prepare --recipe-path recipe.json FROM base-builder AS builder @@ -83,8 +83,8 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ COPY backends backends COPY core core COPY router router -COPY Cargo.toml ./ -COPY Cargo.lock ./ +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock FROM builder AS http-builder diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index 44e8f6d43..32acc0db1 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -27,8 +27,8 @@ WORKDIR /usr/src COPY backends backends COPY core core COPY router router -COPY Cargo.toml ./ -COPY Cargo.lock ./ +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock RUN cargo chef prepare --recipe-path recipe.json @@ -48,11 +48,12 @@ ARG CARGO_BUILD_INCREMENTAL WORKDIR /usr/src COPY --from=planner /usr/src/recipe.json recipe.json + COPY backends backends COPY core core COPY router router -COPY Cargo.toml ./ -COPY Cargo.lock ./ +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock FROM builder-base AS builder-75 diff --git a/Dockerfile-intel b/Dockerfile-intel index ad5675729..3bbb9a469 100644 --- a/Dockerfile-intel +++ b/Dockerfile-intel @@ -14,10 +14,10 @@ FROM chef AS planner COPY backends backends COPY core core COPY router router -COPY Cargo.toml ./ -COPY Cargo.lock ./ +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock -RUN cargo chef prepare --recipe-path recipe.json +RUN cargo chef prepare --recipe-path recipe.json FROM chef AS builder @@ -36,8 +36,8 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ COPY backends backends COPY core core COPY router router -COPY Cargo.toml ./ -COPY Cargo.lock ./ +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ From b9ea648aab8eaa3e6eae8489f3b9e0696fb42f33 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Thu, 12 Mar 2026 11:20:52 +0000 Subject: [PATCH 3/5] Put `COPY` within each builder stage --- Dockerfile-cuda-all | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index 32acc0db1..7dc4be2e2 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -49,12 +49,6 @@ WORKDIR /usr/src COPY --from=planner /usr/src/recipe.json recipe.json -COPY backends backends -COPY core core -COPY router router -COPY Cargo.toml Cargo.toml -COPY Cargo.lock Cargo.lock - FROM builder-base AS builder-75 RUN nvprune --generate-code code=sm_75 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a @@ -63,6 +57,12 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --features static-linking --no-default-features --recipe-path recipe.json && sccache -s +COPY backends backends +COPY core core +COPY router router +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock + RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F static-linking -F http --no-default-features && \ @@ -71,12 +71,18 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ FROM builder-base AS builder-80 -RUN nvprune --generate-code code=sm_80 --generate-code code=sm_80 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a +RUN nvprune --generate-code code=sm_80 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s +COPY backends backends +COPY core core +COPY router router +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock + RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ @@ -91,6 +97,12 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s +COPY backends backends +COPY core core +COPY router router +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock + RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ @@ -105,6 +117,12 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s +COPY backends backends +COPY core core +COPY router router +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock + RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ @@ -119,6 +137,12 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s +COPY backends backends +COPY core core +COPY router router +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock + RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ From b66a05646feb0a0c6a6678f0812b9d81bbaec42c Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:46:22 +0000 Subject: [PATCH 4/5] Back to `dynamic-linking` + add 8.9 target (temporarily) --- Dockerfile-cuda-all | 50 +++++++++++++++++++++++++----------------- cuda-all-entrypoint.sh | 4 +++- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index 7dc4be2e2..34ecfebeb 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -51,11 +51,9 @@ COPY --from=planner /usr/src/recipe.json recipe.json FROM builder-base AS builder-75 -RUN nvprune --generate-code code=sm_75 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a - RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --features static-linking --no-default-features --recipe-path recipe.json && sccache -s + CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s COPY backends backends COPY core core @@ -65,17 +63,15 @@ COPY Cargo.lock Cargo.lock RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F static-linking -F http --no-default-features && \ + CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F dynamic-linking -F http --no-default-features && \ mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 && \ sccache -s FROM builder-base AS builder-80 -RUN nvprune --generate-code code=sm_80 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a - RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s + CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s COPY backends backends COPY core core @@ -85,17 +81,33 @@ COPY Cargo.lock Cargo.lock RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ + CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 && \ sccache -s -FROM builder-base AS builder-90 +FROM builder-base AS builder-89 -RUN nvprune --generate-code code=sm_90 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a +RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + CUDA_COMPUTE_CAP=89 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s + +COPY backends backends +COPY core core +COPY router router +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s + CUDA_COMPUTE_CAP=89 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-89 && \ + sccache -s + +FROM builder-base AS builder-90 + +RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ + --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ + CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s COPY backends backends COPY core core @@ -105,17 +117,15 @@ COPY Cargo.lock Cargo.lock RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ + CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 && \ sccache -s FROM builder-base AS builder-100 -RUN nvprune --generate-code code=sm_100 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a - RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s + CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s COPY backends backends COPY core core @@ -125,17 +135,15 @@ COPY Cargo.lock Cargo.lock RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ + CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-100 && \ sccache -s FROM builder-base AS builder-120 -RUN nvprune --generate-code code=sm_120 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a - RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s + CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s COPY backends backends COPY core core @@ -145,7 +153,7 @@ COPY Cargo.lock Cargo.lock RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda -F static-linking -F http --no-default-features && \ + CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-120 && \ sccache -s @@ -163,10 +171,12 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins libssl-dev \ curl \ cuda-compat-12-9 \ + libcublas-12-9 \ && rm -rf /var/lib/apt/lists/* COPY --from=builder-75 /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75 COPY --from=builder-80 /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 +COPY --from=builder-89 /usr/src/target/release/text-embeddings-router-89 /usr/local/bin/text-embeddings-router-89 COPY --from=builder-90 /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 COPY --from=builder-100 /usr/src/target/release/text-embeddings-router-100 /usr/local/bin/text-embeddings-router-100 COPY --from=builder-120 /usr/src/target/release/text-embeddings-router-120 /usr/local/bin/text-embeddings-router-120 diff --git a/cuda-all-entrypoint.sh b/cuda-all-entrypoint.sh index cbd5e926e..da5e1ae3d 100644 --- a/cuda-all-entrypoint.sh +++ b/cuda-all-entrypoint.sh @@ -30,8 +30,10 @@ compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | se if [ ${compute_cap} -eq 75 ]; then exec text-embeddings-router-75 "$@" -elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]; then +elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 89 ]; then exec text-embeddings-router-80 "$@" +elif [ ${compute_cap} -eq 89 ]; then + exec text-embeddings-router-89 "$@" elif [ ${compute_cap} -eq 90 ]; then exec text-embeddings-router-90 "$@" elif [ ${compute_cap} -eq 100 ]; then From 61e8ceefb03df60484032005cb52451e89e4e116 Mon Sep 17 00:00:00 2001 From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com> Date: Thu, 12 Mar 2026 21:09:24 +0100 Subject: [PATCH 5/5] Skip `sccache` temporarily to debug issue --- Dockerfile-cuda-all | 75 +++++++++++---------------------------------- 1 file changed, 18 insertions(+), 57 deletions(-) diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index 34ecfebeb..5f2054360 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -1,7 +1,5 @@ FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS base-builder -ENV SCCACHE=0.10.0 -ENV RUSTC_WRAPPER=/usr/local/bin/sccache ENV PATH="/root/.cargo/bin:${PATH}" # aligned with `cargo-chef` version in `lukemathwalker/cargo-chef:latest-rust-1.92-bookworm` ENV CARGO_CHEF=0.1.73 @@ -12,10 +10,6 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins pkg-config \ && rm -rf /var/lib/apt/lists/* -# Donwload and configure sccache -RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ - chmod +x /usr/local/bin/sccache - COPY rust-toolchain.toml rust-toolchain.toml RUN curl https://sh.rustup.rs -sSf | bash -s -- -y RUN cargo install cargo-chef --version $CARGO_CHEF --locked @@ -37,9 +31,6 @@ FROM base-builder AS builder-base ARG GIT_SHA ARG DOCKER_LABEL -# sccache specific variables -ARG SCCACHE_GHA_ENABLED - # Limit parallelism ARG RAYON_NUM_THREADS=4 ARG CARGO_BUILD_JOBS @@ -51,9 +42,7 @@ COPY --from=planner /usr/src/recipe.json recipe.json FROM builder-base AS builder-75 -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s +RUN CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --features dynamic-linking --no-default-features --recipe-path recipe.json COPY backends backends COPY core core @@ -61,17 +50,12 @@ COPY router router COPY Cargo.toml Cargo.toml COPY Cargo.lock Cargo.lock -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F dynamic-linking -F http --no-default-features && \ - mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 && \ - sccache -s +RUN CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F dynamic-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 FROM builder-base AS builder-80 -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s +RUN CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json COPY backends backends COPY core core @@ -79,17 +63,12 @@ COPY router router COPY Cargo.toml Cargo.toml COPY Cargo.lock Cargo.lock -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ - mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 && \ - sccache -s +RUN CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 FROM builder-base AS builder-89 -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=89 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s +RUN CUDA_COMPUTE_CAP=89 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json COPY backends backends COPY core core @@ -97,17 +76,12 @@ COPY router router COPY Cargo.toml Cargo.toml COPY Cargo.lock Cargo.lock -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=89 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ - mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-89 && \ - sccache -s +RUN CUDA_COMPUTE_CAP=89 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-89 FROM builder-base AS builder-90 -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s +RUN CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json COPY backends backends COPY core core @@ -115,17 +89,12 @@ COPY router router COPY Cargo.toml Cargo.toml COPY Cargo.lock Cargo.lock -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ - mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 && \ - sccache -s +RUN CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 FROM builder-base AS builder-100 -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s +RUN CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json COPY backends backends COPY core core @@ -133,17 +102,12 @@ COPY router router COPY Cargo.toml Cargo.toml COPY Cargo.lock Cargo.lock -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ - mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-100 && \ - sccache -s +RUN CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-100 FROM builder-base AS builder-120 -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json && sccache -s +RUN CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json COPY backends backends COPY core core @@ -151,11 +115,8 @@ COPY router router COPY Cargo.toml Cargo.toml COPY Cargo.lock Cargo.lock -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ - mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-120 && \ - sccache -s +RUN CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \ + mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-120 FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS base