Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ FROM chef AS planner
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN cargo chef prepare --recipe-path recipe.json
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

Expand Down Expand Up @@ -48,8 +48,8 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

FROM builder AS http-builder

Expand Down
10 changes: 5 additions & 5 deletions Dockerfile-cuda
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ WORKDIR /usr/src
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN cargo chef prepare --recipe-path recipe.json
RUN cargo chef prepare --recipe-path recipe.json

FROM base-builder AS builder

Expand Down Expand Up @@ -83,8 +83,8 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

FROM builder AS http-builder

Expand Down
127 changes: 69 additions & 58 deletions Dockerfile-cuda-all
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
FROM nvidia/cuda:12.9.1-devel-ubuntu24.04 AS base-builder

ENV SCCACHE=0.10.0
ENV RUSTC_WRAPPER=/usr/local/bin/sccache
ENV PATH="/root/.cargo/bin:${PATH}"
# aligned with `cargo-chef` version in `lukemathwalker/cargo-chef:latest-rust-1.92-bookworm`
ENV CARGO_CHEF=0.1.73
Expand All @@ -12,10 +10,6 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
pkg-config \
&& rm -rf /var/lib/apt/lists/*

# Donwload and configure sccache
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
chmod +x /usr/local/bin/sccache

COPY rust-toolchain.toml rust-toolchain.toml
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
RUN cargo install cargo-chef --version $CARGO_CHEF --locked
Expand All @@ -27,19 +21,16 @@ WORKDIR /usr/src
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN cargo chef prepare --recipe-path recipe.json
RUN cargo chef prepare --recipe-path recipe.json

FROM base-builder AS builder
FROM base-builder AS builder-base

ARG GIT_SHA
ARG DOCKER_LABEL

# sccache specific variables
ARG SCCACHE_GHA_ENABLED

# Limit parallelism
ARG RAYON_NUM_THREADS=4
ARG CARGO_BUILD_JOBS
Expand All @@ -49,65 +40,83 @@ WORKDIR /usr/src

COPY --from=planner /usr/src/recipe.json recipe.json

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
cargo chef cook --release --recipe-path recipe.json && sccache -s;
FROM builder-base AS builder-75

RUN CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --features dynamic-linking --no-default-features --recipe-path recipe.json

COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F dynamic-linking -F http --no-default-features && \
mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75

FROM builder-base AS builder-80

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s;
RUN CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;
RUN CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \
mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;
FROM builder-base AS builder-89

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;
RUN CUDA_COMPUTE_CAP=89 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json

COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s;
RUN CUDA_COMPUTE_CAP=89 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \
mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-89

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75
FROM builder-base AS builder-90

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;
RUN CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \
mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90

FROM builder-base AS builder-100

RUN CUDA_COMPUTE_CAP=100 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90
RUN CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \
mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-100

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=100 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;
FROM builder-base AS builder-120

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-100
RUN CUDA_COMPUTE_CAP=120 cargo chef cook --release --features candle-cuda --features dynamic-linking --no-default-features --recipe-path recipe.json

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-120
RUN CUDA_COMPUTE_CAP=120 cargo build --release --bin text-embeddings-router -F candle-cuda -F dynamic-linking -F http --no-default-features && \
mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-120

FROM nvidia/cuda:12.9.1-runtime-ubuntu24.04 AS base

Expand All @@ -123,13 +132,15 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
libssl-dev \
curl \
cuda-compat-12-9 \
libcublas-12-9 \
&& rm -rf /var/lib/apt/lists/*

COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75
COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80
COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90
COPY --from=builder /usr/src/target/release/text-embeddings-router-100 /usr/local/bin/text-embeddings-router-100
COPY --from=builder /usr/src/target/release/text-embeddings-router-120 /usr/local/bin/text-embeddings-router-120
COPY --from=builder-75 /usr/src/target/release/text-embeddings-router-75 /usr/local/bin/text-embeddings-router-75
COPY --from=builder-80 /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80
COPY --from=builder-89 /usr/src/target/release/text-embeddings-router-89 /usr/local/bin/text-embeddings-router-89
COPY --from=builder-90 /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90
COPY --from=builder-100 /usr/src/target/release/text-embeddings-router-100 /usr/local/bin/text-embeddings-router-100
COPY --from=builder-120 /usr/src/target/release/text-embeddings-router-120 /usr/local/bin/text-embeddings-router-120

COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh

Expand Down
10 changes: 5 additions & 5 deletions Dockerfile-intel
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ FROM chef AS planner
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN cargo chef prepare --recipe-path recipe.json
RUN cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

Expand All @@ -36,8 +36,8 @@ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./
COPY Cargo.toml Cargo.toml
COPY Cargo.lock Cargo.lock

RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
Expand Down
4 changes: 3 additions & 1 deletion cuda-all-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@ compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | se

if [ ${compute_cap} -eq 75 ]; then
exec text-embeddings-router-75 "$@"
elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]; then
elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 89 ]; then
exec text-embeddings-router-80 "$@"
elif [ ${compute_cap} -eq 89 ]; then
exec text-embeddings-router-89 "$@"
elif [ ${compute_cap} -eq 90 ]; then
exec text-embeddings-router-90 "$@"
elif [ ${compute_cap} -eq 100 ]; then
Expand Down
Loading