# Patch summary for docker/Dockerfile (fine-tune / "sft" path), as shown by the hunks below:
#   * Drops the KTRANSFORMERS_VERSION and KTRANSFORMERS_WHEEL build args, both the
#     defaulted declarations near line 29 and the bare re-declarations near line 198.
#   * Switches FLASH_ATTN_WHEEL from the cu12torch2.8 build to the cu12torch2.9 build.
#   * The sft-only download step now fetches only the flash_attn wheel; the curl of the
#     ktransformers release wheel is removed.
#   * In the fine-tune env, adds `editables` to the pip upgrade list and moves the
#     torch pin from 2.8.0 to 2.9.1.
#   * Replaces `pip install /workspace/${KTRANSFORMERS_WHEEL}` with: activate the
#     fine-tune conda env, run `./install.sh build` in /workspace/ktransformers/kt-kernel
#     with CPUINFER_BUILD_ALL_VARIANTS=1, then `pip install "/workspace/ktransformers[sft]"`.
#   * The wheel cleanup step now removes only the flash_attn wheel.
#
# NOTE(review): this text appears to have been flattened -- hunk headers and +/- markers
#   sit mid-line rather than at line starts, so the original indentation of the
#   Dockerfile lines cannot be read from here. Re-export the patch before applying.
# NOTE(review): torchvision and torchaudio stay unpinned next to torch==2.9.1 -- confirm
#   the resolver selects builds matching that torch version.
# NOTE(review): the flash_attn download URL keeps the release tag v2.8.3 hard-coded while
#   the file name comes from FLASH_ATTN_WHEEL -- confirm the torch2.9 asset exists under
#   that tag.
# NOTE(review): the wheel is removed in a later RUN than the one that downloads it --
#   presumably it still occupies space in the earlier layer; confirm whether that matters.
diff --git a/docker/Dockerfile b/docker/Dockerfile index fe574edd0..db2b8d2ad 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -29,12 +29,12 @@ ARG UBUNTU_MIRROR ARG GITHUB_ARTIFACTORY=github.com ARG FLASHINFER_VERSION=0.5.3 -# ktransformers wheel version (cu128torch28 for CUDA 12.8 + PyTorch 2.8) -ARG KTRANSFORMERS_VERSION=0.5.3 -ARG KTRANSFORMERS_WHEEL=ktransformers-0.5.3+cu128torch28fancy-cp312-cp312-linux_x86_64.whl +# ktransformers is installed from the cloned source tree (see below); no +# prebuilt wheel is downloaded because the release assets are not published. -# flash_attn wheel for fine-tune env -ARG FLASH_ATTN_WHEEL=flash_attn-2.8.3+cu12torch2.8cxx11abiTRUE-cp312-cp312-linux_x86_64.whl +# flash_attn wheel for fine-tune env (torch 2.9 build to match the torch pin below +# and the torch==2.9.1 requirement of kt-kernel pulled in by ktransformers[sft]) +ARG FLASH_ATTN_WHEEL=flash_attn-2.8.3+cu12torch2.9cxx11abiTRUE-cp312-cp312-linux_x86_64.whl ENV DEBIAN_FRONTEND=noninteractive \ CUDA_HOME=/usr/local/cuda \ @@ -198,8 +198,6 @@ ARG HOPPER_SBO ARG HOPPER_SBO_DEEPEP_COMMIT ARG DEEPEP_COMMIT ARG GITHUB_ARTIFACTORY -ARG KTRANSFORMERS_VERSION -ARG KTRANSFORMERS_WHEEL ARG FLASH_ATTN_WHEEL ARG FUNCTIONALITY=sft @@ -223,11 +221,11 @@ RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.g git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory; \ fi -# Download ktransformers wheel and flash_attn wheel for fine-tune env (sft mode only) +# Download flash_attn wheel for fine-tune env (sft mode only). +# ktransformers itself is installed from the cloned source tree (see below); +# the prebuilt release wheel is no longer published as a release asset. 
RUN if [ "$FUNCTIONALITY" = "sft" ]; then \ - curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \ - https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \ - && curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \ + curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \ https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}; \ fi @@ -342,9 +340,9 @@ RUN --mount=type=cache,target=/root/.cache/pip \ 12.9.1) CUINDEX=129 ;; \ 13.0.1) CUINDEX=130 ;; \ esac \ - && /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel hatchling \ + && /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel hatchling editables \ && /opt/miniconda3/envs/fine-tune/bin/pip install \ - torch==2.8.0 \ + torch==2.9.1 \ torchvision \ torchaudio \ --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}; \ @@ -357,10 +355,16 @@ RUN --mount=type=cache,target=/root/.cache/pip \ && /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation; \ fi -# Install ktransformers wheel in fine-tune env +# Build and install the local kt-kernel into the fine-tune env (same as the serve +# env), then install ktransformers (with sft extras) from the cloned source. Building +# kt-kernel from source first means the locally checked-out C++ kernels are used, and +# pip does not fall back to a PyPI kt-kernel (which may not exist for an unreleased version). RUN --mount=type=cache,target=/root/.cache/pip \ if [ "$FUNCTIONALITY" = "sft" ]; then \ - /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}; \ + . 
/opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune \ + && cd /workspace/ktransformers/kt-kernel \ + && CPUINFER_BUILD_ALL_VARIANTS=1 ./install.sh build \ + && pip install "/workspace/ktransformers[sft]"; \ fi # Install flash_attn wheel in fine-tune env @@ -385,7 +389,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \ # Clean up downloaded wheels RUN if [ "$FUNCTIONALITY" = "sft" ]; then \ - rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}; \ + rm -f /workspace/${FLASH_ATTN_WHEEL}; \ fi # Initialize conda for bash