baidu · marcelMaier · Jun 26, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,28 @@
+# Patterns are matched from the build-context root (unlike .gitignore), so
+# entries that can appear at any depth carry an explicit `**/` prefix.
+.git/
+.github/
+.venv/
+venv/
+env/
+ENV/
+.cache/
+**/__pycache__/
+**/*.py[cod]
+**/*.log
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+**/.ipynb_checkpoints/
+outputs/
+log/
+assets/
+**/*.pdf
+build/
+dist/
+**/*.egg-info/
+.eggs/
+**/.DS_Store
+**/Thumbs.db
+.vscode/
+.idea/
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -0,0 +1,121 @@
+name: Docker
+
+on:
+  push:
+    branches:
+      - main
+    tags:
+      - "v*.*.*"  # version tags such as v1.2.3
+    paths:  # restrict runs to changes in the files listed below
+      - ".github/workflows/docker.yml"
+      - ".dockerignore"
+      - "Dockerfile"
+      - "docker-compose.yml"
+      - "infer.py"
+      - "requirements-sglang.txt"
+      - "wheel/**"
+  pull_request:
+    paths:  # same file list as the push trigger
+      - ".github/workflows/docker.yml"
+      - ".dockerignore"
+      - "Dockerfile"
+      - "docker-compose.yml"
+      - "infer.py"
+      - "requirements-sglang.txt"
+      - "wheel/**"
+  workflow_dispatch:  # allow manual runs
+
+permissions:
+  contents: read
+  packages: write  # lets GITHUB_TOKEN push to ghcr.io
+
+concurrency:
+  group: docker-${{ github.ref }}  # one active run per ref
+  cancel-in-progress: true  # a newer run on the same ref cancels the older one
+
+jobs:
+  build:
+    name: Build and publish Docker image
+    runs-on: ubuntu-latest
+    env:  # optional Docker Hub credentials; empty when the secrets are not configured
+      DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
+      DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v5
+
+      - name: Set image names  # GHCR name is the lowercased repository; Docker Hub name is set only when a username exists
+        id: images
+        shell: bash
+        run: |
+          echo "ghcr=ghcr.io/${GITHUB_REPOSITORY,,}" >> "${GITHUB_OUTPUT}"
+          if [ -n "${DOCKERHUB_USERNAME}" ]; then
+            echo "dockerhub=${DOCKERHUB_USERNAME}/unlimited-ocr" >> "${GITHUB_OUTPUT}"
+          fi
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        if: ${{ github.event_name != 'pull_request' }}  # pull requests only build, so no login is needed
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract GHCR metadata
+        id: meta-ghcr  # tags: branch, PR, git tag, sha-<commit>, plus latest on the default branch
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ steps.images.outputs.ghcr }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=ref,event=tag
+            type=sha,prefix=sha-
+            type=raw,value=latest,enable={{is_default_branch}}
+
+      - name: Build and optionally push GHCR image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64
+          push: ${{ github.event_name != 'pull_request' }}  # build without pushing on pull requests
+          tags: ${{ steps.meta-ghcr.outputs.tags }}
+          labels: ${{ steps.meta-ghcr.outputs.labels }}
+          cache-from: type=gha  # reuse the GitHub Actions build cache
+          cache-to: type=gha,mode=max  # and write the cache back for later runs
+
+      - name: Log in to Docker Hub  # Docker Hub steps run only for non-PR events with both secrets set
+        if: ${{ github.event_name != 'pull_request' && env.DOCKERHUB_USERNAME != '' && env.DOCKERHUB_TOKEN != '' }}
+        uses: docker/login-action@v3
+        with:
+          username: ${{ env.DOCKERHUB_USERNAME }}
+          password: ${{ env.DOCKERHUB_TOKEN }}
+
+      - name: Extract Docker Hub metadata
+        if: ${{ github.event_name != 'pull_request' && env.DOCKERHUB_USERNAME != '' && env.DOCKERHUB_TOKEN != '' }}
+        id: meta-dockerhub  # same tag rules as the GHCR step, minus the PR tag
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ steps.images.outputs.dockerhub }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=tag
+            type=sha,prefix=sha-
+            type=raw,value=latest,enable={{is_default_branch}}
+
+      - name: Build and push Docker Hub image  # second build of the same context, tagged for Docker Hub
+        if: ${{ github.event_name != 'pull_request' && env.DOCKERHUB_USERNAME != '' && env.DOCKERHUB_TOKEN != '' }}
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64
+          push: true
+          tags: ${{ steps.meta-dockerhub.outputs.tags }}
+          labels: ${{ steps.meta-dockerhub.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/.gitignore b/.gitignore
@@ -8,6 +8,7 @@ __pycache__/
 venv/
 env/
 ENV/
+.cache/
 
 # Build and packaging
 build/

diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,98 @@
+# syntax=docker/dockerfile:1.7
+
+# Base images: the devel image builds the wheelhouse, the runtime image is shipped.
+ARG CUDA_IMAGE=nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04
+ARG CUDA_RUNTIME_IMAGE=nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04
+
+# Build stage: collect wheels for everything listed in requirements-sglang.txt.
+FROM ${CUDA_IMAGE} AS build
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+ENV PATH=/opt/venv/bin:$PATH \
+    PIP_NO_CACHE_DIR=1 \
+    PYTHONUNBUFFERED=1
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        curl \
+        git \
+        libgl1 \
+        libglib2.0-0 \
+        python3.12 \
+        python3.12-dev \
+        python3.12-venv \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY requirements-sglang.txt ./
+COPY wheel/ ./wheel/
+RUN python3.12 -m venv /opt/venv \
+    && python -m pip install --upgrade pip setuptools wheel \
+    && mkdir /wheelhouse \
+    && python -m pip wheel --wheel-dir /wheelhouse -r requirements-sglang.txt
+
+# Runtime stage: install from the wheelhouse, then run as a non-root user.
+FROM ${CUDA_RUNTIME_IMAGE} AS runtime
+
+ARG DEBIAN_FRONTEND=noninteractive
+ARG USER_ID=1000
+ARG GROUP_ID=1000
+
+ENV HF_HOME=/home/unlimited/.cache/huggingface \
+    PATH=/opt/venv/bin:$PATH \
+    PYTHONUNBUFFERED=1
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        git \
+        libgl1 \
+        libglib2.0-0 \
+        python3.12 \
+        python3.12-venv \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /wheelhouse /wheelhouse
+# The application packages are installed with --no-index, i.e. only from /wheelhouse.
+RUN python3.12 -m venv /opt/venv \
+    && python -m pip install --upgrade pip setuptools wheel \
+    && python -m pip install --no-index --find-links=/wheelhouse \
+        "sglang==0.0.0.dev11416+g92e8bb79e" \
+        "pymupdf==1.27.2.2" \
+    && rm -rf /wheelhouse
+
+WORKDIR /app
+
+COPY infer.py README.md LICENSE ./
+
+# Ubuntu 24.04 base images ship a default "ubuntu" account on UID/GID 1000, which
+# collides with the default USER_ID/GROUP_ID. Remove it (if present) first.
+RUN if getent passwd ubuntu > /dev/null; then userdel --remove ubuntu; fi \
+    && if getent group ubuntu > /dev/null; then groupdel ubuntu; fi \
+    && groupadd --gid "${GROUP_ID}" unlimited \
+    && useradd --uid "${USER_ID}" --gid "${GROUP_ID}" --create-home --shell /bin/bash unlimited \
+    && mkdir -p /app/log /app/outputs "${HF_HOME}" \
+    && chown -R unlimited:unlimited /app /home/unlimited
+
+USER unlimited
+
+EXPOSE 10000
+VOLUME ["/data", "/app/outputs", "/app/log", "/home/unlimited/.cache/huggingface"]
+
+# Default command: serve baidu/Unlimited-OCR with SGLang on 0.0.0.0:10000.
+CMD ["python", "-m", "sglang.launch_server", \
+    "--model", "baidu/Unlimited-OCR", \
+    "--trust-remote-code", \
+    "--served-model-name", "Unlimited-OCR", \
+    "--attention-backend", "fa3", \
+    "--page-size", "1", \
+    "--mem-fraction-static", "0.8", \
+    "--context-length", "32768", \
+    "--enable-custom-logit-processor", \
+    "--disable-overlap-schedule", \
+    "--skip-server-warmup", \
+    "--host", "0.0.0.0", \
+    "--port", "10000"]
diff --git a/README.md b/README.md
@@ -127,20 +127,20 @@ model.infer_multi(
 ### SGLang
 
 Set up the environment (uv-managed virtualenv). Install the local SGLang wheel first,
-then pin `kernels==0.9.0` and install PyMuPDF for PDF-to-image conversion:
+then install PyMuPDF for PDF-to-image conversion:
 ```shell
 uv venv --python 3.12
 source .venv/bin/activate
 
 uv pip install wheel/sglang-0.0.0.dev11416+g92e8bb79e-py3-none-any.whl
-uv pip install kernels==0.11.7
 uv pip install pymupdf==1.27.2.2
 ```
 
 Start the SGLang server:
 ```shell
 python -m sglang.launch_server \
     --model baidu/Unlimited-OCR \
+    --trust-remote-code \
     --served-model-name Unlimited-OCR \
     --attention-backend fa3 \
     --page-size 1 \
@@ -258,7 +258,7 @@ python infer.py \
     --pdf ./examples/document.pdf \
     --output_dir ./outputs \
     --concurrency 8 \
-    --image_mode gundam
+    --image_mode base
 ```
 
 Useful options:
@@ -268,6 +268,73 @@ Useful options:
 --server_log ./log/sglang_server.log
 ```
 
+### Docker
+
+The Docker image starts the OpenAI-compatible SGLang API server by default.
+It requires an NVIDIA GPU, a compatible host driver, and the NVIDIA Container Toolkit.
+
+Build the image locally:
+```shell
+docker build -t unlimited-ocr:local .
+```
+
+Start the API server:
+```shell
+docker run --rm --gpus all --shm-size 16g \
+    -p 10000:10000 \
+    -v "$HOME/.cache/huggingface:/home/unlimited/.cache/huggingface" \
+    unlimited-ocr:local
+```
+
+The API is available at `http://127.0.0.1:10000` and accepts the same
+OpenAI-compatible requests shown in the SGLang example above.
+
+By default, SGLang uses one GPU. For multi-GPU inference, expose the GPUs to
+Docker and pass the full server command with `--tensor-parallel-size N` added (arguments after the image name replace the default command rather than extend it).
+
+You can also edit `docker-compose.yml` for local paths and run:
+```shell
+docker compose up
+```
+
+For batch inference, override the default command with `python infer.py`.
+
+Run OCR for an image directory:
+```shell
+docker run --rm --gpus all --shm-size 16g \
+    -v "$PWD/examples/images:/data/images:ro" \
+    -v "$PWD/outputs:/app/outputs" \
+    -v "$PWD/log:/app/log" \
+    -v "$HOME/.cache/huggingface:/home/unlimited/.cache/huggingface" \
+    unlimited-ocr:local \
+    python infer.py \
+    --image_dir /data/images \
+    --output_dir /app/outputs \
+    --concurrency 8 \
+    --image_mode gundam
+```
+
+Run OCR for a PDF:
+```shell
+docker run --rm --gpus all --shm-size 16g \
+    -v "$PWD/examples/document.pdf:/data/document.pdf:ro" \
+    -v "$PWD/outputs:/app/outputs" \
+    -v "$PWD/log:/app/log" \
+    -v "$HOME/.cache/huggingface:/home/unlimited/.cache/huggingface" \
+    unlimited-ocr:local \
+    python infer.py \
+    --pdf /data/document.pdf \
+    --output_dir /app/outputs \
+    --concurrency 8 \
+    --image_mode base
+```
+
+The included GitHub Actions workflow builds images for pull requests and publishes
+images on pushes to `main`, version tags, and manual runs:
+
+- GitHub Container Registry: publishes to `ghcr.io/<owner>/<repo>` using the built-in `GITHUB_TOKEN`.
+- Docker Hub: set the repository secrets `DOCKERHUB_USERNAME` and `DOCKERHUB_TOKEN` to also publish to `<DOCKERHUB_USERNAME>/unlimited-ocr`; without them the Docker Hub steps are skipped.
+
 
 ## Visualization
 

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,24 @@
+services:
+  unlimited-ocr:
+    build:
+      context: .  # build from the repository root
+    image: unlimited-ocr:local  # tag applied to the locally built image
+    shm_size: 16gb  # matches the `--shm-size 16g` used in the README docker run examples
+    ports:
+      - "10000:10000"  # port the server command in the Dockerfile listens on
+    deploy:
+      resources:
+        reservations:
+          devices:  # reserve all NVIDIA GPUs for the container
+            - driver: nvidia
+              count: all
+              capabilities:
+                - gpu
+    environment:
+      NVIDIA_VISIBLE_DEVICES: all
+      NVIDIA_DRIVER_CAPABILITIES: compute,utility
+    volumes:
+      - ./data:/data:ro  # input files, mounted read-only
+      - ./outputs:/app/outputs
+      - ./log:/app/log
+      - ./.cache/huggingface:/home/unlimited/.cache/huggingface  # HF_HOME in the image; NOTE(review): host dir presumably must be writable by the container user (UID 1000 by default) - confirm
diff --git a/requirements-sglang.txt b/requirements-sglang.txt
@@ -0,0 +1,2 @@
+./wheel/sglang-0.0.0.dev11416+g92e8bb79e-py3-none-any.whl  # local SGLang wheel shipped in wheel/
+pymupdf==1.27.2.2  # PDF-to-image conversion
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,6 +8,7 @@ __pycache__/ @@
     venv/
     env/
     ENV/
+    .cache/
     # Build and packaging
     build/
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		./wheel/sglang-0.0.0.dev11416+g92e8bb79e-py3-none-any.whl
		pymupdf==1.27.2.2