From 7028ca9106f6f3375ea6a358333ecd385c37fe52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=82=E6=B4=9B?= Date: Wed, 10 Jun 2026 11:58:47 +0800 Subject: [PATCH 1/4] feat(sdk): add ComposeJobConfig for multi-container docker-compose jobs Implement ComposeJobConfig + ComposeTrial to run multi-container topologies (main + sidecars + init containers) inside a single DinD sandbox, orchestrated by a generated runner.sh via the docker CLI. - config: ComposeSpec/MainContainerSpec/SidecarSpec/InitContainerSpec with resources(request/limit), command/args, privileged, secret_env, oss_deps, health probes, volume_mounts(host_path); from_yaml detects "compose" key - trial: 7-phase runner.sh (dockerd start with kata fixes -> oss deps -> init -> sidecars -> health probe -> main -> collect/cleanup); secrets rendered as shell var refs; /rock/scripts bind-mounted into inner containers - examples/job/compose: end-to-end harbor demo (verified against real backend up to harbor dataset download; only OSS creds missing) End-to-end verified on ROCK kata backend: sandbox -> dockerd -> proxy sidecar -> health ready -> main container -> harbor CLI running. 
Co-Authored-By: Claude Code AI-Model: claude-opus-4-8 AI-Contributed/Feature: 2477/2477 AI-Contributed/UT: 1233/1233 --- examples/job/compose/README.md | 128 ++ examples/job/compose/compose_demo.py | 90 ++ examples/job/compose/job_config.yaml.template | 108 ++ examples/job/compose/main.sh | 1085 +++++++++++++++++ .../job/compose/sidecars/proxy-sidecar.sh | 185 +++ rock/sdk/job/__init__.py | 4 + rock/sdk/job/compose/__init__.py | 5 + rock/sdk/job/compose/config.py | 289 +++++ rock/sdk/job/compose/trial.py | 568 +++++++++ rock/sdk/job/config.py | 15 +- tests/unit/sdk/job/test_compose_config.py | 623 ++++++++++ tests/unit/sdk/job/test_trial_compose.py | 610 +++++++++ 12 files changed, 3708 insertions(+), 2 deletions(-) create mode 100644 examples/job/compose/README.md create mode 100644 examples/job/compose/compose_demo.py create mode 100644 examples/job/compose/job_config.yaml.template create mode 100644 examples/job/compose/main.sh create mode 100644 examples/job/compose/sidecars/proxy-sidecar.sh create mode 100644 rock/sdk/job/compose/__init__.py create mode 100644 rock/sdk/job/compose/config.py create mode 100644 rock/sdk/job/compose/trial.py create mode 100644 tests/unit/sdk/job/test_compose_config.py create mode 100644 tests/unit/sdk/job/test_trial_compose.py diff --git a/examples/job/compose/README.md b/examples/job/compose/README.md new file mode 100644 index 0000000000..13947244b5 --- /dev/null +++ b/examples/job/compose/README.md @@ -0,0 +1,128 @@ +# ComposeJobConfig 端到端用例:harbor + cc-proxy + +本目录展示如何用 `ComposeJobConfig` 在 ROCK DinD 沙箱内运行 harbor 任务 +(claude-code agent 跑 terminal-bench / aone-bench-java100)。 + +## 目录结构 + +``` +examples/job/compose/ +├── compose_demo.py # 入口脚本(argparse + Job.run()) +├── job_config.yaml.template # ComposeJobConfig YAML 模板(含占位符) +├── main.sh # 主容器入口脚本(harbor runner,原 Agent-Hub/task/harbor/main.sh) +└── sidecars/ + └── proxy-sidecar.sh # cc-proxy sidecar 脚本(原 Agent-Hub/task/harbor/proxy-sidecar.sh) +``` + +## 运行方案:runner.sh 
在外层沙箱主动启动 dockerd + +> 以下要点均经过 ROCK 真实后端(`xrl.alibaba-inc.com` / `vpc-sg-a`,kata runtime)端到端验证。 + +外层沙箱镜像**必须自带 `docker` / `dockerd` / `containerd` / `runc`**(例如 harbor runner 镜像)。 + +**重要**:不要用 `docker:27-dind` 作外层镜像 —— 实测该镜像变体在 ROCK kata 沙箱内 +缺少 `containerd`,dockerd 无法启动。请用一个预装完整 docker 工具链的业务镜像。 + +**dockerd 不会自动启动**:ROCK kata 沙箱进入时没有运行 dockerd。`ComposeTrial` +生成的 `runner.sh` 会在 P0 阶段主动启动它,并内置两个 kata 环境必需的修正: + +```bash +PATH=/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin \ +DOCKER_IGNORE_BR_NETFILTER_ERROR=1 \ + nohup dockerd >/var/log/dockerd.log 2>&1 & +``` + +- **显式 PATH**:`nohup` 启动的 dockerd 不继承交互 shell 的 PATH,否则报 + `containerd executable file not found`。 +- **`DOCKER_IGNORE_BR_NETFILTER_ERROR=1`**:kata guest 缺 + `/proc/sys/net/bridge/bridge-nf-call-iptables`,否则 bridge 网络初始化失败。 + +这些已固化在 `rock/sdk/job/compose/trial.py` 的 runner 模板里,用户无需关心。 + +### 三层 DinD 结构(注意事项) + +``` +ROCK 外层 kata 沙箱(业务镜像,自带 docker 工具链) +└── runner.sh(ComposeTrial 生成,P0 启动 dockerd,再用 docker CLI 编排) + ├── proxy sidecar 容器(cc-proxy,监听 8082,--network-alias proxy) + └── main 容器(harbor runner,privileged=true,挂载 /rock/scripts) + └── harbor CLI → 在内层再起 task env 容器(第三层) +``` + +**关键约束**: +- 外层沙箱须开启 `use_kata_runtime: true` +- main 容器须 `privileged: true`(harbor 内部起 docker 容器需要) +- runner.sh 自动把外层 `/rock/scripts` 以 `-v /rock/scripts:/rock/scripts:ro` + 挂载进 main / init / sidecar 容器,使各容器能执行上传的脚本 +- 主容器入口固定为 `bash /rock/scripts/main.sh`,因此**业务镜像须自带 `bash`** +- proxy 访问:sidecar 以 `--network-alias proxy` 注册,同一 compose network 内 + main 容器可用 `http://proxy:8082` 直达。注意:原始 Agent-Hub main.sh 用 + `docker network inspect bridge` 取 gateway IP 访问 proxy,这是 K8s 同 Pod 模型; + 在 compose 独立容器模型下若 proxy 不在 main 的 bridge 网络,需改用 `proxy:8082` + alias(见 main.sh 顶部注释) + +## 环境变量 + +运行前需导出以下环境变量(或写入 `.env` 后 `source .env`): + +```bash +# 模型 +export MODEL=claude-opus-4-8 +export MODEL_API_KEY= +export MODEL_BASE_URL= + +# ROCK 集群 +export ROCK_TOKEN= + +# OSS 凭证 +export OSS_ACCESS_KEY_ID= +export 
OSS_ACCESS_KEY_SECRET= +export OSS_REGION=cn-hangzhou +export OSS_ENDPOINT=oss-cn-hangzhou-internal.aliyuncs.com +export OSS_BUCKET= +``` + +## 使用步骤 + +### 1. 准备配置文件 + +```bash +cp examples/job/compose/job_config.yaml.template examples/job/compose/job_config.yaml +# 编辑 job_config.yaml,填入真实镜像名和占位符 +``` + +主要需要替换的占位符: + +| 占位符 | 说明 | +|--------|------| +| `` | ROCK 集群认证 token | +| `` | 模型名,e.g. `claude-opus-4-8` | +| `` | 模型 API Key | +| `` | 模型 Base URL | +| `` | 任务 ID,e.g. `mailman` | +| `` | harbor runner 镜像(含 harbor CLI + claude-code) | +| `` | claude-code proxy 镜像 | +| `` | OSS 凭证和 bucket 信息 | + +### 2. 运行 + +```bash +python examples/job/compose/compose_demo.py -c examples/job/compose/job_config.yaml +``` + +### 3. 查看结果 + +脚本会打印 `exit_code`、`score` 和各 trial 结果。 +产物(harbor_stdout.txt、result.json、metrics.json 等)通过 `oss_mirror` 上传到 OSS。 + +## proxy sidecar 端口 + +`proxy-sidecar.sh`(原 Agent-Hub/task/harbor/proxy-sidecar.sh)监听端口 **8082**。 +配置中 `sidecars[].health.port = 8082`,runner.sh 会在主容器启动前探测该端口就绪。 + +## 与 HarborJobConfig 的对比 + +本示例展示 ComposeJobConfig 的表达能力。实际上,对于"调 harbor CLI 跑 benchmark"的场景, +`HarborJobConfig` 更原生(自带 agents/datasets/verifier 结构化支持)。 + +ComposeJobConfig 更适合:自己掌控每个容器镜像和脚本、容器间是简单"主 + sidecar + init"拓扑。 diff --git a/examples/job/compose/compose_demo.py b/examples/job/compose/compose_demo.py new file mode 100644 index 0000000000..72449d971a --- /dev/null +++ b/examples/job/compose/compose_demo.py @@ -0,0 +1,90 @@ +"""ComposeJobConfig end-to-end demo using ROCK Job SDK. + +Runs a harbor task (claude-code agent on terminal-bench / aone-bench-java100) +using ``ComposeJobConfig`` — the multi-container compose variant of JobConfig. + +The outer sandbox image must ship docker/dockerd/containerd/runc (not docker:27-dind); runner.sh starts dockerd. 
+Inside, runner.sh orchestrates: + - main container → harbor runner (main.sh) + - proxy sidecar → claude-code proxy (port 8082) + +Usage: + python examples/job/compose/compose_demo.py -c examples/job/compose/job_config.yaml + +Required environment variables (forwarded into the sandbox via environment.env): + MODEL Model name, e.g. claude-opus-4-8 + MODEL_API_KEY API key for the model + MODEL_BASE_URL Base URL for the model API + ROCK_TOKEN ROCK cluster auth token (injected as XRL-Authorization header) + OSS_ACCESS_KEY_ID Alibaba Cloud OSS access key ID + OSS_ACCESS_KEY_SECRET Alibaba Cloud OSS access key secret + OSS_REGION OSS region, e.g. cn-hangzhou + OSS_ENDPOINT OSS endpoint, e.g. oss-cn-hangzhou-internal.aliyuncs.com + OSS_BUCKET OSS bucket name +""" + +import argparse +import asyncio +import logging +import os +import sys + +from rock.sdk.job import Job, JobConfig + +_REQUIRED_ENV_VARS = [ + "MODEL", + "MODEL_API_KEY", + "MODEL_BASE_URL", + "ROCK_TOKEN", + "OSS_ACCESS_KEY_ID", + "OSS_ACCESS_KEY_SECRET", + "OSS_REGION", + "OSS_ENDPOINT", + "OSS_BUCKET", +] + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) +# reduce httpx noise +logging.getLogger("httpx").setLevel(logging.WARNING) + + +def check_env() -> None: + missing = [v for v in _REQUIRED_ENV_VARS if not os.environ.get(v)] + if missing: + print("Missing required environment variables:") + for v in missing: + print(f" {v}") + print("\nSet them with `source .env` or export them manually.") + sys.exit(1) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run a ComposeJobConfig job (harbor + cc-proxy sidecar) inside a ROCK DinD sandbox" + ) + parser.add_argument("-c", "--config", required=True, help="Path to ComposeJobConfig YAML file") + return parser.parse_args() + + +async def async_main(args: argparse.Namespace) -> None: + config = JobConfig.from_yaml(args.config) + 
logger.info(f"Loaded config: {config.__class__.__name__}, job_name={config.job_name}") + + result = await Job(config).run() + + logger.info(f"result: {result}") + logger.info(f"Job completed: exit_code={result.exit_code}, score={result.score}") + if result.trial_results: + for trial in result.trial_results: + logger.info(f" {trial.task_name}: score={trial.score} ({trial.status})") + if trial.exception_info: + logger.info( + f" error: {trial.exception_info.exception_type}: {trial.exception_info.exception_message}" + ) + + +if __name__ == "__main__": + check_env() + args = parse_args() + asyncio.run(async_main(args)) diff --git a/examples/job/compose/job_config.yaml.template b/examples/job/compose/job_config.yaml.template new file mode 100644 index 0000000000..2f46e41197 --- /dev/null +++ b/examples/job/compose/job_config.yaml.template @@ -0,0 +1,108 @@ +# ComposeJobConfig YAML template — harbor task (claude-code / terminal-bench / aone-bench-java100) +# +# Approach: the outer sandbox image ships the docker toolchain and runner.sh starts dockerd in P0 (do NOT use image=docker:27-dind) +# 内层编排: runner.sh 在外层沙箱内用 docker CLI 拉起 main + proxy sidecar +# +# 使用前: 将所有 <...> 占位符替换为真实值,或通过上层脚本批量渲染。 +# 用法: python examples/job/compose/compose_demo.py -c examples/job/compose/job_config.yaml + +# ── JobConfig 基础字段 ─────────────────────────────────────────────── +job_name: harbor-compose-demo +namespace: xrl-sandbox +# experiment_id 不填(ComposeJobConfig 无需 Harbor 的 experiment_id 字段) +timeout: 8000 # 整体超时(秒);harbor run 内部超时由 HARBOR_* env 控制 + +# ── 主容器入口(继承 BashJobConfig)───────────────────────────────── +script_path: ./main.sh # 对应 compose.main service;由 ComposeTrial 上传到 /rock/scripts/main.sh + +# ── DinD 外层沙箱(EnvironmentConfig)────────────────────────────── +# image 必须自带 docker/dockerd/containerd/runc 工具链(不要用 docker:27-dind, +# 实测它在 ROCK kata 沙箱里缺 containerd)。runner.sh 会在 P0 主动启动 dockerd。 +# 这里直接复用 harbor runner 镜像(自带完整 docker 工具链 + harbor CLI + bash)。 +environment: + image: "" # 自带 docker 工具链的业务镜像 + base_url: http://xrl.alibaba-inc.com # ROCK admin 
service 地址 + cluster: vpc-sg-a + extra_headers: + XRL-Authorization: "Bearer " + use_kata_runtime: true # 生产环境推荐开启 kata,强隔离 + startup_timeout: 1800 # 沙箱启动超时(秒) + cpus: 32 # 外层沙箱总 CPU(须 ≥ 内层各容器之和:main 8 + proxy 1 + 余量) + memory: "32g" # 外层沙箱总内存 + + # 把本地脚本目录上传进沙箱(runner.sh 引用这些路径) + uploads: + - ["./main.sh", "/rock/scripts/main.sh"] + - ["./sidecars", "/rock/scripts/sidecars"] + + # 沙箱级环境变量(注入外层沙箱,runner.sh 可读,同时透传给各内层容器) + env: + # 模型相关(harbor runner 所需) + MODEL: "" # e.g. claude-opus-4-8 + MODEL_API_KEY: "" + MODEL_BASE_URL: "" # e.g. https://api.anthropic.com + + # Harbor agent 配置 + HARBOR_AGENT: claude-code # 固定 claude-code + INSTANCE_ID: "" # 任务 ID,e.g. mailman + DATASET: terminal-bench # 或 aone-bench-java100 + SPLIT: test + DATASET_TYPE: local # local / registry + HARBOR_ENV: docker # DinD 下用 docker 模式 + + # DinD:主容器自带 dockerd,DOCKER_HOST 指向外层 DinD daemon + DOCKER_HOST: "tcp://localhost:2375" + + # OSS 凭证(数据集下载 + 产物上传) + OSS_BUCKET: "" + OSS_ENDPOINT: "" # e.g. oss-cn-hangzhou-internal.aliyuncs.com + OSS_REGION: "" # e.g. cn-hangzhou + OSS_ACCESS_KEY_ID: "" + OSS_ACCESS_KEY_SECRET: "" + + # Harbor 运行参数(可选覆盖) + N_ATTEMPTS: "1" + N_CONCURRENT: "1" + TIMEOUT_MULTIPLIER: "1.0" + MAX_RETRIES: "3" + MAX_ITERATIONS: "500" + SKIP_CONFIRM: "true" + HARBOR_JOB_RESULT_PATH: "/tmp/shared/rollout_result" + OUTPUT_DIR: "/tmp/output" + SHARED_DIR: "/tmp/shared" + + # cc-proxy 配置(proxy sidecar 的端口由 health.port 声明) + PROVIDER: anthropic + FORCE_PROXY: "true" # 强制走 cc-proxy sidecar + + # 产物上传(复用现有机制) + oss_mirror: + enabled: true + oss_bucket: "" + +# ── Compose 内层编排(DinD 内,由 runner.sh 用 docker CLI 拉起)── +compose: + # 主容器:harbor runner + # script_path 即顶层 ./main.sh(ComposeTrial 上传后路径 /rock/scripts/main.sh) + main: + image: "" # harbor runner 镜像,含 harbor CLI 及 claude-code + # e.g. 
code-agi-sg-docker-registry-vpc.ap-southeast-1.cr.aliyuncs.com/eflops/claude-code:bailian-xxx + privileged: true # harbor 在内层需要起 docker 容器,须特权 + resources: + cpus: 8 + memory: "16g" + + # Sidecar 容器:与主容器并行运行 + sidecars: + # cc-proxy sidecar:主容器通过 docker bridge gateway(http://${DOCKER_GATEWAY}:8082)访问 + # name=proxy 作为 docker network-alias,健康探测确保 proxy 就绪后再启动主容器 + - name: proxy + image: "" # claude-code proxy 镜像 + # e.g. code-agi-sg-docker-registry-vpc.ap-southeast-1.cr.aliyuncs.com/eflops/claude-code-proxy:latest + script_path: /rock/scripts/sidecars/proxy-sidecar.sh + resources: + cpus: 1 + memory: "1g" + health: + port: 8082 # proxy-sidecar.sh 中 PORT=8082 + timeout_sec: 60 diff --git a/examples/job/compose/main.sh b/examples/job/compose/main.sh new file mode 100644 index 0000000000..ec7625d820 --- /dev/null +++ b/examples/job/compose/main.sh @@ -0,0 +1,1085 @@ +#!/bin/bash +# ============================================================================= +# 来源: Agent-Hub/task/harbor/main.sh(1055 行完整脚本) +# 用途: ROCK ComposeJobConfig 端到端用例 — harbor 任务 (claude-code / terminal-bench) +# +# 改动说明(相对原始文件): +# 1. Header comment updated (this block). 2. DOCKER_HOST now prefers the mounted /var/run/docker.sock (see the "ROCK ComposeJobConfig 适配" block below); everything else is unchanged. +# +# Runtime model: runner.sh starts dockerd in the outer sandbox (do NOT use image=docker:27-dind); +# DOCKER_HOST uses the mounted /var/run/docker.sock when present, otherwise falls back to tcp://localhost:2375. +# 注意:附录 B 的翻译中 DOCKER_HOST 改成了 tcp://docker-daemon:2375 +# (因为那是三层嵌套 DinD + 独立 dockerd sidecar 的方案), +# 但本端到端用例采用"外层 DinD 沙箱自带 dockerd"方案,无独立 docker-daemon sidecar, +# 所以 DOCKER_HOST 继续指向 localhost:2375。 +# +# proxy 访问: main.sh 里所有对 proxy 的访问都通过 DOCKER_GATEWAY 动态获取 +# (`http://${DOCKER_GATEWAY}:8082`),并非硬编码 127.0.0.1:8082, +# 因此在 ComposeJobConfig 场景下无需修改。NOTE(review): the proxy readiness probes below still curl http://localhost:8082 in an unbounded loop — confirm it is reachable from the main container. +# proxy sidecar 以 network-alias=proxy 运行,主容器通过 docker bridge +# 网关访问,端口 8082 不变。 +# ============================================================================= +# harbor/main.sh — Harbor agent runner for Agent-Hub. +# Supports: terminus-2, claude-code, opencode, openclaw, codex, aider, goose, etc. 
+set -e + +# ── Environment setup ── + +# Mounted volumes — read AP-injected env vars, with local-docker fallback. +# DO NOT override OUTPUT_DIR with a hardcoded path; that breaks the AP uploader. +OUTPUT_DIR="${OUTPUT_DIR:-/tmp/output}" +SHARED_DIR="${SHARED_DIR:-/tmp/shared}" +export OUTPUT_DIR SHARED_DIR + +MODEL_API_KEY="${MODEL_API_KEY:-$API_KEY}" +MODEL_BASE_URL="${MODEL_BASE_URL:-$BASE_URL}" + +# ── ROCK ComposeJobConfig 适配(相对原始 Agent-Hub main.sh 的唯一逻辑改动)── +# 原始脚本假设有个 docker-daemon sidecar 在 tcp://localhost:2375 提供 dockerd(K8s 同 Pod 模型)。 +# 在 ComposeJobConfig 的"两层"方案里,外层 ROCK kata 沙箱已由 runner.sh 启动 dockerd, +# 并通过 volume_mount(host_path=/var/run/docker.sock) 把外层 socket 挂进本主容器。 +# 因此优先复用外层 dockerd(避免在主容器内再起第三层 dockerd,那在 kata 下会失败)。 +if [ -S /var/run/docker.sock ]; then + export DOCKER_HOST="unix:///var/run/docker.sock" + echo "[rock-compose-adapt] reusing outer dockerd via mounted /var/run/docker.sock" +else + export DOCKER_HOST="tcp://localhost:2375" +fi +export OPENAI_API_KEY="${MODEL_API_KEY}" +export OPENAI_BASE_URL="${MODEL_BASE_URL}" +export ANTHROPIC_BASE_URL="${ANTHROPIC_BASE_URL:-${MODEL_BASE_URL}}" +export ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-${MODEL_API_KEY}}" + +# cc-proxy sidecar: per-model overrides for BIG/MIDDLE/SMALL. +# All fall back to the main model credentials when not provided. +# Keep in sync with proxy-sidecar.sh .env rendering and with +# Agent-Service harbor plugin.py env injection. 
+export MIDDLE_MODEL="${MIDDLE_MODEL:-${MODEL}}" +export SMALL_MODEL="${SMALL_MODEL:-${MODEL}}" +export MIDDLE_OPENAI_API_KEY="${MIDDLE_OPENAI_API_KEY:-${OPENAI_API_KEY}}" +export MIDDLE_OPENAI_BASE_URL="${MIDDLE_OPENAI_BASE_URL:-${OPENAI_BASE_URL}}" +export SMALL_OPENAI_API_KEY="${SMALL_OPENAI_API_KEY:-${OPENAI_API_KEY}}" +export SMALL_OPENAI_BASE_URL="${SMALL_OPENAI_BASE_URL:-${OPENAI_BASE_URL}}" + +if [ "${HARBOR_AGENT}" = "openclaw" ]; then + export MODEL_REASONING="true" + export MODEL_CONTEXT_LENGTH="${MODEL_CONTEXT_LENGTH:-131072}" + export MODEL_MAX_TOKENS="${MAX_TOKENS:-${MODEL_MAX_TOKENS:-8192}}" + export OPENCLAW_NODE_VERSION="${NODE_VERSION:-24.14.0}" + export HARBOR_NPM_INSTALL_MODE="${HARBOR_NPM_INSTALL_MODE:-mirror-preferred}" + export HARBOR_NPM_REGISTRY_PORT="${HARBOR_NPM_REGISTRY_PORT:-14873}" + export HARBOR_NODE_DIST_PORT="${HARBOR_NODE_DIST_PORT:-14874}" + if [ -n "${TEMPERATURE:-}" ]; then + export MODEL_TEMPERATURE="${TEMPERATURE}" + fi +fi + +echo "Starting Harbor agent execution..." +echo "Harbor Agent: ${HARBOR_AGENT}" +echo "Dataset Type: ${DATASET_TYPE}" +if [ "${DATASET_TYPE}" = "registry" ]; then + echo "Dataset Name: ${DATASET_NAME}" + echo "Dataset Version: ${DATASET_VERSION}" +fi +echo "Task ID: ${INSTANCE_ID}" +echo "Model: ${MODEL}" +echo "Environment: ${HARBOR_ENV}" +if [ "${HARBOR_AGENT}" = "openclaw" ]; then + echo "OpenClaw version: ${AGENT_VERSION:-latest}" + echo "OpenClaw thinking_level: ${THINKING_LEVEL}" +fi + +HARBOR_JOB_RESULT_PATH="${HARBOR_JOB_RESULT_PATH:-${SHARED_DIR}/rollout_result}" +mkdir -p "${HARBOR_JOB_RESULT_PATH}" + +# ── Wait for docker daemon (DinD sidecar) ── +echo "Waiting for docker daemon..." +max_attempts=30 +attempt=0 +while [ $attempt -lt $max_attempts ]; do + if docker ps >/dev/null 2>&1; then + echo "Docker daemon is ready" + break + fi + attempt=$((attempt + 1)) + echo "Waiting for docker daemon... 
(attempt $attempt/$max_attempts)" + sleep 2 +done + +if [ $attempt -eq $max_attempts ]; then + echo "ERROR: Docker daemon failed to start after $max_attempts attempts" + exit 1 +fi + +docker info || { echo "ERROR: Docker info command failed"; exit 1; } +export DOCKER_BUILDKIT=0 + +DOCKER_GATEWAY="" + +if [ "${HARBOR_AGENT}" = "openclaw" ]; then + DOCKER_GATEWAY=$(docker network inspect bridge --format='{{range .IPAM.Config}}{{.Gateway}}{{end}}' 2>/dev/null) + echo "DOCKER_GATEWAY IP is ${DOCKER_GATEWAY}" + export HARBOR_NPM_REGISTRY_URL="http://${DOCKER_GATEWAY}:${HARBOR_NPM_REGISTRY_PORT}" + export HARBOR_NODE_DIST_BASE_URL="http://${DOCKER_GATEWAY}:${HARBOR_NODE_DIST_PORT}" + + echo "Waiting for npm mirror sidecar..." + max_attempts=30 + attempt=0 + while [ $attempt -lt $max_attempts ]; do + if curl -s --connect-timeout 1 "http://localhost:${HARBOR_NPM_REGISTRY_PORT}" >/dev/null 2>&1; then + echo "NPM mirror is ready" + break + fi + attempt=$((attempt + 1)) + echo "Waiting for npm mirror... (attempt $attempt/$max_attempts)" + sleep 2 + done +fi + +# ── Download dataset from OSS if not present locally (non-registry) ── +if [ "${DATASET_TYPE}" != "registry" ] && [ ! 
-d "./${INSTANCE_ID}" ]; then + if [ -z "${DATASET}" ] || [ -z "${SPLIT}" ]; then + echo "ERROR: DATASET and SPLIT must be set for OSS download" + exit 1 + fi + DATASET_KEY="${DATASET}/${SPLIT}" + + cat > /root/.ossutilconfig </dev/null 2>&1; then + mkdir -p ./${INSTANCE_ID} && cd ./${INSTANCE_ID} + ossutil cp -f $DATA_OSS_PATH/content.tgz ./content.tgz --retry-times=500 + tar -zxf content.tgz >/dev/null 2>&1 + rm -rf content.tgz + cd - + else + mkdir -p ./${INSTANCE_ID} + ossutil cp -r -f $DATA_OSS_PATH ./${INSTANCE_ID} --retry-times=500 + fi + echo "Dataset downloaded for ${INSTANCE_ID}" + + # Flatten nested directory if exists + if [ -d ./${INSTANCE_ID}/${INSTANCE_ID} ]; then + cp -r ./${INSTANCE_ID}/${INSTANCE_ID}/* ./${INSTANCE_ID}/ + fi +fi + +# ── Load pre-baked task base image ── +echo "Checking for task base image..." +TASK_IMAGE_JSON="/images/task-image.json" +if [ -f "${TASK_IMAGE_JSON}" ]; then + echo "Found task-image.json, checking for task ${INSTANCE_ID}..." + TAR_FILE=$(python3 -c "import json; data = json.load(open('${TASK_IMAGE_JSON}')); print(data.get('${INSTANCE_ID}', {}).get('tar_file', ''))" 2>/dev/null) + if [ -n "${TAR_FILE}" ]; then + TAR_PATH="/images/${TAR_FILE}" + if [ -f "${TAR_PATH}" ]; then + echo "Loading base image from ${TAR_FILE}..." 
+ docker load -i "${TAR_PATH}" || echo "Warning: Failed to load base image from ${TAR_FILE}" + else + echo "Tar file not found: ${TAR_PATH}" + fi + else + echo "No pre-baked image for task ${INSTANCE_ID}" + fi +else + echo "No task-image.json found, skipping image loading" +fi + +# ── Opencode: generate provider config ── +if [ "${HARBOR_AGENT}" = "opencode" ]; then + echo "/" + # Determine npm package and endpoint based on provider + if [ "${PROVIDER}" = "anthropic" ]; then + OPENCODE_NPM="@ai-sdk/anthropic" + OPENCODE_BASE_URL="${ANTHROPIC_BASE_URL}" + OPENCODE_API_KEY="${ANTHROPIC_API_KEY}" + else + OPENCODE_NPM="@ai-sdk/openai-compatible" + OPENCODE_BASE_URL="${OPENAI_BASE_URL}" + OPENCODE_API_KEY="${OPENAI_API_KEY}" + unset OPENAI_API_KEY + fi + + # When using proxy with anthropic, route through docker gateway + if [ "${FORCE_PROXY}" = "true" ] && [ "${PROVIDER}" = "anthropic" ]; then + echo "Waiting for claude-code proxy (opencode + anthropic + force_proxy)..." + if command -v curl > /dev/null 2>&1; then + while true; do + curl -s --connect-timeout 1 http://localhost:8082 > /dev/null 2>&1 && break + sleep 1 + done + else + sleep 60 + fi + + DOCKER_GATEWAY=$(docker network inspect bridge --format='{{range .IPAM.Config}}{{.Gateway}}{{end}}' 2>/dev/null) + if [ -n "${DOCKER_GATEWAY}" ]; then + OPENCODE_BASE_URL="http://${DOCKER_GATEWAY}:8082/v1" + OPENCODE_API_KEY="any-value" + fi + fi + + # Thinking params — skip if proxy handles it (force_proxy + anthropic) + OPENCODE_THINKING="${INTERLEAVED_THINKING:-}" + OPENCODE_THINKING_TYPE="${THINKING_TYPE:-}" + OPENCODE_EFFORT="${REASONING_EFFORT:-}" + OPENCODE_BUDGET="${REASONING_BUDGET_TOKENS:-}" + OPENCODE_CUSTOM_OPTIONS="${OPENCODE_OPTIONS:-}" + if [ "${FORCE_PROXY}" = "true" ] && [ "${PROVIDER}" = "anthropic" ]; then + OPENCODE_THINKING="" + OPENCODE_THINKING_TYPE="" + OPENCODE_EFFORT="" + OPENCODE_BUDGET="" + fi + + # Generate opencode.json + python3 - "${OPENCODE_NPM}" "${PROVIDER}" "${OPENCODE_BASE_URL}" 
"${OPENCODE_API_KEY}" "${MODEL}" "${OPENCODE_THINKING}" "${OPENCODE_THINKING_TYPE}" "${OPENCODE_EFFORT}" "${OPENCODE_BUDGET}" "${OPENCODE_CUSTOM_OPTIONS}" "${TEMPERATURE}" << 'PYEOF' +import sys, json, os + +npm, provider, base_url, api_key, model = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5] +thinking_raw = sys.argv[6] if len(sys.argv) > 6 else "" +thinking_type_raw = sys.argv[7] if len(sys.argv) > 7 else "" +effort_raw = sys.argv[8] if len(sys.argv) > 8 else "" +budget_raw = sys.argv[9] if len(sys.argv) > 9 else "" +custom_options_raw = sys.argv[10] if len(sys.argv) > 10 else "" +temperature_raw = sys.argv[11] if len(sys.argv) > 11 else "" + +custom_options = {} +if custom_options_raw: + try: + custom_options = json.loads(custom_options_raw) + except (json.JSONDecodeError, ValueError): + pass + +is_anthropic = "anthropic" in provider.lower() or "claude" in model.lower() +thinking_enabled = bool(thinking_raw) and thinking_raw.lower() not in ("false", "0", "no", "disabled") + +model_options = {} +if thinking_enabled: + if is_anthropic: + if thinking_type_raw: + thinking_type = thinking_type_raw + elif thinking_raw.lower() not in ("true", "1", "yes"): + thinking_type = thinking_raw + else: + thinking_type = "adaptive" + if thinking_type == "enabled": + budget = int(budget_raw) if budget_raw else 10000 + model_options = {"thinking": {"type": "enabled", "budgetTokens": budget}} + else: + model_options = {"thinking": {"type": thinking_type}} + if effort_raw: + model_options["effort"] = effort_raw + else: + model_options = {"enable_thinking": True} + +# Add reasoningEffort for non-anthropic providers when effort is specified. +# Mirror of Agent-Service harbor plugin (commit 6c012cd14): allow effort +# to take effect even when thinking flag was not explicitly turned on. 
+if effort_raw and not is_anthropic: + model_options["enable_thinking"] = True + model_options["reasoningEffort"] = effort_raw + +if custom_options: + model_options.update(custom_options) + +# Build config +config = { + "$schema": "https://opencode.ai/config.json", + "snapshot": False, + "permission": "allow", +} + +if temperature_raw: + temp = float(temperature_raw) + config["agent"] = { + k: {"temperature": temp} + for k in ["build", "plan", "general", "explore", "title", "summary", "compaction"] + } + +model_entry = {"name": model} +if model_options: + model_entry["options"] = model_options + +config["provider"] = { + provider: { + "npm": npm, + "name": provider, + "options": { + "baseURL": base_url, + "apiKey": api_key, + }, + "models": { + model: model_entry, + }, + } +} + +shared_dir = os.environ["SHARED_DIR"] +os.makedirs(shared_dir, exist_ok=True) +with open(f"{shared_dir}/opencode.json", "w") as f: + json.dump(config, f, indent=2) + f.write("\n") +PYEOF + export OPENCODE_CONFIG_PATH="${SHARED_DIR}/opencode.json" + echo "Opencode config written to ${OPENCODE_CONFIG_PATH}, the content is:" + cat "${OPENCODE_CONFIG_PATH}" +fi + +# ── Kilo-code: generate provider config ── +if [ "${HARBOR_AGENT}" = "kilo-code" ]; then + # Determine base URL and API key based on provider + if [ "${PROVIDER}" = "anthropic" ]; then + KILO_BASE_URL="${ANTHROPIC_BASE_URL}" + KILO_API_KEY="${ANTHROPIC_API_KEY}" + else + KILO_BASE_URL="${OPENAI_BASE_URL}" + KILO_API_KEY="${OPENAI_API_KEY}" + fi + + # When using proxy with anthropic, route through docker gateway + if [ "${FORCE_PROXY}" = "true" ] && [ "${PROVIDER}" = "anthropic" ]; then + echo "Waiting for claude-code proxy (kilo-code + anthropic + force_proxy)..." 
+ if command -v curl > /dev/null 2>&1; then + while true; do + curl -s --connect-timeout 1 http://localhost:8082 > /dev/null 2>&1 && break + sleep 1 + done + else + sleep 60 + fi + + DOCKER_GATEWAY=$(docker network inspect bridge --format='{{range .IPAM.Config}}{{.Gateway}}{{end}}' 2>/dev/null) + if [ -n "${DOCKER_GATEWAY}" ]; then + KILO_BASE_URL="http://${DOCKER_GATEWAY}:8082/v1" + KILO_API_KEY="any-value" + fi + fi + + # Thinking params — skip if proxy handles it (force_proxy + anthropic) + KILO_TEMPERATURE="${TEMPERATURE:-}" + KILO_THINKING="${INTERLEAVED_THINKING:-}" + KILO_THINKING_TYPE="${THINKING_TYPE:-}" + KILO_EFFORT="${REASONING_EFFORT:-}" + KILO_BUDGET="${REASONING_BUDGET_TOKENS:-}" + if [ "${FORCE_PROXY}" = "true" ] && [ "${PROVIDER}" = "anthropic" ]; then + KILO_THINKING="" + KILO_THINKING_TYPE="" + KILO_EFFORT="" + KILO_BUDGET="" + fi + + # Generate kilo.json + python3 - "${PROVIDER}" "${MODEL}" "${KILO_BASE_URL}" "${KILO_API_KEY}" "${KILO_TEMPERATURE}" "${KILO_THINKING}" "${KILO_THINKING_TYPE}" "${KILO_EFFORT}" "${KILO_BUDGET}" << 'PYEOF' +import sys, json, os + +provider = sys.argv[1] +model = sys.argv[2] +base_url = sys.argv[3] if len(sys.argv) > 3 else "" +api_key = sys.argv[4] if len(sys.argv) > 4 else "" +temperature_raw = sys.argv[5] if len(sys.argv) > 5 else "" +thinking_raw = sys.argv[6] if len(sys.argv) > 6 else "" +thinking_type_raw = sys.argv[7] if len(sys.argv) > 7 else "" +effort_raw = sys.argv[8] if len(sys.argv) > 8 else "" +budget_raw = sys.argv[9] if len(sys.argv) > 9 else "" + +is_anthropic = "anthropic" in provider.lower() or "claude" in model.lower() + +model_options = {} + +# Thinking +thinking_enabled = bool(thinking_raw) and thinking_raw.lower() not in ("false", "0", "no", "disabled") +if thinking_enabled: + if is_anthropic: + if thinking_type_raw: + thinking_type = thinking_type_raw + elif thinking_raw.lower() not in ("true", "1", "yes"): + thinking_type = thinking_raw + else: + thinking_type = "adaptive" + if thinking_type 
== "enabled": + budget = int(budget_raw) if budget_raw else 10000 + model_options["thinking"] = {"type": "enabled", "budgetTokens": budget} + else: + model_options["thinking"] = {"type": thinking_type} + if effort_raw: + model_options["effort"] = effort_raw + else: + model_options["enable_thinking"] = True + +# Build provider config +model_entry = {} +if model_options: + model_entry["options"] = model_options + +provider_options = {} +if base_url: + provider_options["baseURL"] = base_url +if api_key: + provider_options["apiKey"] = api_key + +provider_config = {"models": {model: model_entry}} +if provider_options: + provider_config["options"] = provider_options + +config = {"provider": {provider: provider_config}} + +# Temperature is configured at agent level in kilocode +if temperature_raw: + try: + temp = float(temperature_raw) + agent_config = {"temperature": temp} + config["agent"] = { + k: dict(agent_config) + for k in ["general", "plan", "explore", "title", "summary"] + } + except ValueError: + pass + +shared_dir = os.environ["SHARED_DIR"] +os.makedirs(shared_dir, exist_ok=True) +with open(f"{shared_dir}/kilo.json", "w") as f: + json.dump(config, f, indent=2) +print(f"Kilo config written to {shared_dir}/kilo.json: " + json.dumps(config)) +PYEOF + export KILO_CONFIG_PATH="${SHARED_DIR}/kilo.json" + echo "Kilo config generated at ${KILO_CONFIG_PATH}" +fi + +# ── Determine if proxy sidecar is in use ── +USE_PROXY=0 +if [ "${HARBOR_AGENT}" = "claude-code" ]; then + if [ "${FORCE_PROXY}" = "true" ]; then + USE_PROXY=1 + elif [ "${PROVIDER}" != "anthropic" ]; then + URL_LOWER=$(echo "${ANTHROPIC_BASE_URL:-}" | tr '[:upper:]' '[:lower:]') + if [[ "${URL_LOWER}" != *"anthropic"* ]]; then + USE_PROXY=1 + fi + fi +elif { [ "${HARBOR_AGENT}" = "opencode" ] || [ "${HARBOR_AGENT}" = "kilo-code" ]; } && [ "${FORCE_PROXY}" = "true" ] && [ "${PROVIDER}" = "anthropic" ]; then + USE_PROXY=1 +elif [ "${HARBOR_AGENT}" = "openclaw" ]; then + if [ "${FORCE_PROXY}" = "true" ]; then 
+ USE_PROXY=1 + elif [ -n "${PROVIDER}" ] && [ "${PROVIDER}" != "anthropic" ]; then + URL_LOWER=$(echo "${ANTHROPIC_BASE_URL:-}" | tr '[:upper:]' '[:lower:]') + if [[ "${URL_LOWER}" != *"anthropic"* ]]; then + USE_PROXY=1 + fi + fi +fi +echo "USE_PROXY=${USE_PROXY}" + +# Wait for proxy if needed (claude-code only; opencode waits earlier) +if [ "${HARBOR_AGENT}" = "claude-code" ] && [ "${USE_PROXY}" = "1" ]; then + echo "Waiting for claude-code proxy..." + if command -v curl > /dev/null 2>&1; then + while true; do + curl -s --connect-timeout 1 http://localhost:8082 > /dev/null 2>&1 && break + sleep 1 + done + else + sleep 60 + fi +fi + +if [ "${HARBOR_AGENT}" = "openclaw" ] && [ "${USE_PROXY}" = "1" ]; then + echo "Waiting for claude-code proxy..." + if command -v curl > /dev/null 2>&1; then + while true; do + curl -s --connect-timeout 1 http://localhost:8082 > /dev/null 2>&1 && break + sleep 1 + done + else + sleep 60 + fi +fi + +if [ "${HARBOR_AGENT}" = "openclaw" ] && [ "${USE_PROXY}" = "1" ]; then + if [ -z "${DOCKER_GATEWAY}" ]; then + DOCKER_GATEWAY=$(docker network inspect bridge --format='{{range .IPAM.Config}}{{.Gateway}}{{end}}' 2>/dev/null) + fi + export MODEL_BASE_URL="http://${DOCKER_GATEWAY}:8082" + export ANTHROPIC_BASE_URL="${MODEL_BASE_URL}" + export OPENAI_BASE_URL="${MODEL_BASE_URL}" + export MODEL_API="anthropic-messages" + export MODEL_API_KEY="sk-any-value" +fi + +# ── Model name ── +if [ -n "${PROVIDER}" ]; then + MODEL_NAME="${PROVIDER}/${MODEL}" +else + MODEL_NAME="${MODEL}" +fi +echo "Model name: ${MODEL_NAME}" + +# ── Claude-code: configure API endpoints and env ── +if [ "${HARBOR_AGENT}" = "claude-code" ]; then + if [ -n "${ANTHROPIC_BASE_URL}" ]; then + if [ "${USE_PROXY}" = "1" ]; then + # Route through proxy via docker bridge gateway + DOCKER_GATEWAY=$(docker network inspect bridge --format='{{range .IPAM.Config}}{{.Gateway}}{{end}}' 2>/dev/null) + if [ -n "${DOCKER_GATEWAY}" ]; then + export 
ANTHROPIC_BASE_URL="http://${DOCKER_GATEWAY}:8082" + export ANTHROPIC_API_KEY="any-value" + echo "Claude-code proxy mode: ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL}" + fi + # Strip provider prefix for proxy mode (aligns with Agent-Service main.j2 L594) + MODEL_NAME="${MODEL}" + echo "Updated MODEL_NAME to: ${MODEL_NAME}" + else + # Direct Anthropic API mode — auto-append /v1 if missing + if [[ "${ANTHROPIC_BASE_URL}" != */v1 ]]; then + ANTHROPIC_BASE_URL="${ANTHROPIC_BASE_URL}/v1" + fi + echo "Claude-code native mode: ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL}" + export ANTHROPIC_BASE_URL="${ANTHROPIC_BASE_URL}" + export ANTHROPIC_AUTH_TOKEN="${ANTHROPIC_API_KEY:-}" + export ANTHROPIC_DEFAULT_OPUS_MODEL="${MODEL}" + export ANTHROPIC_DEFAULT_SONNET_MODEL="${MODEL}" + export ANTHROPIC_DEFAULT_HAIKU_MODEL="${MODEL}" + export CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 + MODEL_NAME="${MODEL}" + unset ANTHROPIC_API_KEY + fi + fi + + if [ -n "${OPENAI_BASE_URL}" ]; then export OPENAI_BASE_URL="${OPENAI_BASE_URL}"; fi + if [ -n "${OPENAI_API_KEY}" ]; then export OPENAI_API_KEY="${OPENAI_API_KEY}"; fi + + if [ -n "${MAX_TOKENS}" ]; then + export CLAUDE_CODE_MAX_OUTPUT_TOKENS="${MAX_TOKENS}" + fi + + mkdir -p ${OUTPUT_DIR}/claudecode_logs/ +fi + +# ── Local dataset: Dockerfile injections (non-registry) ── +if [ "${DATASET_TYPE}" != "registry" ]; then + if [ "${HARBOR_AGENT}" = "claude-code" ] && [ -n "${ENABLE_TOOL_SEARCH}" ]; then + TASK_ENV_DOCKERFILE="./${INSTANCE_ID}/environment/Dockerfile" + if [ -f "${TASK_ENV_DOCKERFILE}" ]; then + echo "ENV ENABLE_TOOL_SEARCH=${ENABLE_TOOL_SEARCH}" >> "${TASK_ENV_DOCKERFILE}" + fi + fi + + # Inject NODE_OPTIONS into task Dockerfile (opencode, memory optimization) + if [ "${HARBOR_AGENT,,}" = "opencode" ] && [ "${ENABLE_NODE_MEMORY_OPTIMIZATION}" != "false" ] && [ -n "${OVERRIDE_MEMORY_MB}" ]; then + TASK_ENV_DOCKERFILE="./${INSTANCE_ID}/environment/Dockerfile" + if [ -f "${TASK_ENV_DOCKERFILE}" ]; then + 
NODE_MAX_OLD_SPACE=$((OVERRIDE_MEMORY_MB / 2)) + JSC_RAM_SIZE=$((NODE_MAX_OLD_SPACE / 1024)) + echo "" >> "${TASK_ENV_DOCKERFILE}" + echo "ENV NODE_OPTIONS=\"--max-old-space-size=${NODE_MAX_OLD_SPACE}\"" >> "${TASK_ENV_DOCKERFILE}" + echo "ENV JSC_forceRAMSize=${JSC_RAM_SIZE}gb" >> "${TASK_ENV_DOCKERFILE}" + export NODE_OPTIONS="--max-old-space-size=${NODE_MAX_OLD_SPACE}" + export JSC_forceRAMSize="${JSC_RAM_SIZE}gb" + fi + fi + +fi + +# ══════════════════════════════════════════════════════════════════════ +# LiteLLM sidecars: wait for readiness and rewrite env to route via proxy +# ══════════════════════════════════════════════════════════════════════ + +# ── Eval judge LiteLLM proxy (port 4000): wait, rewrite EVAL_* ── +if [ -n "${EVAL_API_KEY}" ] && [ -n "${EVAL_API_BASE}" ] && [ -n "${EVAL_MODEL}" ]; then + echo "Waiting for LiteLLM proxy sidecar to be ready on port 4000..." + for i in $(seq 1 30); do + if (echo > /dev/tcp/localhost/4000) 2>/dev/null; then + echo "LiteLLM proxy ready after ${i}s" + break + fi + if [ $i -eq 30 ]; then + echo "WARNING: LiteLLM proxy failed to start within 30s" + fi + sleep 1 + done + + if [ -z "${DOCKER_GATEWAY}" ]; then + DOCKER_GATEWAY=$(docker network inspect bridge --format='{{range .IPAM.Config}}{{.Gateway}}{{end}}' 2>/dev/null) + fi + if [ -n "${DOCKER_GATEWAY}" ]; then + echo "Docker gateway detected: ${DOCKER_GATEWAY}" + EVAL_MODEL_RAW="${EVAL_MODEL}" + export EVAL_API_KEY="sk-litellm-local" + export EVAL_API_BASE="http://${DOCKER_GATEWAY}:4000/v1" + # Strip provider prefix from EVAL_MODEL (e.g. 
"anthropic/sonnet" -> "sonnet") + if [[ "${EVAL_MODEL_RAW}" == */* ]]; then + export EVAL_MODEL="${EVAL_MODEL_RAW#*/}" + else + export EVAL_MODEL="${EVAL_MODEL_RAW}" + fi + echo "Overridden EVAL_API_KEY to: sk-litellm-local" + echo "Overridden EVAL_API_BASE to: ${EVAL_API_BASE}" + echo "Overridden EVAL_MODEL to: ${EVAL_MODEL} (raw: ${EVAL_MODEL_RAW})" + else + echo "WARNING: Could not detect docker gateway, using original EVAL_* env vars" + fi +fi + +# ── Codex force_proxy: rewrite OPENAI_BASE_URL to the same sidecar (port 4000) ── +# The unified litellm-proxy sidecar serves both eval judge and codex groups in +# one process; codex just targets it on the same port as eval. +if [ "${HARBOR_AGENT}" = "codex" ] && [ "${FORCE_PROXY}" = "true" ]; then + echo "Waiting for LiteLLM proxy sidecar (codex group) to be ready on port 4000..." + for i in $(seq 1 60); do + if (echo > /dev/tcp/localhost/4000) 2>/dev/null; then + echo "LiteLLM proxy ready after ${i}s" + break + fi + if [ $i -eq 60 ]; then + echo "WARNING: LiteLLM proxy failed to start within 60s" + fi + sleep 1 + done + + if [ -z "${DOCKER_GATEWAY}" ]; then + DOCKER_GATEWAY=$(docker network inspect bridge --format='{{range .IPAM.Config}}{{.Gateway}}{{end}}' 2>/dev/null) + fi + if [ -n "${DOCKER_GATEWAY}" ]; then + echo "Docker gateway detected: ${DOCKER_GATEWAY}" + export OPENAI_BASE_URL="http://${DOCKER_GATEWAY}:4000/v1" + export OPENAI_API_KEY="sk-litellm-local" + echo "Overridden OPENAI_BASE_URL to: ${OPENAI_BASE_URL}" + echo "Overridden OPENAI_API_KEY to: sk-litellm-local" + else + echo "WARNING: Could not detect docker gateway, using original OPENAI_BASE_URL" + fi +fi + +# ══════════════════════════════════════════════════════════════════════ +# Build harbor run command +# ══════════════════════════════════════════════════════════════════════ +HARBOR_ARGS=() + +if [ "${DATASET_TYPE}" = "registry" ]; then + HARBOR_ARGS+=(-d "${DATASET_NAME}@${DATASET_VERSION}") + HARBOR_ARGS+=(-t "${INSTANCE_ID}") +else + 
HARBOR_ARGS+=(-p "${INSTANCE_ID}") +fi + +HARBOR_ARGS+=(-a "${HARBOR_AGENT}") +HARBOR_ARGS+=(-m "${MODEL_NAME}") +HARBOR_ARGS+=(--env "${HARBOR_ENV}") +HARBOR_ARGS+=(--jobs-dir "${HARBOR_JOB_RESULT_PATH}") +HARBOR_ARGS+=(--n-attempts "${N_ATTEMPTS}") +HARBOR_ARGS+=(--n-concurrent "${N_CONCURRENT}") +HARBOR_ARGS+=(--timeout-multiplier "${TIMEOUT_MULTIPLIER}") +HARBOR_ARGS+=(--max-retries "${MAX_RETRIES}") +HARBOR_ARGS+=(--ak "max_turns=${MAX_ITERATIONS}") + +# Timeout multipliers +[ -n "${AGENT_TIMEOUT_MULTIPLIER}" ] && HARBOR_ARGS+=(--agent-timeout-multiplier "${AGENT_TIMEOUT_MULTIPLIER}") +[ -n "${VERIFIER_TIMEOUT_MULTIPLIER}" ] && HARBOR_ARGS+=(--verifier-timeout-multiplier "${VERIFIER_TIMEOUT_MULTIPLIER}") +[ -n "${AGENT_SETUP_TIMEOUT_MULTIPLIER}" ] && HARBOR_ARGS+=(--agent-setup-timeout-multiplier "${AGENT_SETUP_TIMEOUT_MULTIPLIER}") +[ -n "${ENVIRONMENT_BUILD_TIMEOUT_MULTIPLIER}" ] && HARBOR_ARGS+=(--environment-build-timeout-multiplier "${ENVIRONMENT_BUILD_TIMEOUT_MULTIPLIER}") + +# Absolute timeout overrides (take precedence over multipliers when both set) +[ -n "${AGENT_TIMEOUT}" ] && HARBOR_ARGS+=(--agent-timeout "${AGENT_TIMEOUT}") +[ -n "${AGENT_SETUP_TIMEOUT}" ] && HARBOR_ARGS+=(--agent-setup-timeout "${AGENT_SETUP_TIMEOUT}") +[ -n "${VERIFIER_TIMEOUT}" ] && HARBOR_ARGS+=(--verifier-timeout "${VERIFIER_TIMEOUT}") + +# Resource overrides for task container inside DinD +[ -n "${OVERRIDE_CPUS}" ] && HARBOR_ARGS+=(--override-cpus "${OVERRIDE_CPUS}") +[ -n "${OVERRIDE_MEMORY_MB}" ] && HARBOR_ARGS+=(--override-memory-mb "${OVERRIDE_MEMORY_MB}") +[ -n "${OVERRIDE_STORAGE_MB}" ] && HARBOR_ARGS+=(--override-storage-mb "${OVERRIDE_STORAGE_MB}") + +# Retry exception filters +if [ -n "${RETRY_INCLUDE}" ]; then + for exc in ${RETRY_INCLUDE}; do HARBOR_ARGS+=(--retry-include "${exc}"); done +fi +if [ -n "${RETRY_EXCLUDE}" ]; then + for exc in ${RETRY_EXCLUDE}; do HARBOR_ARGS+=(--retry-exclude "${exc}"); done +fi + +[ -n "${AGENT_VERSION}" ] && HARBOR_ARGS+=(--ak 
"version=${AGENT_VERSION}") + +# ── Terminus-2 agent kwargs ── +if [ "${HARBOR_AGENT}" = "terminus-2" ]; then + [ -n "${TEMPERATURE}" ] && HARBOR_ARGS+=(--ak "temperature=${TEMPERATURE}") + [ -n "${INTERLEAVED_THINKING}" ] && HARBOR_ARGS+=(--ak "interleaved_thinking=${INTERLEAVED_THINKING}") + [ -n "${THINKING_TYPE}" ] && HARBOR_ARGS+=(--ak "thinking_type=${THINKING_TYPE}") + [ -n "${REASONING_EFFORT}" ] && HARBOR_ARGS+=(--ak "reasoning_effort=${REASONING_EFFORT}") + [ -n "${REASONING_BUDGET_TOKENS}" ] && HARBOR_ARGS+=(--ak "reasoning_budget_tokens=${REASONING_BUDGET_TOKENS}") + [ -n "${MAX_TOKENS}" ] && HARBOR_ARGS+=(--ak "max_tokens=${MAX_TOKENS}") + [ -n "${USE_HACK}" ] && HARBOR_ARGS+=(--ak "use_hack=${USE_HACK}") + [ -n "${CONTEXT_1M}" ] && HARBOR_ARGS+=(--ak "context_1m=${CONTEXT_1M}") + [ -n "${STREAM}" ] && HARBOR_ARGS+=(--ak "stream=${STREAM}") + [ -n "${PARSER_NAME}" ] && HARBOR_ARGS+=(--ak "parser_name=${PARSER_NAME}") + [ -n "${MAX_THINKING_TOKENS}" ] && HARBOR_ARGS+=(--ak "max_thinking_tokens=${MAX_THINKING_TOKENS}") + [ -n "${ENABLE_SUMMARIZE}" ] && HARBOR_ARGS+=(--ak "enable_summarize=${ENABLE_SUMMARIZE}") + [ -n "${PROACTIVE_SUMMARIZATION_THRESHOLD}" ] && HARBOR_ARGS+=(--ak "proactive_summarization_threshold=${PROACTIVE_SUMMARIZATION_THRESHOLD}") + [ -n "${MODEL_INFO}" ] && HARBOR_ARGS+=(--ak "model_info=${MODEL_INFO}") + [ -n "${LLM_KWARGS}" ] && HARBOR_ARGS+=(--ak "llm_kwargs=${LLM_KWARGS}") + [ -n "${LLM_CALL_KWARGS}" ] && HARBOR_ARGS+=(--ak "llm_call_kwargs=${LLM_CALL_KWARGS}") + [ -n "${STORE_ALL_MESSAGES}" ] && HARBOR_ARGS+=(--ak "store_all_messages=${STORE_ALL_MESSAGES}") + [ -n "${RECORD_TERMINAL_SESSION}" ] && HARBOR_ARGS+=(--ak "record_terminal_session=${RECORD_TERMINAL_SESSION}") + [ -n "${USE_RESPONSES_API}" ] && HARBOR_ARGS+=(--ak "use_responses_api=${USE_RESPONSES_API}") + [ -n "${LLM_BACKEND}" ] && HARBOR_ARGS+=(--ak "llm_backend=${LLM_BACKEND}") + [ -n "${COLLECT_ROLLOUT_DETAILS}" ] && HARBOR_ARGS+=(--ak 
"collect_rollout_details=${COLLECT_ROLLOUT_DETAILS}") + [ -n "${MAX_QUERY_LLM_RETRIES}" ] && HARBOR_ARGS+=(--ak "max_query_llm_retries=${MAX_QUERY_LLM_RETRIES}") + [ -n "${LLM_MAX_RETRIES}" ] && HARBOR_ARGS+=(--ak "llm_max_retries=${LLM_MAX_RETRIES}") + [ -n "${LITELLM_TIMEOUT}" ] && HARBOR_ARGS+=(--ak "litellm_timeout=${LITELLM_TIMEOUT}") + [ -n "${TRAJECTORY_CONFIG}" ] && HARBOR_ARGS+=(--ak "trajectory_config=${TRAJECTORY_CONFIG}") +fi + +# ── Claude-code agent kwargs ── +if [ "${HARBOR_AGENT}" = "claude-code" ]; then + [ -n "${MAX_THINKING_TOKENS}" ] && HARBOR_ARGS+=(--ak "max_thinking_tokens=${MAX_THINKING_TOKENS}") + [ -n "${REASONING_EFFORT}" ] && HARBOR_ARGS+=(--ak "reasoning_effort=${REASONING_EFFORT}") + [ -n "${ALLOWED_TOOLS}" ] && HARBOR_ARGS+=(--ak "allowed_tools=${ALLOWED_TOOLS}") + [ -n "${DISALLOWED_TOOLS}" ] && HARBOR_ARGS+=(--ak "disallowed_tools=${DISALLOWED_TOOLS}") + [ -n "${BARE}" ] && HARBOR_ARGS+=(--ak "bare=${BARE}") +fi + +# ── OpenClaw agent kwargs ── +if [ "${HARBOR_AGENT}" = "openclaw" ]; then + [ -n "${THINKING_LEVEL}" ] && HARBOR_ARGS+=(--ak "thinking_level=${THINKING_LEVEL}") + [ -n "${MODEL_BASE_URL}" ] && HARBOR_ARGS+=(--ak "model_base_url=${MODEL_BASE_URL}") + [ -n "${MODEL_API_KEY}" ] && HARBOR_ARGS+=(--ak "model_api_key=${MODEL_API_KEY}") + HARBOR_ARGS+=(--ak "model_reasoning=true") + [ -n "${MODEL_API:-}" ] && HARBOR_ARGS+=(--ak "model_api=${MODEL_API}") + [ -n "${MODEL_CONTEXT_LENGTH}" ] && HARBOR_ARGS+=(--ak "model_context_length=${MODEL_CONTEXT_LENGTH}") + [ -n "${MODEL_MAX_TOKENS}" ] && HARBOR_ARGS+=(--ak "model_max_tokens=${MODEL_MAX_TOKENS}") + [ -n "${TEMPERATURE}" ] && HARBOR_ARGS+=(--ak "temperature=${TEMPERATURE}") + [ -n "${OPENCLAW_CONFIG}" ] && HARBOR_ARGS+=(--ak "openclaw_config=${OPENCLAW_CONFIG}") + + FORWARDED_AGENT_ENV_VARS=( + HARBOR_NPM_INSTALL_MODE + HARBOR_NODE_DIST_BASE_URL + HARBOR_NPM_REGISTRY_URL + OPENCLAW_NODE_VERSION + HARBOR_OPENCLAW_INSTALL_VERBOSE + OPENCLAW_NPM_LOGLEVEL + ) + for env_name in 
"${FORWARDED_AGENT_ENV_VARS[@]}"; do + env_value="${!env_name:-}" + if [ -n "${env_value}" ]; then + HARBOR_ARGS+=(--ae "${env_name}=${env_value}") + fi + done +fi + +# ── Codex agent kwargs ── +# Mirrors qwen/Agent-Service/.../harbor/main.j2 L893-L912. +if [ "${HARBOR_AGENT}" = "codex" ]; then + [ -n "${REASONING_EFFORT}" ] && HARBOR_ARGS+=(--ak "reasoning_effort=${REASONING_EFFORT}") + [ -n "${REASONING_SUMMARY}" ] && HARBOR_ARGS+=(--ak "reasoning_summary=${REASONING_SUMMARY}") + if [ "${FORCE_PROXY}" = "true" ]; then + HARBOR_ARGS+=(--ak "normalize_model_slug=true") + fi + [ -n "${STREAM_MAX_RETRIES}" ] && HARBOR_ARGS+=(--ak "stream_max_retries=${STREAM_MAX_RETRIES}") + [ -n "${STREAM_IDLE_TIMEOUT_MS}" ] && HARBOR_ARGS+=(--ak "stream_idle_timeout_ms=${STREAM_IDLE_TIMEOUT_MS}") + [ -n "${REQUEST_MAX_RETRIES}" ] && HARBOR_ARGS+=(--ak "request_max_retries=${REQUEST_MAX_RETRIES}") +fi + +# Extra --ak escape hatch +if [ -n "${EXTRA_AGENT_KWARGS}" ]; then + for kv in ${EXTRA_AGENT_KWARGS}; do HARBOR_ARGS+=(--ak "${kv}"); done +fi + +# Extra --ae escape hatch: AGENT_EXTRA_ENV is a JSON dict {KEY: VALUE} +# that gets forwarded as --ae KEY=VALUE to harbor run (env vars for the +# agent process, distinct from --ak agent kwargs). 
+if [ -n "${AGENT_EXTRA_ENV}" ]; then + while IFS='=' read -r ae_key ae_value; do + [ -z "${ae_key}" ] && continue + HARBOR_ARGS+=(--ae "${ae_key}=${ae_value}") + done < <(python3 -c " +import json, sys +try: + data = json.loads(sys.argv[1]) + for k, v in data.items(): + print(f'{k}={v}') +except Exception as e: + print(f'ERROR: Failed to parse AGENT_EXTRA_ENV as JSON: {e}', file=sys.stderr) + sys.exit(1) +" "${AGENT_EXTRA_ENV}") +fi + +HARBOR_ARGS+=(--no-delete) + +# Skip harbor run interactive confirmation when SKIP_CONFIRM=true (mirrors Agent-Service `skip_confirm` field) +if [ "${SKIP_CONFIRM:-}" = "true" ]; then + HARBOR_ARGS+=(-y) +fi + +echo "======= RUNNING HARBOR COMMAND =======" +echo "harbor run ${HARBOR_ARGS[*]}" +echo "======================================" + +harbor run "${HARBOR_ARGS[@]}" 2>&1 | tee ${HARBOR_JOB_RESULT_PATH}/harbor_stdout.txt + +if [ ${PIPESTATUS[0]} -eq 0 ]; then + echo "Harbor execution completed successfully" +else + echo "ERROR: Harbor execution failed" + exit 1 +fi + +# ── Collect results ── +mkdir -p "${OUTPUT_DIR}" +cp -r "${HARBOR_JOB_RESULT_PATH}/." 
"${OUTPUT_DIR}/" + +# Mask apiKey in trial.log files to avoid leaking secrets +find "${OUTPUT_DIR}" -name "trial.log" -type f 2>/dev/null | while read -r logfile; do + sed -i 's/"apiKey": "[^"]*"/"apiKey": "sk-masked"/g' "${logfile}" 2>/dev/null || true +done + +# Copy opencode.json to OUTPUT_DIR (with apiKey masked) +if [ "${HARBOR_AGENT}" = "opencode" ] && [ -f "${SHARED_DIR}/opencode.json" ]; then + cp "${SHARED_DIR}/opencode.json" "${OUTPUT_DIR}/opencode.json" + sed -i 's/"apiKey": "[^"]*"/"apiKey": "sk-masked"/g' "${OUTPUT_DIR}/opencode.json" 2>/dev/null || true +fi + +# Copy proxy logs if proxy was used +if [ "${USE_PROXY}" = "1" ]; then + if [ -d "${OUTPUT_DIR}/claudecode_logs" ] && [ "$(ls -A ${OUTPUT_DIR}/claudecode_logs 2>/dev/null)" ]; then + mkdir -p ${OUTPUT_DIR}/claudecode_logs/ + # Skip copy if source and destination resolve to the same path + _src_real=$(realpath ${OUTPUT_DIR}/claudecode_logs 2>/dev/null || echo "${OUTPUT_DIR}/claudecode_logs") + _dst_real=$(realpath ${OUTPUT_DIR}/claudecode_logs 2>/dev/null || echo "${OUTPUT_DIR}/claudecode_logs") + if [ "${_src_real}" != "${_dst_real}" ]; then + cp -r ${OUTPUT_DIR}/claudecode_logs/* ${OUTPUT_DIR}/claudecode_logs/ + fi + fi +fi + +# Copy codex LiteLLM proxy logs if codex + force_proxy +if [ "${HARBOR_AGENT}" = "codex" ] && [ "${FORCE_PROXY}" = "true" ]; then + if [ -d "${OUTPUT_DIR}/codex_litellm_logs" ] && [ "$(ls -A ${OUTPUT_DIR}/codex_litellm_logs 2>/dev/null)" ]; then + mkdir -p ${OUTPUT_DIR}/codex_litellm_logs/ + # Skip copy if source and destination resolve to the same path + _src_real=$(realpath ${OUTPUT_DIR}/codex_litellm_logs 2>/dev/null || echo "${OUTPUT_DIR}/codex_litellm_logs") + _dst_real=$(realpath ${OUTPUT_DIR}/codex_litellm_logs 2>/dev/null || echo "${OUTPUT_DIR}/codex_litellm_logs") + if [ "${_src_real}" != "${_dst_real}" ]; then + cp -r ${OUTPUT_DIR}/codex_litellm_logs/* ${OUTPUT_DIR}/codex_litellm_logs/ + fi + fi +fi + +# ── Write harbor_summary.json ── +if [ "${DATASET_TYPE}" = 
"registry" ]; then +cat > ${OUTPUT_DIR}/harbor_summary.json < ${OUTPUT_DIR}/harbor_summary.json < 0) + break + except Exception as e: + print(f"[metrics] fallback: failed reading aggregated results: {e}", file=sys.stderr) + +# 3) small-model stats from claudecode_logs/proxy_stdout.log +_STRICT_SUCCESS_STEPS = { + "stream_done_received", + "stream_finalization_message_stop", + "non_stream_response_received", +} +_RETRY_INTERMEDIATE_STEPS = { + "openai_non_stream_retry", + "openai_stream_retry", + "native_non_stream_retry", + "native_stream_retry", +} +_RID_RE = re.compile(r'"request_id"\s*:\s*"([^"]*)"') +_STEP_RE = re.compile(r'"step"\s*:\s*"([^"]*)"') +_CODE_RE = re.compile(r'"code"\s*:\s*"([^"]*)"') + +small = {"small_calls": 0, "small_success": 0, "small_failed": 0, "small_unknown": 0} +proxy_log = OUTPUT_DIR / "claudecode_logs" / "proxy_stdout.log" +if proxy_log.exists(): + def _first(rg, line): + m = rg.search(line) + return m.group(1) if m else "" + def _looks_like_error(step, code): + if step in _RETRY_INTERMEDIATE_STEPS: + return False + sl = step.lower() + if "error" in sl or "exception" in sl: + return True + if code: + try: + return int(code) >= 400 + except ValueError: + return False + return False + def _load_record(line): + try: + return json.loads(line) + except json.JSONDecodeError: + idx = line.find("{") + if idx < 0: + return None + try: + return json.loads(line[idx:]) + except json.JSONDecodeError: + return None + + calls = {} # request_id -> [has_success, has_error] + try: + with proxy_log.open("r", encoding="utf-8", errors="replace") as h: + for line in h: + if not line.startswith("{"): + continue + step = _first(_STEP_RE, line) + if not step: + continue + if step == "request_routing_selected": + rec = _load_record(line) + if not rec: + continue + rid = rec.get("request_id") or "" + ctx = rec.get("context") or {} + if not rid or ctx.get("model_tier") != "small": + continue + calls[rid] = [False, False] + continue + rid = _first(_RID_RE, line) + 
if not rid or rid not in calls: + continue + code = _first(_CODE_RE, line) + if step in _STRICT_SUCCESS_STEPS: + calls[rid][0] = True + if _looks_like_error(step, code): + calls[rid][1] = True + except Exception as e: + print(f"[metrics] failed reading proxy log {proxy_log}: {e}", file=sys.stderr) + calls = {} + succ = sum(1 for s, err in calls.values() if s and not err) + fail = sum(1 for s, err in calls.values() if err) + total = len(calls) + small = { + "small_calls": total, + "small_success": succ, + "small_failed": fail, + "small_unknown": total - succ - fail, + } +# Wrap extra fields under "extra_fields" key +metrics["extra_fields"] = { + "agent_exit_reason": agent_exit_reason, + "internal_error_type": internal_error_type, + "scaffold": scaffold, + **small, +} + +with open(METRICS_OUT, "w") as f: + json.dump(metrics, f, indent=2) +print(f"[metrics] metrics.json written: {metrics}") +PYEOF diff --git a/examples/job/compose/sidecars/proxy-sidecar.sh b/examples/job/compose/sidecars/proxy-sidecar.sh new file mode 100644 index 0000000000..286ee6c1c9 --- /dev/null +++ b/examples/job/compose/sidecars/proxy-sidecar.sh @@ -0,0 +1,185 @@ +#!/bin/bash +# ============================================================================= +# 来源: Agent-Hub/task/harbor/proxy-sidecar.sh(177 行完整脚本,未做任何内容改动) +# 用途: ROCK ComposeJobConfig 端到端用例 — cc-proxy sidecar +# +# 监听端口: 8082(见 PORT=8082,第 134 行) +# ComposeJobConfig 中声明: sidecars[].health.port = 8082 +# 主容器通过 http://${DOCKER_GATEWAY}:8082 访问(动态获取 docker bridge gateway IP) +# ============================================================================= +# harbor-v2/proxy-sidecar.sh — Claude-code proxy sidecar. +# Self-detects if proxy is needed; sleeps if not. + + +# Mounted volumes — read AP-injected env vars, with local-docker fallback. +# DO NOT override OUTPUT_DIR with a hardcoded path; that breaks the AP uploader. 
# Resolve the mounted directories: keep whatever the environment already
# provides and only fall back to the local defaults when a value is empty.
: "${OUTPUT_DIR:=/tmp/output}"
: "${SHARED_DIR:=/tmp/shared}"
export OUTPUT_DIR SHARED_DIR

# Model credentials may arrive under the generic API_KEY / BASE_URL names.
: "${MODEL_API_KEY:=${API_KEY}}"
: "${MODEL_BASE_URL:=${BASE_URL}}"

# ── Determine if proxy is needed ──

# Print the first argument lower-cased (used for case-insensitive matching).
_lowercase() {
    echo "$1" | tr '[:upper:]' '[:lower:]'
}

# Succeeds when neither the provider name nor the base URL points at Anthropic.
_upstream_is_not_anthropic() {
    [ "$PROVIDER_LOWER" != "anthropic" ] && [[ "$URL_LOWER" != *"anthropic"* ]]
}

AGENT_LOWER=$(_lowercase "${HARBOR_AGENT}")
PROVIDER_LOWER=$(_lowercase "${PROVIDER:-}")
URL_LOWER=$(_lowercase "${MODEL_BASE_URL:-}")

use_claude_proxy=false
use_opencode_proxy=false
use_kilocode_proxy=false
use_openclaw_proxy=false

# Exactly one agent branch can match, so a single dispatch replaces the
# independent if-blocks.
case "$AGENT_LOWER" in
    claude-code)
        # Proxy when forced, or when the upstream is not Anthropic-native.
        if [ "${FORCE_PROXY}" = "true" ] || _upstream_is_not_anthropic; then
            use_claude_proxy=true
        fi
        ;;
    opencode)
        if [ "${FORCE_PROXY}" = "true" ] && [ "$PROVIDER_LOWER" = "anthropic" ]; then
            use_opencode_proxy=true
        fi
        ;;
    kilo-code)
        # Kilo-code uses the same proxy mechanism as opencode
        if [ "${FORCE_PROXY}" = "true" ] && [ "$PROVIDER_LOWER" = "anthropic" ]; then
            use_kilocode_proxy=true
        fi
        ;;
    openclaw)
        if [ "${FORCE_PROXY}" = "true" ] || _upstream_is_not_anthropic; then
            use_openclaw_proxy=true
        fi
        ;;
esac

echo "AGENT_LOWER: $AGENT_LOWER"
echo "FORCE_PROXY: ${FORCE_PROXY}"
echo "PROVIDER_LOWER: $PROVIDER_LOWER"

# Report every decision flag and remember whether any of them is set.
proxy_needed=false
for flag_name in use_claude_proxy use_opencode_proxy use_kilocode_proxy use_openclaw_proxy; do
    echo "${flag_name}: ${!flag_name}"
    if [ "${!flag_name}" = "true" ]; then
        proxy_needed=true
    fi
done

# No proxy required: stay alive as an idle sidecar instead of exiting.
if [ "$proxy_needed" = "false" ]; then
    echo "Proxy not needed for agent=${HARBOR_AGENT}, sleeping..."
    exec sleep infinity
fi

echo "Starting claude-code proxy for agent=${HARBOR_AGENT}..."

# ── Map Agent-Hub env vars to proxy env vars ──
# The proxy reads OPENAI_* names; they are filled from the MODEL_* values
# resolved earlier in this script.
OPENAI_API_KEY="${MODEL_API_KEY}"
OPENAI_BASE_URL="${MODEL_BASE_URL}"
OPENAI_MODEL="${MODEL}"

# Native Anthropic mode (off unless one of the branches below enables it)
USE_NATIVE_ANTHROPIC="false"
ANTHROPIC_NATIVE_API_KEY=""
ANTHROPIC_NATIVE_BASE_URL=""

if [ "$use_claude_proxy" = "true" ] && [ "${FORCE_PROXY}" = "true" ]; then
    # Forced claude-code proxy: native only when requested or provider is Anthropic.
    if [ "${NATIVE_ANTHROPIC}" = "true" ] || [[ "$PROVIDER_LOWER" == *"anthropic"* ]]; then
        USE_NATIVE_ANTHROPIC="true"
        ANTHROPIC_NATIVE_API_KEY="${MODEL_API_KEY}"
        ANTHROPIC_NATIVE_BASE_URL="${MODEL_BASE_URL}"
    fi
elif [ "$use_opencode_proxy" = "true" ] || [ "$use_kilocode_proxy" = "true" ]; then
    # opencode / kilo-code proxies are only enabled for the anthropic provider.
    USE_NATIVE_ANTHROPIC="true"
    ANTHROPIC_NATIVE_API_KEY="${MODEL_API_KEY}"
    ANTHROPIC_NATIVE_BASE_URL="${MODEL_BASE_URL}"
elif [ "$use_openclaw_proxy" = "true" ]; then
    if [ "${NATIVE_ANTHROPIC}" = "true" ] || [[ "$PROVIDER_LOWER" == *"anthropic"* ]]; then
        USE_NATIVE_ANTHROPIC="true"
        ANTHROPIC_NATIVE_API_KEY="${MODEL_API_KEY}"
        ANTHROPIC_NATIVE_BASE_URL="${MODEL_BASE_URL}"
    fi
fi

# Force thinking for opencode / kilo-code / openclaw proxies
THINKING_FORCE_ENABLED="false"
if [ "$use_opencode_proxy" = "true" ] || [ "$use_kilocode_proxy" = "true" ] || [ "$use_openclaw_proxy" = "true" ]; then
    THINKING_FORCE_ENABLED="true"
fi

# Thinking parameters
THINKING_ENABLED="${INTERLEAVED_THINKING:-false}"
THINKING_MODE="${THINKING_TYPE:-enabled}"
THINKING_BUDGET_TOKENS="${REASONING_BUDGET_TOKENS:-63000}"

ADD_EFFORT_PARAM="false"
EFFORT_TO_SET="high"
if [ -n "${REASONING_EFFORT:-}" ]; then
    ADD_EFFORT_PARAM="true"
    EFFORT_TO_SET="${REASONING_EFFORT}"
fi

ADD_1M_CONTEXT="${CONTEXT_1M:-false}"
USE_SYSTEM_HINT="${USE_HACK:-false}"
MAX_TOKENS_LIMIT="${MAX_TOKENS:-18000}"
# NOTE(review): MIN_TOKENS_LIMIT is derived from MAX_TOKENS as well, so both
# limits are always equal — confirm this is intentional.
MIN_TOKENS_LIMIT="${MAX_TOKENS:-18000}"

# ── Write proxy .env ──
# Abort if the proxy checkout is missing; otherwise .env would be written into
# whatever directory the script happens to be running from.
cd /tmp/claude-code-proxy/ || {
    echo "ERROR: /tmp/claude-code-proxy/ not found; cannot start proxy" >&2
    exit 1
}
# Unquoted delimiter on purpose: the ${...} references must expand now.
cat <<EOF > .env
OPENAI_API_KEY=${OPENAI_API_KEY}
OPENAI_BASE_URL=${OPENAI_BASE_URL}
BIG_MODEL=${OPENAI_MODEL}
MIDDLE_MODEL=${MIDDLE_MODEL:-${OPENAI_MODEL}}
MIDDLE_OPENAI_API_KEY=${MIDDLE_OPENAI_API_KEY:-${OPENAI_API_KEY}}
MIDDLE_OPENAI_BASE_URL=${MIDDLE_OPENAI_BASE_URL:-${OPENAI_BASE_URL}}
SMALL_MODEL=${SMALL_MODEL:-${OPENAI_MODEL}}
SMALL_OPENAI_API_KEY=${SMALL_OPENAI_API_KEY:-${OPENAI_API_KEY}}
SMALL_OPENAI_BASE_URL=${SMALL_OPENAI_BASE_URL:-${OPENAI_BASE_URL}}
FORCE_OPENAI_NON_STREAMING=true
NON_STREAMING_RETRY_ENABLED=true
NON_STREAMING_RETRY_ATTEMPTS=10
NON_STREAMING_RETRY_DELAY=10
NON_STREAMING_RETRY_BACKOFF=2.0
HOST=0.0.0.0
PORT=8082
LOG_LEVEL=INFO
REQUEST_LOGGING_ENABLED=false
RESPONSE_LOGGING_ENABLED=false
REQUEST_LOG_LEVEL=INFO
PAIR_LOGGING_ENABLED=true
JSON_LOG_ENABLED=true
JSON_LOG_DIR=logs
MAX_TOKENS_LIMIT=${MAX_TOKENS_LIMIT}
MIN_TOKENS_LIMIT=${MIN_TOKENS_LIMIT}
REQUEST_TIMEOUT=${REQUEST_TIMEOUT:-3600}
MAX_RETRIES=10
TEMPERATURE=${TEMPERATURE}
ANTHROPIC_NATIVE_API_KEY=${ANTHROPIC_NATIVE_API_KEY:-}
ANTHROPIC_NATIVE_BASE_URL=${ANTHROPIC_NATIVE_BASE_URL:-}
USE_NATIVE_ANTHROPIC=${USE_NATIVE_ANTHROPIC:-false}
THINKING_ENABLED=${THINKING_ENABLED:-false}
THINKING_FORCE_ENABLED=${THINKING_FORCE_ENABLED:-false}
THINKING_MODE=${THINKING_MODE:-enabled}
THINKING_BUDGET_TOKENS=${THINKING_BUDGET_TOKENS:-63000}
ADD_EFFORT_PARAM=${ADD_EFFORT_PARAM:-false}
EFFORT_TO_SET=${EFFORT_TO_SET:-high}
ADD_1M_CONTEXT=${ADD_1M_CONTEXT:-false}
USE_SYSTEM_HINT=${USE_SYSTEM_HINT:-false}
REASONING_SPLIT=${REASONING_SPLIT:-false}
TOP_P=${TOP_P:-none}
EOF

# Append custom proxy env vars (JSON object of KEY -> VALUE).
# Still best-effort, but a malformed PROXY_ENVS is now reported instead of
# being dropped without a trace.
if [ -n "${PROXY_ENVS:-}" ]; then
    echo "$PROXY_ENVS" | python3 -c "
import sys, json
d = json.load(sys.stdin)
for k, v in d.items():
    print(f'{k}={v}')
" >> .env 2>/dev/null || echo "WARNING: PROXY_ENVS is not valid JSON; ignoring it" >&2
fi

mkdir -p "${OUTPUT_DIR}/claudecode_logs"
# create a softlink for cc logs sync
# -f/-n: replace a link left over from a previous start instead of failing.
ln -sfn "${OUTPUT_DIR}/claudecode_logs" /tmp/claude-code-proxy/logs
# validate:
ls -la /tmp/claude-code-proxy/
python /tmp/claude-code-proxy/start_proxy.py 2>&1 | tee "${OUTPUT_DIR}/claudecode_logs/proxy_stdout.log"
diff --git
a/rock/sdk/job/__init__.py b/rock/sdk/job/__init__.py index dbc087b1a4..4aeb1e59f2 100644 --- a/rock/sdk/job/__init__.py +++ b/rock/sdk/job/__init__.py @@ -1,8 +1,11 @@ # Auto-register BashTrial (safe: no bench dependency). # HarborTrial is registered by rock.sdk.bench.__init__ to avoid a circular # import when rock.sdk.job is triggered mid-bench-load. +# ComposeTrial is registered by importing rock.sdk.job.compose.trial below. import rock.sdk.job.trial.bash # noqa: F401 +import rock.sdk.job.compose.trial # noqa: F401 from rock.sdk.job.api import Job +from rock.sdk.job.compose.config import ComposeJobConfig from rock.sdk.job.config import BashJobConfig, JobConfig from rock.sdk.job.executor import JobClient, JobExecutor, TrialClient from rock.sdk.job.operator import Operator, ScatterOperator @@ -13,6 +16,7 @@ "Job", "JobConfig", "BashJobConfig", + "ComposeJobConfig", "JobResult", "JobStatus", "TrialResult", diff --git a/rock/sdk/job/compose/__init__.py b/rock/sdk/job/compose/__init__.py new file mode 100644 index 0000000000..5919724a49 --- /dev/null +++ b/rock/sdk/job/compose/__init__.py @@ -0,0 +1,5 @@ +"""Docker Compose multi-container job configuration.""" + +from rock.sdk.job.compose.config import ComposeJobConfig + +__all__ = ["ComposeJobConfig"] diff --git a/rock/sdk/job/compose/config.py b/rock/sdk/job/compose/config.py new file mode 100644 index 0000000000..5ef78d7a50 --- /dev/null +++ b/rock/sdk/job/compose/config.py @@ -0,0 +1,289 @@ +"""ComposeJobConfig — Docker Compose multi-container job configuration. + +Extends BashJobConfig with a ``compose`` block that describes the inner +DinD container orchestration (main + sidecars + init containers).
+ +Type detection signal: ``"compose" in yaml_data`` +""" + +from __future__ import annotations + +import re +from datetime import datetime + +import yaml +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator + +from rock.logger import init_logger +from rock.sdk.job.config import BashJobConfig + +logger = init_logger(__name__) + +# Regex for valid container names (used as docker --network-alias) +_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9-]*$") + + +# ── Sub-models: inner container specs ──────────────────────────────────────── + + +class ResourceSpec(BaseModel): + """Single inner-container resource declaration. + + request / limit dual-value (aligns with K8s requests/limits semantics). + In single-host docker mode: + cpus → --cpus (used as hard limit when cpu_limit absent) + cpu_limit → --cpus (hard upper bound, takes priority) + memory → --memory-reservation (soft limit) + memory_limit → --memory (hard upper bound) + Setting only cpus/memory is the common case (treated as hard limit). + """ + + model_config = ConfigDict(extra="forbid") + + cpus: float | None = None + memory: str | None = None + cpu_limit: float | None = None + memory_limit: str | None = None + + +class VolumeMount(BaseModel): + """Container volume mount. + + By default ``name`` refers to the shared named volume used for cross-container + (init → main) data passing, mounted at ``mount_path``. + + When ``host_path`` is set, the mount instead bind-mounts a real path from the + OUTER sandbox into the container at ``mount_path`` — e.g. to expose the outer + docker socket (``host_path: /var/run/docker.sock``) so the container reuses the + outer dockerd instead of starting its own (avoids the 3rd DinD layer). 
+ """ + + model_config = ConfigDict(extra="forbid") + + name: str + mount_path: str + main_mount_path: str | None = None + host_path: str | None = None + read_only: bool = False + + +class SecretEnvEntry(BaseModel): + """Source declaration for a single secret environment variable (K8s Secret style).""" + + model_config = ConfigDict(extra="forbid") + + secret_name: str + secret_key: str + + +class OssDep(BaseModel): + """A single dependency to download from OSS before running.""" + + model_config = ConfigDict(extra="forbid") + + key: str + target_path: str + extract: bool = False + + +class HealthSpec(BaseModel): + """Sidecar readiness probe (optional).""" + + model_config = ConfigDict(extra="forbid") + + port: int + timeout_sec: int = 60 + + +class _ContainerBase(BaseModel): + """Common fields for init and sidecar containers. + + Entry-point (choose at most one): + - script Inline shell (runner writes to sandbox then runs with ``bash``) + - script_path Path inside sandbox (``bash ``) + - command/args Override image ENTRYPOINT (not via bash), e.g. for running + stock images like dockerd: command=["dockerd"], args=["--tls=false"] + All three absent → use image's own ENTRYPOINT/CMD. + """ + + model_config = ConfigDict(extra="forbid") + + name: str + image: str + script: str | None = None + script_path: str | None = None + command: list[str] | None = None + args: list[str] | None = None + env: dict[str, str] = Field(default_factory=dict) + secret_env: dict[str, SecretEnvEntry] = Field(default_factory=dict) + resources: ResourceSpec | None = None + privileged: bool = False + + @field_validator("name") + @classmethod + def _validate_name(cls, v: str) -> str: + if not _NAME_RE.match(v): + raise ValueError( + f"Container name '{v}' is invalid. Must match ^[a-z0-9][a-z0-9-]*$ (used as docker --network-alias)." 
+ ) + return v + + @model_validator(mode="after") + def _entrypoint_exclusive(self) -> _ContainerBase: + modes = [bool(self.script), bool(self.script_path), bool(self.command)] + if sum(modes) > 1: + raise ValueError( + f"container '{self.name}': script / script_path / command are mutually exclusive — use at most one" + ) + if self.args and not self.command: + raise ValueError(f"container '{self.name}': args must be used together with command") + return self + + +class InitContainerSpec(_ContainerBase): + """Init container: runs serially before the main container starts.""" + + volume_mounts: list[VolumeMount] = Field(default_factory=list) + + +class SidecarSpec(_ContainerBase): + """Sidecar container: runs in parallel with main; name becomes docker network-alias.""" + + health: HealthSpec | None = None + volume_mounts: list[VolumeMount] = Field(default_factory=list) + + +class MainContainerSpec(BaseModel): + """Main container spec. Entry-point script is provided by ComposeJobConfig top-level script/script_path.""" + + model_config = ConfigDict(extra="forbid") + + image: str + resources: ResourceSpec | None = None + env: dict[str, str] = Field(default_factory=dict) + secret_env: dict[str, SecretEnvEntry] = Field(default_factory=dict) + oss_deps: list[OssDep] = Field(default_factory=list) + volume_mounts: list[VolumeMount] = Field(default_factory=list) + privileged: bool = False + + +class ComposeSpec(BaseModel): + """Top-level compose block: inner docker orchestration inside DinD.""" + + model_config = ConfigDict(extra="forbid") + + main: MainContainerSpec + init_containers: list[InitContainerSpec] = Field(default_factory=list) + sidecars: list[SidecarSpec] = Field(default_factory=list) + + @model_validator(mode="after") + def _unique_names(self) -> ComposeSpec: + names = [c.name for c in self.init_containers] + [s.name for s in self.sidecars] + if len(names) != len(set(names)): + raise ValueError("compose: init_containers / sidecars names must be globally unique") 
+ return self + + +# ── ComposeJobConfig ────────────────────────────────────────────────────────── + + +class ComposeJobConfig(BashJobConfig): + """Docker Compose multi-container Job configuration. + + Inherits from BashJobConfig: + - script / script_path at the top level describe the main container entry-point + - environment describes the outer DinD sandbox + + Adds a top-level ``compose`` block describing the inner container orchestration. + Type-detection signal: presence of ``compose`` key in YAML data. + """ + + model_config = ConfigDict(extra="forbid") + + job_name: str = Field(default_factory=lambda: datetime.now().strftime("%Y-%m-%d__%H-%M-%S")) + + compose: ComposeSpec # required; its presence identifies ComposeJobConfig + + @model_validator(mode="after") + def _proxy_conflict_check(self) -> ComposeJobConfig: + """Disallow environment.proxy together with a sidecar named 'proxy' (double-proxy).""" + if self.environment.proxy and self.environment.proxy.enabled: + if any(s.name == "proxy" for s in self.compose.sidecars): + raise ValueError( + "environment.proxy and a sidecar named 'proxy' cannot both be enabled. " + "Choose one: use the proxy sidecar container, or use the sandbox model-service." 
+ ) + return self + + @model_validator(mode="after") + def _resource_budget_check(self) -> ComposeJobConfig: + """Warn (not fail) when inner container resources exceed the outer sandbox budget.""" + try: + self._check_resource_budget() + except Exception: + pass # never fail validation due to budget check errors + return self + + def _check_resource_budget(self) -> None: + """Internal helper: accumulate inner cpus/memory and warn if they exceed outer sandbox.""" + + outer_cpus: float | None = getattr(self.environment, "cpus", None) + outer_memory_str: str | None = getattr(self.environment, "memory", None) + + def parse_memory_gb(s: str | None) -> float | None: + if s is None: + return None + s = s.strip().lower() + if s.endswith("gi"): + return float(s[:-2]) + if s.endswith("g"): + return float(s[:-1]) + if s.endswith("mi"): + return float(s[:-2]) / 1024 + if s.endswith("m"): + return float(s[:-1]) / 1024 + return None + + def container_cpus(r: ResourceSpec | None) -> float: + if r is None: + return 0.0 + return r.cpu_limit or r.cpus or 0.0 + + def container_mem(r: ResourceSpec | None) -> float: + if r is None: + return 0.0 + return parse_memory_gb(r.memory_limit) or parse_memory_gb(r.memory) or 0.0 + + all_specs: list[ResourceSpec | None] = ( + [self.compose.main.resources] + + [c.resources for c in self.compose.init_containers] + + [s.resources for s in self.compose.sidecars] + ) + + total_cpus = sum(container_cpus(r) for r in all_specs) + total_mem = sum(container_mem(r) for r in all_specs) + + if outer_cpus is not None and total_cpus > 0 and total_cpus > outer_cpus: + logger.warning( + "ComposeJobConfig resource budget: inner containers total cpus=%.1f " + "exceeds outer sandbox cpus=%.1f — may cause OOM or throttling.", + total_cpus, + outer_cpus, + ) + + if outer_memory_str is not None and total_mem > 0: + outer_mem = parse_memory_gb(outer_memory_str) + if outer_mem is not None and total_mem > outer_mem: + logger.warning( + "ComposeJobConfig resource budget: 
inner containers total memory=%.1fGi " + "exceeds outer sandbox memory=%.1fGi — may cause OOM.", + total_mem, + outer_mem, + ) + + @classmethod + def from_yaml(cls, path: str) -> ComposeJobConfig: + """Load a ComposeJobConfig from YAML file.""" + with open(path) as f: + data = yaml.safe_load(f) + return cls.model_validate(data) diff --git a/rock/sdk/job/compose/trial.py b/rock/sdk/job/compose/trial.py new file mode 100644 index 0000000000..00b07c6ef3 --- /dev/null +++ b/rock/sdk/job/compose/trial.py @@ -0,0 +1,568 @@ +"""ComposeTrial — Docker Compose multi-container job trial inside a DinD sandbox. + +The outer sandbox is a Docker-in-Docker (DinD) environment. This trial +generates a ``runner.sh`` that orchestrates inner containers (init → sidecars → +main) entirely through the ``docker`` CLI available in the outer sandbox. +""" + +from __future__ import annotations + +import os +import shlex +from typing import TYPE_CHECKING + +from rock.logger import init_logger +from rock.sdk.job.compose.config import ( + ComposeJobConfig, + InitContainerSpec, + MainContainerSpec, + OssDep, + ResourceSpec, + SecretEnvEntry, + SidecarSpec, +) +from rock.sdk.job.result import ExceptionInfo, TrialResult +from rock.sdk.job.trial.abstract import AbstractTrial +from rock.sdk.job.trial.registry import register_trial + +if TYPE_CHECKING: + from rock.sdk.sandbox.client import Sandbox + +logger = init_logger(__name__) + +_OSS_CREDENTIAL_FIELDS = ( + "oss_access_key_id", + "oss_access_key_secret", + "oss_endpoint", + "oss_region", + "oss_bucket", +) + +# ── runner.sh skeleton ──────────────────────────────────────────────────────── +# Placeholders use __UPPER__ style to avoid collision with bash ${var} syntax. +# NEVER use str.format() on this template — it contains {} in bash constructs. 
+ +_RUNNER_SKELETON = r"""#!/bin/bash +set +e + +RUNNER_EXIT=0 + +# ──────────────────────────────────────────────────────────────────────────── +# cleanup_all — invoked by trap EXIT +# ──────────────────────────────────────────────────────────────────────────── +cleanup_all() { + echo "[rock-compose] Cleaning up containers / network / volume ..." + docker rm -f rock-main-$$ 2>/dev/null || true +__SIDECAR_CLEANUP__ + docker network rm rock_compose_$$ 2>/dev/null || true + docker volume rm rock_shared_$$ 2>/dev/null || true +} + +trap cleanup_all EXIT + +# ──────────────────────────────────────────────────────────────────────────── +# P0 — start dockerd (ROCK kata sandbox does NOT auto-start it), then wait ready +# ──────────────────────────────────────────────────────────────────────────── +# NOTE: in a ROCK kata DinD sandbox dockerd is NOT running on entry. We must +# start it ourselves. Gotchas learned from real runs on the kata backend: +# 1. nohup'd dockerd does not inherit the interactive shell PATH, so it fails +# with "containerd executable file not found" — we export PATH explicitly. +# 2. the kata guest lacks /proc/sys/net/bridge/bridge-nf-call-iptables, so +# dockerd's default bridge network init fails unless we set +# DOCKER_IGNORE_BR_NETFILTER_ERROR=1. +echo "[rock-compose] P0: starting dockerd ..." +if ! docker info >/dev/null 2>&1; then + if ! 
pgrep -x dockerd >/dev/null 2>&1; then + PATH=/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin \ + DOCKER_IGNORE_BR_NETFILTER_ERROR=1 \ + nohup dockerd >/var/log/dockerd.log 2>&1 & + fi +fi +for i in $(seq 1 60); do + if docker info >/dev/null 2>&1; then + echo "[rock-compose] dockerd is ready" + break + fi + sleep 2 + if [ "$i" -eq 60 ]; then + echo "[rock-compose] ERROR: dockerd not ready after 120s" + echo "[rock-compose] --- dockerd.log tail ---" + tail -20 /var/log/dockerd.log 2>/dev/null || true + exit 1 + fi +done + +docker network create rock_compose_$$ 2>/dev/null || true +docker volume create rock_shared_$$ 2>/dev/null || true + +__REGISTRY_LOGIN__ + +mkdir -p /rock/logs + +# ──────────────────────────────────────────────────────────────────────────── +# P1 — OSS dependency download (conditional) +# ──────────────────────────────────────────────────────────────────────────── +__OSS_DEPS__ + +# ──────────────────────────────────────────────────────────────────────────── +# P2 — init containers (serial) +# ──────────────────────────────────────────────────────────────────────────── +echo "[rock-compose] P2: starting init containers ..." +__INIT_CONTAINERS__ + +# ──────────────────────────────────────────────────────────────────────────── +# P3 — sidecar containers (parallel, detached) +# ──────────────────────────────────────────────────────────────────────────── +echo "[rock-compose] P3: starting sidecar containers ..." +__SIDECAR_CONTAINERS__ + +# ──────────────────────────────────────────────────────────────────────────── +# P4 — health probes for sidecars that declare health +# ──────────────────────────────────────────────────────────────────────────── +echo "[rock-compose] P4: health probes ..." 
+__HEALTH_PROBES__ + +# ──────────────────────────────────────────────────────────────────────────── +# P5 — main container (foreground) +# ──────────────────────────────────────────────────────────────────────────── +echo "[rock-compose] P5: starting main container ..." +__MAIN_CONTAINER__ +RUNNER_EXIT=${PIPESTATUS[0]} +echo "[rock-compose] main exited with code $RUNNER_EXIT" + +# ──────────────────────────────────────────────────────────────────────────── +# P6 — collect sidecar logs + optional OSS mirror upload +# ──────────────────────────────────────────────────────────────────────────── +echo "[rock-compose] P6: collecting logs ..." +__COLLECT_SIDECAR_LOGS__ +__OSS_MIRROR_UPLOAD__ + +# ──────────────────────────────────────────────────────────────────────────── +# P7 — explicit exit with main's exit code (cleanup via trap) +# ──────────────────────────────────────────────────────────────────────────── +exit $RUNNER_EXIT +""" + + +# ── helpers ─────────────────────────────────────────────────────────────────── + + +def _resource_args(res: ResourceSpec | None) -> list[str]: + """Convert ResourceSpec to docker run resource flag strings.""" + args: list[str] = [] + if res is None: + return args + cpu = res.cpu_limit if res.cpu_limit is not None else res.cpus + if cpu is not None: + args.append(f"--cpus {cpu}") + if res.memory is not None: + args.append(f"--memory-reservation {shlex.quote(res.memory)}") + mem_limit = res.memory_limit if res.memory_limit is not None else None + if mem_limit is not None: + args.append(f"--memory {shlex.quote(mem_limit)}") + return args + + +def _volume_args(volume_mounts) -> list[str]: + """Render -v flags for volume_mounts. + + host_path set → bind-mount real outer path; otherwise use the shared named volume. 
+ """ + args: list[str] = [] + for vm in volume_mounts: + suffix = ":ro" if vm.read_only else "" + if vm.host_path: + args.append(f"-v {shlex.quote(vm.host_path)}:{shlex.quote(vm.mount_path)}{suffix}") + else: + args.append(f"-v rock_shared_$$:{shlex.quote(vm.mount_path)}{suffix}") + return args + + +def _env_args(env: dict[str, str], secret_env: dict[str, SecretEnvEntry]) -> list[str]: + """Build -e flags for plain env and secret_env (shell variable references).""" + args: list[str] = [] + for k, v in env.items(): + args.append(f"-e {shlex.quote(k)}={shlex.quote(v)}") + for k in secret_env: + # Render secret as a shell variable reference — value is never embedded literally. + args.append(f'-e {shlex.quote(k)}="${{{k}}}"') + return args + + +def _entrypoint_args(spec) -> tuple[list[str], str]: + """Return (flag_args, positional_cmd_str) for a container spec. + + Returns: + flag_args — e.g. ["--entrypoint bash"] or [] + positional — positional command string after the image, e.g. "bash /rock/scripts/name.sh" + """ + flag_args: list[str] = [] + positional = "" + + if spec.command: + # Override entrypoint with command[0]; remaining command args + spec.args → positional + flag_args.append(f"--entrypoint {shlex.quote(spec.command[0])}") + remainder = list(spec.command[1:]) + (spec.args or []) + if remainder: + positional = " ".join(shlex.quote(a) for a in remainder) + elif spec.script_path: + positional = f"bash {shlex.quote(spec.script_path)}" + elif spec.script: + # Inline script was written to /rock/scripts/.sh during setup + positional = f"bash /rock/scripts/{spec.name}.sh" + # else: use image's own ENTRYPOINT/CMD — no flags or positional needed + + return flag_args, positional + + +def _render_oss_deps(oss_deps: list[OssDep]) -> str: + """Render P1 OSS dependency download block.""" + if not oss_deps: + return "# (no oss_deps)" + lines = ['echo "[rock-compose] P1: downloading OSS dependencies ..."'] + for dep in oss_deps: + key_q = shlex.quote(dep.key) + target_q = 
shlex.quote(dep.target_path) + if dep.extract: + lines.append( + f"ossutil cp {key_q} /tmp/_rock_dep_archive && " + f"mkdir -p {target_q} && " + f"tar -xf /tmp/_rock_dep_archive -C {target_q}" + ) + else: + lines.append(f"ossutil cp {key_q} {target_q}") + return "\n".join(lines) + + +def _render_init_containers(init_containers: list[InitContainerSpec]) -> str: + """Render P2 init container serial execution block.""" + if not init_containers: + return "# (no init containers)" + lines = [] + for ic in init_containers: + run_parts = [ + "docker run --rm", + "--network rock_compose_$$", + f"--network-alias {shlex.quote(ic.name)}", + "-v rock_shared_$$:/rock/shared", + "-v /rock/scripts:/rock/scripts:ro", + ] + run_parts.extend(_resource_args(ic.resources)) + if ic.privileged: + run_parts.append("--privileged") + run_parts.extend(_env_args(ic.env, ic.secret_env)) + run_parts.extend(_volume_args(ic.volume_mounts)) + flag_a, pos = _entrypoint_args(ic) + run_parts.extend(flag_a) + run_parts.append(shlex.quote(ic.image)) + if pos: + run_parts.append(pos) + + cmd = " \\\n ".join(run_parts) + lines.append(f'echo "[rock-compose] init: {ic.name}"') + lines.append(cmd) + lines.append(f'if [ $? 
-ne 0 ]; then echo "[rock-compose] init container {ic.name} failed"; exit 1; fi') + return "\n".join(lines) + + +def _render_sidecar_containers(sidecars: list[SidecarSpec]) -> str: + """Render P3 sidecar container launch block (detached).""" + if not sidecars: + return "# (no sidecars)" + lines = [] + for sc in sidecars: + run_parts = [ + "docker run -d", + f"--name rock-sidecar-{sc.name}-$$", + "--network rock_compose_$$", + f"--network-alias {shlex.quote(sc.name)}", + "-v rock_shared_$$:/rock/shared", + "-v /rock/scripts:/rock/scripts:ro", + ] + run_parts.extend(_resource_args(sc.resources)) + if sc.privileged: + run_parts.append("--privileged") + run_parts.extend(_env_args(sc.env, sc.secret_env)) + run_parts.extend(_volume_args(sc.volume_mounts)) + flag_a, pos = _entrypoint_args(sc) + run_parts.extend(flag_a) + run_parts.append(shlex.quote(sc.image)) + if pos: + run_parts.append(pos) + + cmd = " \\\n ".join(run_parts) + lines.append(f'echo "[rock-compose] sidecar: {sc.name}"') + lines.append(cmd) + return "\n".join(lines) + + +def _render_health_probes(sidecars: list[SidecarSpec]) -> str: + """Render P4 health probe block for sidecars that declare health.""" + health_sidecars = [sc for sc in sidecars if sc.health is not None] + if not health_sidecars: + return "# (no health probes)" + lines = [] + for sc in health_sidecars: + h = sc.health + if h is None: + continue # type narrowing guard (already filtered above) + timeout = h.timeout_sec + port = h.port + lines.append(f'echo "[rock-compose] health probe: {sc.name}:{port} (timeout {timeout}s)"') + lines.append("_rock_health_ok=0") + lines.append(f"for _i in $(seq 1 {timeout}); do") + lines.append( + f" if docker run --rm --network rock_compose_$$ busybox " + f"nc -z {shlex.quote(sc.name)} {port} 2>/dev/null; then" + ) + lines.append(f' echo "[rock-compose] sidecar {sc.name} is ready"; _rock_health_ok=1; break; fi') + lines.append(" sleep 1") + lines.append("done") + lines.append( + f'if [ "$_rock_health_ok" -eq 
0 ]; then echo "[rock-compose] ERROR: {sc.name} not ready after {timeout}s"; exit 1; fi' + ) + return "\n".join(lines) + + +def _render_main_container(main: MainContainerSpec) -> str: + """Render P5 main container execution (foreground, tee logs).""" + run_parts = [ + "docker run --name rock-main-$$", + "--network rock_compose_$$", + "--network-alias main", + "-v rock_shared_$$:/rock/shared", + # Mount the outer-sandbox scripts dir so the container can run main.sh. + # /rock/scripts lives in the OUTER sandbox; inner containers need it bind-mounted. + "-v /rock/scripts:/rock/scripts:ro", + ] + run_parts.extend(_resource_args(main.resources)) + if main.privileged: + run_parts.append("--privileged") + run_parts.extend(_env_args(main.env, main.secret_env)) + run_parts.extend(_volume_args(main.volume_mounts)) + run_parts.append(shlex.quote(main.image)) + # Main entrypoint is always bash /rock/scripts/main.sh (script/script_path uploaded there) + run_parts.append("bash /rock/scripts/main.sh") + + cmd = " \\\n ".join(run_parts) + return f"{cmd} 2>&1 | tee /rock/logs/main.log" + + +def _render_collect_sidecar_logs(sidecars: list[SidecarSpec]) -> str: + """Render P6 sidecar log collection and stop.""" + if not sidecars: + return "# (no sidecars to collect)" + lines = [] + for sc in sidecars: + lines.append(f"docker logs rock-sidecar-{sc.name}-$$ > /rock/logs/{sc.name}.log 2>&1 || true") + lines.append(f"docker stop rock-sidecar-{sc.name}-$$ 2>/dev/null || true") + return "\n".join(lines) + + +def _render_sidecar_cleanup(sidecars: list[SidecarSpec]) -> str: + """Render per-sidecar docker rm -f lines for cleanup_all.""" + if not sidecars: + return "" + lines = [f" docker rm -f rock-sidecar-{sc.name}-$$ 2>/dev/null || true" for sc in sidecars] + return "\n".join(lines) + + +def _render_oss_mirror_upload(config: ComposeJobConfig) -> str: + """Render P6 OSS mirror upload block (conditional).""" + mirror = config.environment.oss_mirror + if mirror is None or not mirror.enabled: + 
return "# (no oss mirror upload)" + return ( + 'echo "[rock-compose] uploading artifacts to OSS ..."\n' + 'ossutil cp /rock/logs/ "oss://$OSS_BUCKET/$ROCK_OSS_PREFIX/" \\\n' + " --recursive -f \\\n" + ' || echo "[rock-compose] oss upload failed (rc=$?), ignored" >&2' + ) + + +def _render_registry_login(config: ComposeJobConfig) -> str: + """Render optional docker login using registry credentials from env.""" + env = config.environment.env + registry = env.get("ROCK_REGISTRY_HOST", "") + if not registry: + return "# (no registry login)" + return ( + f'echo "[rock-compose] logging in to registry {registry} ..."\n' + f"docker login {shlex.quote(registry)} \\\n" + f' -u "$ROCK_REGISTRY_USER" \\\n' + f' -p "$ROCK_REGISTRY_PASSWORD" || true' + ) + + +# ── ComposeTrial ────────────────────────────────────────────────────────────── + + +class ComposeTrial(AbstractTrial): + """Docker Compose multi-container trial. + + Manages inner container orchestration inside a DinD outer sandbox via + a generated ``runner.sh`` script. + """ + + _config: ComposeJobConfig + + def __init__(self, config: ComposeJobConfig): + super().__init__(config) + self._ossutil_ready: bool = False + + def _oss_mirror_enabled(self) -> bool: + mirror = self._config.environment.oss_mirror + return mirror is not None and mirror.enabled + + def _prepare_oss_session_env(self) -> None: + """Resolve OSS credentials and inject ROCK_* keys into environment.env. + + Follows the same resolution order as BashTrial: + 1. OssMirrorConfig field + 2. environment.env + 3. 
host os.environ + """ + mirror = self._config.environment.oss_mirror + env = self._config.environment.env + + for field_name in _OSS_CREDENTIAL_FIELDS: + env_key = field_name.upper() + v = getattr(mirror, field_name, None) or env.get(env_key) or os.environ.get(env_key) + if v: + env[env_key] = v + + if not self._config.namespace: + raise ValueError("oss_mirror: namespace is not set (sandbox did not return one)") + if not self._config.experiment_id: + raise ValueError("oss_mirror: experiment_id is not set (sandbox did not return one)") + for env_key in ("OSS_BUCKET", "OSS_ENDPOINT", "OSS_REGION"): + if not env.get(env_key): + raise ValueError(f"oss_mirror.enabled=True but {env_key} is not resolvable") + + from rock import env_vars + + env["ROCK_ARTIFACT_DIR"] = env_vars.ROCK_BASH_JOB_ARTIFACT_DIR + env["ROCK_OSS_PREFIX"] = ( + f"artifacts/{self._config.namespace}/{self._config.experiment_id}/{self._config.job_name}" + ) + + async def on_sandbox_ready(self, sandbox: Sandbox) -> None: + """Backfill namespace/experiment_id then prepare OSS session env.""" + await super().on_sandbox_ready(sandbox) + if self._oss_mirror_enabled(): + self._prepare_oss_session_env() + + async def setup(self, sandbox: Sandbox) -> None: + """Upload files, write inline container scripts, and render runner.sh. + + Deliberately does NOT call super().setup() to skip _setup_proxy — + DinD compose jobs manage their own networking and proxy sidecar. + However, we call _upload_files() directly to handle environment.uploads. 
+ """ + # Upload user-specified files (skip _setup_proxy) + await self._upload_files(sandbox) + + compose = self._config.compose + + # Write inline scripts for init/sidecar containers that have script= set + all_containers: list = list(compose.init_containers) + list(compose.sidecars) + for ctr in all_containers: + if ctr.script: + await sandbox.write_file_by_path(ctr.script, f"/rock/scripts/{ctr.name}.sh") + + # Write main container script (from top-level script / script_path) + main_script = self._config.script or "" + if self._config.script_path: + from pathlib import Path + + main_script = Path(self._config.script_path).read_text() + await sandbox.write_file_by_path(main_script, "/rock/scripts/main.sh") + + # Ensure ossutil available if any oss_deps declared + if compose.main.oss_deps: + self._ossutil_ready = await sandbox.fs.ensure_ossutil() + if not self._ossutil_ready: + logger.warning("ossutil install failed — OSS deps download may fail") + + # Render and write runner.sh + runner_content = self._render_runner_sh() + await sandbox.write_file_by_path(runner_content, "/rock/runner.sh") + + def build(self) -> str: + return "bash /rock/runner.sh" + + async def collect(self, sandbox: Sandbox, output: str, exit_code: int) -> TrialResult: + """Collect result: on failure, capture container logs for diagnostics.""" + exception_info = None + if exit_code != 0: + exception_info = ExceptionInfo( + exception_type="ComposeMainContainerFailed", + exception_message=f"Compose main container exited with code {exit_code}", + ) + + compose = self._config.compose + + # Collect main container log + main_log_obs = await sandbox.arun("cat /rock/logs/main.log 2>/dev/null || true") + if main_log_obs.output: + logger.info("[rock-compose] main log:\n%s", main_log_obs.output) + + # Collect sidecar logs + for sc in compose.sidecars: + sc_log_obs = await sandbox.arun(f"cat /rock/logs/{sc.name}.log 2>/dev/null || true") + if sc_log_obs.output: + logger.info("[rock-compose] sidecar %s 
log:\n%s", sc.name, sc_log_obs.output) + + # Collect init container logs (best-effort, may not exist) + for ic in compose.init_containers: + ic_log_obs = await sandbox.arun(f"cat /rock/logs/{ic.name}.log 2>/dev/null || true") + if ic_log_obs.output: + logger.info("[rock-compose] init %s log:\n%s", ic.name, ic_log_obs.output) + + return TrialResult( + task_name=self._config.job_name or "", + exception_info=exception_info, + raw_output=output, + exit_code=exit_code, + ) + + def _render_runner_sh(self) -> str: + """Render the complete runner.sh from the compose config. + + Uses str.replace on __PLACEHOLDER__ tokens — never str.format() — + to safely handle bash ${var}, ${PIPESTATUS[0]}, and {} literals. + """ + compose = self._config.compose + + runner = _RUNNER_SKELETON + + # P0 registry login + runner = runner.replace("__REGISTRY_LOGIN__", _render_registry_login(self._config)) + + # P1 OSS deps (from main container spec) + runner = runner.replace("__OSS_DEPS__", _render_oss_deps(compose.main.oss_deps)) + + # P2 init containers + runner = runner.replace("__INIT_CONTAINERS__", _render_init_containers(compose.init_containers)) + + # P3 sidecars + runner = runner.replace("__SIDECAR_CONTAINERS__", _render_sidecar_containers(compose.sidecars)) + + # P4 health probes + runner = runner.replace("__HEALTH_PROBES__", _render_health_probes(compose.sidecars)) + + # P5 main container + runner = runner.replace("__MAIN_CONTAINER__", _render_main_container(compose.main)) + + # P6 collect sidecar logs + OSS mirror upload + runner = runner.replace("__COLLECT_SIDECAR_LOGS__", _render_collect_sidecar_logs(compose.sidecars)) + runner = runner.replace("__OSS_MIRROR_UPLOAD__", _render_oss_mirror_upload(self._config)) + + # cleanup_all sidecar removal + runner = runner.replace("__SIDECAR_CLEANUP__", _render_sidecar_cleanup(compose.sidecars)) + + return runner + + +# Auto-register on import +register_trial(ComposeJobConfig, ComposeTrial) diff --git a/rock/sdk/job/config.py 
b/rock/sdk/job/config.py index 8498a32bff..94545abc97 100644 --- a/rock/sdk/job/config.py +++ b/rock/sdk/job/config.py @@ -79,6 +79,7 @@ def from_yaml(cls, path: str) -> JobConfig: from rock.sdk.bench.models.job.config import HarborJobConfig harbor_error: ValidationError | None = None + compose_error: ValidationError | None = None bash_error: ValidationError | None = None try: @@ -86,6 +87,15 @@ def from_yaml(cls, path: str) -> JobConfig: except (ValidationError, ValueError) as exc: harbor_error = exc + # Compose: detected by the presence of a "compose" key in the YAML data. + if "compose" in data: + from rock.sdk.job.compose.config import ComposeJobConfig + + try: + return ComposeJobConfig.model_validate(data) + except (ValidationError, ValueError) as exc: + compose_error = exc + try: return BashJobConfig.model_validate(data) except (ValidationError, ValueError) as exc: @@ -93,8 +103,9 @@ def from_yaml(cls, path: str) -> JobConfig: raise ValueError( "YAML does not match any known job type.\n" - f" As HarborJobConfig: {harbor_error}\n" - f" As BashJobConfig: {bash_error}" + f" As HarborJobConfig: {harbor_error}\n" + f" As ComposeJobConfig: {compose_error}\n" + f" As BashJobConfig: {bash_error}" ) diff --git a/tests/unit/sdk/job/test_compose_config.py b/tests/unit/sdk/job/test_compose_config.py new file mode 100644 index 0000000000..345d5128f5 --- /dev/null +++ b/tests/unit/sdk/job/test_compose_config.py @@ -0,0 +1,623 @@ +"""Tests for rock.sdk.job.compose.config — ComposeJobConfig and sub-models.""" + +from __future__ import annotations + +import textwrap +from unittest.mock import patch + +import pytest + +from rock.sdk.job.compose.config import ( + ComposeJobConfig, + ComposeSpec, + HealthSpec, + InitContainerSpec, + MainContainerSpec, + OssDep, + ResourceSpec, + SecretEnvEntry, + SidecarSpec, + VolumeMount, +) +from rock.sdk.job.config import BashJobConfig, JobConfig + +# --------------------------------------------------------------------------- +# ResourceSpec 
+# --------------------------------------------------------------------------- + + +class TestResourceSpec: + def test_defaults(self): + r = ResourceSpec() + assert r.cpus is None + assert r.memory is None + assert r.cpu_limit is None + assert r.memory_limit is None + + def test_all_fields(self): + r = ResourceSpec(cpus=4.0, memory="12g", cpu_limit=8.0, memory_limit="24g") + assert r.cpus == 4.0 + assert r.memory == "12g" + assert r.cpu_limit == 8.0 + assert r.memory_limit == "24g" + + def test_extra_forbid(self): + with pytest.raises(Exception): + ResourceSpec(cpus=1, unknown_field="x") + + +# --------------------------------------------------------------------------- +# VolumeMount +# --------------------------------------------------------------------------- + + +class TestVolumeMount: + def test_defaults(self): + v = VolumeMount(name="vol", mount_path="/data") + assert v.name == "vol" + assert v.mount_path == "/data" + assert v.main_mount_path is None + + def test_all_fields(self): + v = VolumeMount(name="vol", mount_path="/data", main_mount_path="/main-data") + assert v.main_mount_path == "/main-data" + + def test_extra_forbid(self): + with pytest.raises(Exception): + VolumeMount(name="v", mount_path="/p", extra_field="x") + + +# --------------------------------------------------------------------------- +# SecretEnvEntry +# --------------------------------------------------------------------------- + + +class TestSecretEnvEntry: + def test_fields(self): + s = SecretEnvEntry(secret_name="my-secret", secret_key="api-key") + assert s.secret_name == "my-secret" + assert s.secret_key == "api-key" + + def test_extra_forbid(self): + with pytest.raises(Exception): + SecretEnvEntry(secret_name="s", secret_key="k", bad="x") + + +# --------------------------------------------------------------------------- +# OssDep +# --------------------------------------------------------------------------- + + +class TestOssDep: + def test_defaults(self): + o = 
OssDep(key="path/to/obj", target_path="/data/") + assert o.key == "path/to/obj" + assert o.target_path == "/data/" + assert o.extract is False + + def test_extract_true(self): + o = OssDep(key="archive.tar.gz", target_path="/out/", extract=True) + assert o.extract is True + + def test_extra_forbid(self): + with pytest.raises(Exception): + OssDep(key="k", target_path="/t", bad="x") + + +# --------------------------------------------------------------------------- +# HealthSpec +# --------------------------------------------------------------------------- + + +class TestHealthSpec: + def test_defaults(self): + h = HealthSpec(port=8080) + assert h.port == 8080 + assert h.timeout_sec == 60 + + def test_custom_timeout(self): + h = HealthSpec(port=9090, timeout_sec=120) + assert h.timeout_sec == 120 + + def test_extra_forbid(self): + with pytest.raises(Exception): + HealthSpec(port=80, extra="x") + + +# --------------------------------------------------------------------------- +# InitContainerSpec / SidecarSpec (via _ContainerBase) +# --------------------------------------------------------------------------- + + +class TestContainerBase: + """Tests for _ContainerBase validators via InitContainerSpec (concrete subclass).""" + + def test_valid_name_pattern(self): + c = InitContainerSpec(name="my-container", image="ubuntu:22.04") + assert c.name == "my-container" + + def test_valid_name_alphanumeric_only(self): + c = InitContainerSpec(name="abc123", image="ubuntu:22.04") + assert c.name == "abc123" + + def test_invalid_name_uppercase(self): + with pytest.raises(Exception, match="invalid"): + InitContainerSpec(name="MyContainer", image="ubuntu:22.04") + + def test_invalid_name_starts_with_dash(self): + with pytest.raises(Exception, match="invalid"): + InitContainerSpec(name="-bad", image="ubuntu:22.04") + + def test_invalid_name_underscore(self): + with pytest.raises(Exception, match="invalid"): + InitContainerSpec(name="my_container", image="ubuntu:22.04") + + def 
test_defaults(self): + c = InitContainerSpec(name="init", image="ubuntu:22.04") + assert c.script is None + assert c.script_path is None + assert c.command is None + assert c.args is None + assert c.env == {} + assert c.secret_env == {} + assert c.resources is None + assert c.privileged is False + assert c.volume_mounts == [] + + def test_script_only(self): + c = InitContainerSpec(name="init", image="ubuntu:22.04", script="echo hi") + assert c.script == "echo hi" + + def test_script_path_only(self): + c = InitContainerSpec(name="init", image="ubuntu:22.04", script_path="/run.sh") + assert c.script_path == "/run.sh" + + def test_command_only(self): + c = InitContainerSpec(name="init", image="ubuntu:22.04", command=["dockerd"]) + assert c.command == ["dockerd"] + + def test_command_with_args(self): + c = InitContainerSpec(name="init", image="ubuntu:22.04", command=["dockerd"], args=["--tls=false"]) + assert c.command == ["dockerd"] + assert c.args == ["--tls=false"] + + def test_entrypoint_exclusive_script_and_script_path(self): + with pytest.raises(Exception, match="mutually exclusive"): + InitContainerSpec(name="init", image="ubuntu:22.04", script="echo hi", script_path="/run.sh") + + def test_entrypoint_exclusive_script_and_command(self): + with pytest.raises(Exception, match="mutually exclusive"): + InitContainerSpec(name="init", image="ubuntu:22.04", script="echo hi", command=["bash"]) + + def test_entrypoint_exclusive_all_three(self): + with pytest.raises(Exception, match="mutually exclusive"): + InitContainerSpec( + name="init", + image="ubuntu:22.04", + script="echo hi", + script_path="/run.sh", + command=["bash"], + ) + + def test_args_without_command_raises(self): + with pytest.raises(Exception, match="args must be used together with command"): + InitContainerSpec(name="init", image="ubuntu:22.04", args=["--flag"]) + + def test_extra_forbid(self): + with pytest.raises(Exception): + InitContainerSpec(name="init", image="ubuntu:22.04", unknown="x") + + def 
test_privileged_default_false(self): + c = InitContainerSpec(name="init", image="ubuntu:22.04") + assert c.privileged is False + + def test_privileged_true(self): + c = InitContainerSpec(name="init", image="ubuntu:22.04", privileged=True) + assert c.privileged is True + + +class TestSidecarSpec: + def test_defaults(self): + s = SidecarSpec(name="proxy", image="ubuntu:22.04") + assert s.health is None + assert s.volume_mounts == [] + + def test_health_field(self): + s = SidecarSpec(name="proxy", image="ubuntu:22.04", health=HealthSpec(port=8082)) + assert s.health.port == 8082 + + def test_extra_forbid(self): + with pytest.raises(Exception): + SidecarSpec(name="s", image="img", bad_field="x") + + +# --------------------------------------------------------------------------- +# MainContainerSpec +# --------------------------------------------------------------------------- + + +class TestMainContainerSpec: + def test_required_image(self): + m = MainContainerSpec(image="myregistry/main:latest") + assert m.image == "myregistry/main:latest" + + def test_defaults(self): + m = MainContainerSpec(image="img") + assert m.resources is None + assert m.env == {} + assert m.secret_env == {} + assert m.oss_deps == [] + assert m.volume_mounts == [] + assert m.privileged is False + + def test_extra_forbid(self): + with pytest.raises(Exception): + MainContainerSpec(image="img", unknown="x") + + def test_privileged(self): + m = MainContainerSpec(image="img", privileged=True) + assert m.privileged is True + + +# --------------------------------------------------------------------------- +# ComposeSpec +# --------------------------------------------------------------------------- + + +class TestComposeSpec: + def test_minimal(self): + cs = ComposeSpec(main=MainContainerSpec(image="main:latest")) + assert cs.init_containers == [] + assert cs.sidecars == [] + + def test_unique_names_ok(self): + cs = ComposeSpec( + main=MainContainerSpec(image="main:latest"), + 
init_containers=[InitContainerSpec(name="init1", image="img")], + sidecars=[SidecarSpec(name="sidecar1", image="img")], + ) + assert len(cs.init_containers) == 1 + assert len(cs.sidecars) == 1 + + def test_duplicate_names_raises(self): + with pytest.raises(Exception, match="unique"): + ComposeSpec( + main=MainContainerSpec(image="main:latest"), + init_containers=[InitContainerSpec(name="dup", image="img")], + sidecars=[SidecarSpec(name="dup", image="img")], + ) + + def test_duplicate_among_sidecars_raises(self): + with pytest.raises(Exception, match="unique"): + ComposeSpec( + main=MainContainerSpec(image="main:latest"), + sidecars=[ + SidecarSpec(name="dup", image="img"), + SidecarSpec(name="dup", image="img2"), + ], + ) + + def test_extra_forbid(self): + with pytest.raises(Exception): + ComposeSpec(main=MainContainerSpec(image="img"), unknown="x") + + +# --------------------------------------------------------------------------- +# ComposeJobConfig +# --------------------------------------------------------------------------- + + +class TestComposeJobConfig: + def _minimal_compose(self): + return {"main": {"image": "main:latest"}} + + def test_inherits_bash_job_config(self): + assert issubclass(ComposeJobConfig, BashJobConfig) + + def test_inherits_job_config(self): + assert issubclass(ComposeJobConfig, JobConfig) + + def test_minimal_config(self): + cfg = ComposeJobConfig(compose={"main": {"image": "main:latest"}}) + assert isinstance(cfg.compose, ComposeSpec) + assert cfg.compose.main.image == "main:latest" + + def test_job_name_default_datetime(self): + import re + + cfg = ComposeJobConfig(compose={"main": {"image": "main:latest"}}) + assert re.match(r"\d{4}-\d{2}-\d{2}__\d{2}-\d{2}-\d{2}", cfg.job_name) + + def test_explicit_job_name_preserved(self): + cfg = ComposeJobConfig(job_name="my-job", compose={"main": {"image": "main:latest"}}) + assert cfg.job_name == "my-job" + + def test_extra_forbid(self): + with pytest.raises(Exception): + 
ComposeJobConfig(compose={"main": {"image": "img"}}, unknown_field="x") + + def test_compose_required(self): + with pytest.raises(Exception): + ComposeJobConfig() + + def test_proxy_conflict_check_raises(self): + with pytest.raises(Exception, match="proxy"): + ComposeJobConfig( + compose={ + "main": {"image": "main:latest"}, + "sidecars": [{"name": "proxy", "image": "proxy:latest"}], + }, + environment={ + "proxy": {"enabled": True}, + }, + ) + + def test_proxy_conflict_check_ok_when_proxy_disabled(self): + """No conflict when environment.proxy.enabled=False.""" + + # Should not raise + cfg = ComposeJobConfig( + compose={ + "main": {"image": "main:latest"}, + "sidecars": [{"name": "proxy", "image": "proxy:latest"}], + }, + environment={ + "proxy": {"enabled": False}, + }, + ) + assert cfg is not None + + def test_proxy_conflict_check_ok_when_no_proxy_sidecar(self): + """No conflict when sidecar is not named 'proxy'.""" + + cfg = ComposeJobConfig( + compose={ + "main": {"image": "main:latest"}, + "sidecars": [{"name": "notproxy", "image": "proxy:latest"}], + }, + environment={ + "proxy": {"enabled": True}, + }, + ) + assert cfg is not None + + def test_resource_budget_check_logs_warning_when_over(self): + """_resource_budget_check logs a warning when inner cpus exceed outer.""" + import rock.sdk.job.compose.config as compose_module + + with patch.object(compose_module.logger, "warning") as mock_warn: + ComposeJobConfig( + environment={"cpus": 2, "memory": "4g"}, + compose={ + "main": {"image": "main:latest", "resources": {"cpus": 4}}, + "sidecars": [{"name": "side", "image": "side:latest", "resources": {"cpus": 4}}], + }, + ) + # Warning should have been called (inner total cpus > outer) + assert mock_warn.called + + def test_resource_budget_check_no_warning_within_budget(self): + """No warning when inner resources are within outer sandbox budget.""" + import rock.sdk.job.compose.config as compose_module + + with patch.object(compose_module.logger, "warning") as 
mock_warn: + ComposeJobConfig( + environment={"cpus": 16, "memory": "32g"}, + compose={ + "main": {"image": "main:latest", "resources": {"cpus": 4, "memory": "8g"}}, + "sidecars": [{"name": "side", "image": "side:latest", "resources": {"cpus": 1, "memory": "2g"}}], + }, + ) + assert not mock_warn.called + + def test_full_config(self): + """Test a full ComposeJobConfig round-trip.""" + cfg = ComposeJobConfig( + job_name="test-job", + compose={ + "main": { + "image": "main:latest", + "resources": {"cpus": 4, "memory": "16g"}, + "env": {"KEY": "VALUE"}, + "oss_deps": [{"key": "path/to/obj", "target_path": "/data/", "extract": True}], + }, + "init_containers": [ + { + "name": "init1", + "image": "init:latest", + "script_path": "/init.sh", + "volume_mounts": [{"name": "vol", "mount_path": "/data", "main_mount_path": "/main-data"}], + } + ], + "sidecars": [ + { + "name": "sidecar1", + "image": "sidecar:latest", + "script": "echo hello", + "health": {"port": 8080}, + } + ], + }, + ) + assert cfg.job_name == "test-job" + assert cfg.compose.main.image == "main:latest" + assert cfg.compose.main.resources.cpus == 4 + assert cfg.compose.main.resources.memory == "16g" + assert cfg.compose.main.env == {"KEY": "VALUE"} + assert cfg.compose.main.oss_deps[0].key == "path/to/obj" + assert cfg.compose.main.oss_deps[0].extract is True + assert cfg.compose.init_containers[0].name == "init1" + assert cfg.compose.init_containers[0].script_path == "/init.sh" + assert cfg.compose.init_containers[0].volume_mounts[0].name == "vol" + assert cfg.compose.sidecars[0].name == "sidecar1" + assert cfg.compose.sidecars[0].health.port == 8080 + + +# --------------------------------------------------------------------------- +# ComposeJobConfig.from_yaml +# --------------------------------------------------------------------------- + + +class TestComposeJobConfigFromYaml: + def test_from_yaml_minimal(self, tmp_path): + content = textwrap.dedent( + """\ + compose: + main: + image: main:latest + """ + ) 
+ p = tmp_path / "cfg.yaml" + p.write_text(content) + cfg = ComposeJobConfig.from_yaml(str(p)) + assert isinstance(cfg, ComposeJobConfig) + assert cfg.compose.main.image == "main:latest" + + def test_from_yaml_file_not_found(self): + with pytest.raises(FileNotFoundError): + ComposeJobConfig.from_yaml("/nonexistent/path.yaml") + + def test_from_yaml_full(self, tmp_path): + content = textwrap.dedent( + """\ + job_name: my-compose-job + timeout: 3600 + compose: + main: + image: myregistry/main:latest + resources: + cpus: 4 + memory: "16g" + env: + DATASET: my-dataset + oss_deps: + - key: path/to/archive.tar.gz + target_path: /data/ + extract: true + init_containers: + - name: init1 + image: init:latest + script_path: /init.sh + sidecars: + - name: proxy + image: proxy:latest + script_path: /proxy.sh + health: + port: 8082 + timeout_sec: 60 + """ + ) + p = tmp_path / "cfg.yaml" + p.write_text(content) + cfg = ComposeJobConfig.from_yaml(str(p)) + assert cfg.job_name == "my-compose-job" + assert cfg.timeout == 3600 + assert cfg.compose.main.resources.cpus == 4 + assert cfg.compose.main.oss_deps[0].extract is True + assert cfg.compose.init_containers[0].name == "init1" + assert cfg.compose.sidecars[0].name == "proxy" + assert cfg.compose.sidecars[0].health.port == 8082 + + +# --------------------------------------------------------------------------- +# JobConfig.from_yaml — three-way auto-detection +# --------------------------------------------------------------------------- + + +class TestJobConfigFromYamlThreeWay: + """JobConfig.from_yaml dispatches to the correct subclass including ComposeJobConfig.""" + + def test_bash_script_detected(self, tmp_path): + yaml_content = "script: echo hello\ntimeout: 60\n" + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + cfg = JobConfig.from_yaml(str(p)) + assert isinstance(cfg, BashJobConfig) + assert not isinstance(cfg, ComposeJobConfig) + + def test_harbor_experiment_id_detected(self, tmp_path): + from 
rock.sdk.bench.models.job.config import HarborJobConfig + + yaml_content = "experiment_id: exp-1\nagents:\n - name: my-agent\n" + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + cfg = JobConfig.from_yaml(str(p)) + assert isinstance(cfg, HarborJobConfig) + + def test_compose_key_detected(self, tmp_path): + yaml_content = textwrap.dedent( + """\ + compose: + main: + image: main:latest + """ + ) + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + cfg = JobConfig.from_yaml(str(p)) + assert isinstance(cfg, ComposeJobConfig) + + def test_compose_with_script_path(self, tmp_path): + """script_path at top-level (from BashJobConfig) + compose block → ComposeJobConfig.""" + yaml_content = textwrap.dedent( + """\ + script_path: ./main.sh + compose: + main: + image: main:latest + """ + ) + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + cfg = JobConfig.from_yaml(str(p)) + assert isinstance(cfg, ComposeJobConfig) + assert cfg.script_path == "./main.sh" + + def test_compose_with_full_config(self, tmp_path): + yaml_content = textwrap.dedent( + """\ + job_name: swe-job + script_path: ./main.sh + timeout: 7200 + compose: + main: + image: myregistry/main:latest + resources: + cpus: 4 + memory: "16g" + sidecars: + - name: proxy + image: proxy:latest + script_path: /proxy.sh + """ + ) + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + cfg = JobConfig.from_yaml(str(p)) + assert isinstance(cfg, ComposeJobConfig) + assert cfg.job_name == "swe-job" + assert cfg.compose.sidecars[0].name == "proxy" + + def test_invalid_compose_raises_value_error(self, tmp_path): + """A YAML with 'compose' key but an invalid compose structure raises a descriptive error.""" + yaml_content = textwrap.dedent( + """\ + compose: + main: {} + """ + ) + # compose.main is missing the required 'image' field → ComposeJobConfig fails + # BashJobConfig rejects 'compose' (extra=forbid) → also fails + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + with pytest.raises(ValueError, 
match="does not match any known job type"): + JobConfig.from_yaml(str(p)) + + def test_bash_direct_from_yaml_unaffected(self, tmp_path): + """BashJobConfig.from_yaml() still works independently.""" + yaml_content = "script: ls -la\ntimeout: 120\n" + p = tmp_path / "bash.yaml" + p.write_text(yaml_content) + cfg = BashJobConfig.from_yaml(str(p)) + assert isinstance(cfg, BashJobConfig) + assert cfg.script == "ls -la" diff --git a/tests/unit/sdk/job/test_trial_compose.py b/tests/unit/sdk/job/test_trial_compose.py new file mode 100644 index 0000000000..8320ba2d84 --- /dev/null +++ b/tests/unit/sdk/job/test_trial_compose.py @@ -0,0 +1,610 @@ +"""Tests for rock.sdk.job.compose.trial — ComposeTrial.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +from rock.sdk.envhub import EnvironmentConfig +from rock.sdk.envhub.config import OssMirrorConfig +from rock.sdk.job.compose.config import ( + ComposeJobConfig, + ComposeSpec, + HealthSpec, + InitContainerSpec, + MainContainerSpec, + OssDep, + ResourceSpec, + SecretEnvEntry, + SidecarSpec, +) +from rock.sdk.job.compose.trial import ( + ComposeTrial, + _entrypoint_args, + _env_args, + _render_oss_deps, + _resource_args, +) +from rock.sdk.job.trial.registry import _create_trial + +# ── Fixtures ────────────────────────────────────────────────────────────────── + + +def _minimal_main(image="ubuntu:22.04") -> MainContainerSpec: + return MainContainerSpec(image=image) + + +def _minimal_compose(script="echo hello") -> ComposeJobConfig: + return ComposeJobConfig( + script=script, + compose=ComposeSpec(main=_minimal_main()), + ) + + +def _mock_sandbox() -> AsyncMock: + sb = AsyncMock() + sb._namespace = "test-ns" + sb._experiment_id = "test-exp" + obs = MagicMock() + obs.exit_code = 0 + obs.output = "" + sb.arun = AsyncMock(return_value=obs) + sb.fs = AsyncMock() + sb.fs.ensure_ossutil = AsyncMock(return_value=True) + sb.fs.upload_dir = AsyncMock(return_value=MagicMock(exit_code=0)) + 
sb.write_file_by_path = AsyncMock(return_value=MagicMock(success=True)) + return sb + + +# ── Registration ────────────────────────────────────────────────────────────── + + +class TestRegistration: + def test_compose_config_creates_compose_trial(self): + cfg = _minimal_compose() + trial = _create_trial(cfg) + assert isinstance(trial, ComposeTrial) + + +# ── build() ─────────────────────────────────────────────────────────────────── + + +class TestBuild: + def test_build_returns_bash_runner(self): + cfg = _minimal_compose() + trial = ComposeTrial(cfg) + assert trial.build() == "bash /rock/runner.sh" + + +# ── setup() ─────────────────────────────────────────────────────────────────── + + +class TestSetup: + async def test_setup_writes_runner_sh(self): + cfg = _minimal_compose() + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + await trial.setup(sb) + + # write_file_by_path should have been called with /rock/runner.sh + paths = [call.args[1] for call in sb.write_file_by_path.call_args_list] + assert "/rock/runner.sh" in paths + + async def test_setup_writes_main_script(self): + cfg = _minimal_compose(script="echo main-script") + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + await trial.setup(sb) + + # main.sh should be written + paths = [call.args[1] for call in sb.write_file_by_path.call_args_list] + assert "/rock/scripts/main.sh" in paths + + # Find main.sh content + for call in sb.write_file_by_path.call_args_list: + if call.args[1] == "/rock/scripts/main.sh": + assert "echo main-script" in call.args[0] + break + + async def test_setup_writes_inline_init_script(self): + cfg = ComposeJobConfig( + script="echo main", + compose=ComposeSpec( + main=_minimal_main(), + init_containers=[InitContainerSpec(name="setup", image="alpine", script="echo setup-init")], + ), + ) + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + await trial.setup(sb) + + paths = [call.args[1] for call in sb.write_file_by_path.call_args_list] + assert "/rock/scripts/setup.sh" in 
paths + + async def test_setup_runner_contains_docker_network_create(self): + cfg = _minimal_compose() + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + await trial.setup(sb) + + runner_content = None + for call in sb.write_file_by_path.call_args_list: + if call.args[1] == "/rock/runner.sh": + runner_content = call.args[0] + break + + assert runner_content is not None + assert "docker network create rock_compose_$$" in runner_content + assert "main.sh" in runner_content + + async def test_setup_runner_contains_pipestatus(self): + """${PIPESTATUS[0]} must be present verbatim (not broken by str.format).""" + cfg = _minimal_compose() + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + await trial.setup(sb) + + runner_content = None + for call in sb.write_file_by_path.call_args_list: + if call.args[1] == "/rock/runner.sh": + runner_content = call.args[0] + break + + assert runner_content is not None + assert "${PIPESTATUS[0]}" in runner_content + + async def test_setup_ensures_ossutil_when_oss_deps(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=MainContainerSpec( + image="ubuntu:22.04", + oss_deps=[OssDep(key="oss://bucket/data.tar.gz", target_path="/data")], + ) + ), + ) + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + await trial.setup(sb) + + sb.fs.ensure_ossutil.assert_called_once() + + async def test_setup_no_ossutil_when_no_oss_deps(self): + cfg = _minimal_compose() + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + await trial.setup(sb) + + sb.fs.ensure_ossutil.assert_not_called() + + async def test_setup_reads_script_path(self, tmp_path): + script_file = tmp_path / "main.sh" + script_file.write_text("echo from-file") + + cfg = ComposeJobConfig( + script_path=str(script_file), + compose=ComposeSpec(main=_minimal_main()), + ) + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + await trial.setup(sb) + + # Ensure content from file was written to /rock/scripts/main.sh + for call in 
sb.write_file_by_path.call_args_list: + if call.args[1] == "/rock/scripts/main.sh": + assert "echo from-file" in call.args[0] + break + + +# ── collect() ──────────────────────────────────────────────────────────────── + + +class TestCollect: + async def test_collect_success(self): + cfg = ComposeJobConfig( + script="echo hi", + job_name="myjob", + compose=ComposeSpec(main=_minimal_main()), + ) + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + result = await trial.collect(sb, output="ok\n", exit_code=0) + + assert result.exception_info is None + assert result.task_name == "myjob" + assert result.status == "completed" + assert result.exit_code == 0 + + async def test_collect_failure_sets_compose_exception(self): + cfg = ComposeJobConfig( + script="exit 1", + job_name="myjob", + compose=ComposeSpec(main=_minimal_main()), + ) + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + + result = await trial.collect(sb, output="", exit_code=1) + + assert result.exception_info is not None + assert result.exception_info.exception_type == "ComposeMainContainerFailed" + assert result.status == "failed" + assert result.exit_code == 1 + + async def test_collect_reads_main_log(self): + cfg = _minimal_compose() + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + obs = MagicMock() + obs.output = "some log output" + sb.arun = AsyncMock(return_value=obs) + + result = await trial.collect(sb, output="", exit_code=0) + + # arun should have been called with cat for main log + calls = [str(call) for call in sb.arun.call_args_list] + assert any("main.log" in c for c in calls) + assert result is not None + + async def test_collect_reads_sidecar_log(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=_minimal_main(), + sidecars=[SidecarSpec(name="proxy", image="nginx:latest")], + ), + ) + trial = ComposeTrial(cfg) + sb = _mock_sandbox() + obs = MagicMock() + obs.output = "proxy log" + sb.arun = AsyncMock(return_value=obs) + + await trial.collect(sb, 
output="", exit_code=0) + + calls = [str(call) for call in sb.arun.call_args_list] + assert any("proxy.log" in c for c in calls) + + +# ── runner.sh rendering ─────────────────────────────────────────────────────── + + +class TestRunnerRendering: + def _get_runner(self, cfg: ComposeJobConfig) -> str: + trial = ComposeTrial(cfg) + return trial._render_runner_sh() + + def test_p0_starts_dockerd_with_kata_fixes(self): + """P0 must actively start dockerd with the fixes learned from real kata runs: + explicit PATH (so nohup'd dockerd finds containerd) and + DOCKER_IGNORE_BR_NETFILTER_ERROR=1 (kata guest lacks br_netfilter).""" + cfg = ComposeJobConfig(script="echo hi", compose=ComposeSpec(main=_minimal_main())) + runner = self._get_runner(cfg) + assert "nohup dockerd" in runner + assert "DOCKER_IGNORE_BR_NETFILTER_ERROR=1" in runner + assert "PATH=/usr/local/bin" in runner + + def test_main_mounts_scripts_dir(self): + """Inner main container must bind-mount the outer /rock/scripts so main.sh exists.""" + cfg = ComposeJobConfig(script="echo hi", compose=ComposeSpec(main=_minimal_main())) + runner = self._get_runner(cfg) + assert "-v /rock/scripts:/rock/scripts:ro" in runner + assert "bash /rock/scripts/main.sh" in runner + + def test_init_container_has_rm_flag(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=_minimal_main(), + init_containers=[InitContainerSpec(name="init1", image="alpine", script="echo init")], + ), + ) + runner = self._get_runner(cfg) + assert "docker run --rm" in runner + assert "init1" in runner + # init containers also need the scripts dir mounted + assert "-v /rock/scripts:/rock/scripts:ro" in runner + + def test_sidecar_has_d_flag_and_network_alias(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=_minimal_main(), + sidecars=[SidecarSpec(name="proxy", image="nginx:latest")], + ), + ) + runner = self._get_runner(cfg) + assert "docker run -d" in runner + assert "--network-alias 
proxy" in runner + + def test_health_probe_triggers_nc(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=_minimal_main(), + sidecars=[SidecarSpec(name="proxy", image="nginx:latest", health=HealthSpec(port=8080))], + ), + ) + runner = self._get_runner(cfg) + assert "nc -z" in runner + assert "proxy" in runner + assert "8080" in runner + + def test_no_health_probe_when_no_health(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=_minimal_main(), + sidecars=[SidecarSpec(name="proxy", image="nginx:latest")], + ), + ) + runner = self._get_runner(cfg) + assert "nc -z" not in runner + + def test_privileged_flag(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=MainContainerSpec(image="ubuntu:22.04", privileged=True), + ), + ) + runner = self._get_runner(cfg) + # main container section should have --privileged + assert "--privileged" in runner + + def test_init_privileged(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=_minimal_main(), + init_containers=[InitContainerSpec(name="priv-init", image="alpine", privileged=True)], + ), + ) + runner = self._get_runner(cfg) + assert "--privileged" in runner + + def test_command_produces_entrypoint(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=_minimal_main(), + init_containers=[ + InitContainerSpec(name="custom", image="alpine", command=["sh"], args=["-c", "echo custom"]) + ], + ), + ) + runner = self._get_runner(cfg) + assert "--entrypoint" in runner + + def test_secret_env_rendered_as_shell_var_reference(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=MainContainerSpec( + image="ubuntu:22.04", + secret_env={"MY_SECRET": SecretEnvEntry(secret_name="my-secret", secret_key="key")}, + ) + ), + ) + runner = self._get_runner(cfg) + # Secret should be a shell variable reference, not the actual secret value + assert "${MY_SECRET}" in 
runner + assert "secret_key" not in runner + assert "my-secret" not in runner + + def test_oss_deps_extract_branch_generates_tar(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=MainContainerSpec( + image="ubuntu:22.04", + oss_deps=[OssDep(key="oss://bucket/data.tar.gz", target_path="/data", extract=True)], + ) + ), + ) + runner = self._get_runner(cfg) + assert "tar -xf" in runner + assert "ossutil cp" in runner + + def test_oss_deps_no_extract_no_tar(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=MainContainerSpec( + image="ubuntu:22.04", + oss_deps=[OssDep(key="oss://bucket/model.bin", target_path="/model/model.bin")], + ) + ), + ) + runner = self._get_runner(cfg) + assert "ossutil cp" in runner + assert "tar -xf" not in runner + + def test_pipestatus_preserved(self): + """str.replace approach must keep ${PIPESTATUS[0]} intact.""" + runner = self._get_runner(_minimal_compose()) + assert "${PIPESTATUS[0]}" in runner + + def test_main_script_path_in_runner(self): + runner = self._get_runner(_minimal_compose()) + assert "bash /rock/scripts/main.sh" in runner + + def test_resource_spec_cpus_in_runner(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec(main=MainContainerSpec(image="ubuntu:22.04", resources=ResourceSpec(cpus=4.0))), + ) + runner = self._get_runner(cfg) + assert "--cpus" in runner + assert "4.0" in runner + + def test_resource_spec_memory_in_runner(self): + cfg = ComposeJobConfig( + script="echo hi", + compose=ComposeSpec( + main=MainContainerSpec( + image="ubuntu:22.04", + resources=ResourceSpec(memory="8g", memory_limit="16g"), + ) + ), + ) + runner = self._get_runner(cfg) + assert "--memory-reservation" in runner + assert "--memory" in runner + + +# ── Helper function unit tests ──────────────────────────────────────────────── + + +class TestResourceArgs: + def test_none_returns_empty(self): + assert _resource_args(None) == [] + + def test_cpus(self): + r = 
ResourceSpec(cpus=2.0) + args = _resource_args(r) + assert "--cpus 2.0" in args + + def test_cpu_limit_overrides_cpus(self): + r = ResourceSpec(cpus=2.0, cpu_limit=4.0) + args = _resource_args(r) + assert "--cpus 4.0" in args + assert "--cpus 2.0" not in args + + def test_memory(self): + r = ResourceSpec(memory="4g") + args = _resource_args(r) + assert "--memory-reservation 4g" in args + + def test_memory_limit(self): + r = ResourceSpec(memory_limit="8g") + args = _resource_args(r) + assert "--memory 8g" in args + + +class TestEnvArgs: + def test_plain_env(self): + args = _env_args({"FOO": "bar"}, {}) + assert "-e FOO=bar" in args + + def test_secret_env_shell_var(self): + secret = SecretEnvEntry(secret_name="my-secret", secret_key="my-key") + args = _env_args({}, {"MY_SECRET": secret}) + # Must reference ${MY_SECRET} not the literal key value + env_arg = next(a for a in args if "MY_SECRET" in a) + assert "${MY_SECRET}" in env_arg + assert "my-key" not in env_arg + assert "my-secret" not in env_arg + + +class TestEntrypointArgs: + def test_command_sets_entrypoint(self): + spec = MagicMock() + spec.command = ["dockerd"] + spec.args = ["--tls=false"] + spec.script_path = None + spec.script = None + flag_args, pos = _entrypoint_args(spec) + assert any("--entrypoint" in f for f in flag_args) + assert "--tls=false" in pos + + def test_script_path(self): + spec = MagicMock() + spec.command = None + spec.script_path = "/my/script.sh" + spec.script = None + flag_args, pos = _entrypoint_args(spec) + assert flag_args == [] + assert "bash" in pos + assert "/my/script.sh" in pos + + def test_inline_script_uses_name(self): + spec = MagicMock() + spec.command = None + spec.script_path = None + spec.script = "echo hi" + spec.name = "mycontainer" + flag_args, pos = _entrypoint_args(spec) + assert flag_args == [] + assert "bash /rock/scripts/mycontainer.sh" in pos + + def test_no_entrypoint_options(self): + spec = MagicMock() + spec.command = None + spec.script_path = None + 
spec.script = None + flag_args, pos = _entrypoint_args(spec) + assert flag_args == [] + assert pos == "" + + +class TestRenderOssDeps: + def test_empty(self): + result = _render_oss_deps([]) + assert "no oss_deps" in result + + def test_plain_dep(self): + dep = OssDep(key="oss://b/file.bin", target_path="/data/file.bin") + result = _render_oss_deps([dep]) + assert "ossutil cp" in result + assert "tar" not in result + + def test_extract_dep(self): + dep = OssDep(key="oss://b/data.tar.gz", target_path="/data", extract=True) + result = _render_oss_deps([dep]) + assert "tar -xf" in result + assert "ossutil cp" in result + + +# ── on_sandbox_ready hook ───────────────────────────────────────────────────── + + +class TestOnSandboxReady: + async def test_backfills_namespace(self): + cfg = _minimal_compose() + trial = ComposeTrial(cfg) + sb = MagicMock() + sb._namespace = "test-ns" + sb._experiment_id = "test-exp" + + await trial.on_sandbox_ready(sb) + + assert cfg.namespace == "test-ns" + assert cfg.experiment_id == "test-exp" + + async def test_oss_mirror_env_prepared_when_enabled(self, monkeypatch): + for k in list(__import__("os").environ): + if k.startswith("OSS"): + monkeypatch.delenv(k, raising=False) + + cfg = ComposeJobConfig( + script="echo hi", + job_name="myjob", + compose=ComposeSpec(main=_minimal_main()), + environment=EnvironmentConfig( + oss_mirror=OssMirrorConfig( + enabled=True, + oss_bucket="b", + oss_endpoint="ep", + oss_region="rg", + ), + ), + ) + trial = ComposeTrial(cfg) + sb = MagicMock() + sb._namespace = "ns1" + sb._experiment_id = "exp1" + + await trial.on_sandbox_ready(sb) + + assert cfg.environment.env.get("OSS_BUCKET") == "b" + assert "ROCK_ARTIFACT_DIR" in cfg.environment.env + assert cfg.environment.env["ROCK_OSS_PREFIX"] == "artifacts/ns1/exp1/myjob" From 9caa793b848b002cb66a2fe706e560cdc9e1a665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=82=E6=B4=9B?= Date: Wed, 10 Jun 2026 13:13:37 +0800 Subject: [PATCH 2/4] docs(sdk): add 
ready-to-run harbor ComposeJobConfig demo harbor_compose_demo.py + .env.example: env-var driven demo for running the harbor task (claude-code / aone-bench-java100) via ComposeJobConfig, with all real-backend fixes baked in. User only fills credentials (incl. OSS) to run. Co-Authored-By: Claude Code AI-Model: claude-opus-4-8 AI-Contributed/Feature: 240/240 AI-Contributed/UT: 0/0 --- examples/job/compose/.env.example | 26 +++ examples/job/compose/README.md | 25 ++- examples/job/compose/harbor_compose_demo.py | 211 ++++++++++++++++++++ 3 files changed, 260 insertions(+), 2 deletions(-) create mode 100644 examples/job/compose/.env.example create mode 100644 examples/job/compose/harbor_compose_demo.py diff --git a/examples/job/compose/.env.example b/examples/job/compose/.env.example new file mode 100644 index 0000000000..20a0c8dd1b --- /dev/null +++ b/examples/job/compose/.env.example @@ -0,0 +1,26 @@ +# ComposeJobConfig harbor demo 凭证模板 +# 用法:cp .env.example .env && 填值 && source .env && uv run python harbor_compose_demo.py + +# ── ROCK 集群(必填)── +export ROCK_TOKEN='' +# export ROCK_BASE_URL='http://xrl.alibaba-inc.com' # 默认即此,可不填 +# export ROCK_CLUSTER='vpc-sg-a' # 默认即此,可不填 + +# ── 模型(必填)── +export MODEL='glm-5' +export MODEL_BASE_URL='https://routify.alibaba-inc.com/protocol/openai/v1' +export MODEL_API_KEY='' + +# ── OSS 凭证(必填 —— harbor 从 OSS 下载 dataset;AP 命令里没有,需你补)── +export OSS_BUCKET='' +export OSS_ENDPOINT='' # e.g. oss-cn-hangzhou-internal.aliyuncs.com +export OSS_REGION='' # e.g. 
cn-hangzhou +export OSS_ACCESS_KEY_ID='' +export OSS_ACCESS_KEY_SECRET='' + +# ── 任务参数(可选,都有默认值,对应 AP -p)── +# export INSTANCE_ID='codereview-20789198' +# export DATASET='terminal-bench/aone-bench-java100' +# export SPLIT='test' +# export HARBOR_AGENT='claude-code' +# export JOB_TIMEOUT='9000' diff --git a/examples/job/compose/README.md b/examples/job/compose/README.md index 13947244b5..cb618636d0 100644 --- a/examples/job/compose/README.md +++ b/examples/job/compose/README.md @@ -7,13 +7,34 @@ ``` examples/job/compose/ -├── compose_demo.py # 入口脚本(argparse + Job.run()) +├── harbor_compose_demo.py # ★ 开箱即用 demo(凭证走环境变量,内置所有真机 fix) +├── .env.example # 凭证模板(cp 成 .env 填值后 source) +├── compose_demo.py # 通用入口(-c 读 YAML,适合自定义 config) ├── job_config.yaml.template # ComposeJobConfig YAML 模板(含占位符) -├── main.sh # 主容器入口脚本(harbor runner,原 Agent-Hub/task/harbor/main.sh) +├── main.sh # 主容器入口脚本(harbor runner,原 Agent-Hub/task/harbor/main.sh + 两层适配) └── sidecars/ └── proxy-sidecar.sh # cc-proxy sidecar 脚本(原 Agent-Hub/task/harbor/proxy-sidecar.sh) ``` +## 快速开始(推荐:harbor_compose_demo.py) + +这是经过 ROCK 真机端到端验证的脚本,对应你的 AP harbor 命令,所有 dockerd/网络/挂载 +修正都已内置,只需配凭证: + +```bash +cd examples/job/compose +cp .env.example .env # 填入 ROCK_TOKEN / MODEL_* / OSS_* 凭证 +source .env +uv run python harbor_compose_demo.py +``` + +**OSS 凭证是必需的**:harbor 从 OSS 下载 dataset(`terminal-bench/aone-bench-java100`)。 +AP 平台自动注入 OSS 凭证,SDK 直连模式需你在 `.env` 里显式提供 +(OSS_BUCKET / OSS_ENDPOINT / OSS_REGION / OSS_ACCESS_KEY_ID / OSS_ACCESS_KEY_SECRET)。 + +任务参数(INSTANCE_ID / DATASET / HARBOR_AGENT 等)都有默认值(对应 AP `-p`), +可用环境变量覆盖,详见 `harbor_compose_demo.py` 顶部 docstring。 + ## 运行方案:runner.sh 在外层沙箱主动启动 dockerd > 以下要点均经过 ROCK 真实后端(`xrl.alibaba-inc.com` / `vpc-sg-a`,kata runtime)端到端验证。 diff --git a/examples/job/compose/harbor_compose_demo.py b/examples/job/compose/harbor_compose_demo.py new file mode 100644 index 0000000000..2bb6fec33b --- /dev/null +++ b/examples/job/compose/harbor_compose_demo.py @@ -0,0 +1,211 @@ 
+"""ComposeJobConfig 端到端 demo —— 用 ComposeJobConfig 跑 harbor 任务。 + +这是经过 ROCK 真机后端(kata runtime)端到端验证的脚本: + Job → kata 沙箱 → dockerd → proxy sidecar → health → 主容器(复用外层 dockerd) + → harbor CLI 运行 → 下载 dataset → 跑 agent rollout + +对应的 AP 命令(claude-code agent / aone-bench-java100 / glm-5): + ap job create harbor --instance-id codereview-20789198 -p '{...}' --runner rock + +与 AP 命令的唯一区别:AP 平台自动注入 OSS 凭证,SDK 直连模式需你显式提供 +(harbor 从 OSS 下载 dataset 必需)。把凭证放进环境变量即可。 + +──────────────────────────────────────────────────────────────────────────── +用法: + # 1) 配置凭证(必填) + export ROCK_TOKEN='t-...' + export MODEL='glm-5' + export MODEL_BASE_URL='https://routify.alibaba-inc.com/protocol/openai/v1' + export MODEL_API_KEY='sk-...' + # OSS 凭证(harbor 下载 dataset 必需 —— AP 命令里没有,需你补) + export OSS_BUCKET='' + export OSS_ENDPOINT='' # e.g. oss-cn-hangzhou-internal.aliyuncs.com + export OSS_REGION='' # e.g. cn-hangzhou + export OSS_ACCESS_KEY_ID='' + export OSS_ACCESS_KEY_SECRET='' + + # 2) 跑 + uv run python examples/job/compose/harbor_compose_demo.py + +可选环境变量覆盖(都有默认值,对应 AP 命令的 -p 参数): + INSTANCE_ID(codereview-20789198) DATASET(terminal-bench/aone-bench-java100) + SPLIT(test) HARBOR_AGENT(claude-code) ROCK_BASE_URL ROCK_CLUSTER + HARBOR_MAIN_IMAGE PROXY_IMAGE JOB_TIMEOUT +──────────────────────────────────────────────────────────────────────────── +""" + +import asyncio +import logging +import os +import sys +from pathlib import Path + +from rock.sdk.envhub import EnvironmentConfig +from rock.sdk.job import Job +from rock.sdk.job.compose.config import ( + ComposeJobConfig, + ComposeSpec, + HealthSpec, + MainContainerSpec, + SidecarSpec, + VolumeMount, +) +from rock.sdk.job.operator import ScatterOperator + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +logging.getLogger("httpx").setLevel(logging.WARNING) +logger = logging.getLogger("harbor-compose-demo") + +HERE = Path(__file__).resolve().parent + +# ── 默认值(对应用户 AP 命令的 -p
参数)────────────────────────────────────── +DEFAULTS = { + "ROCK_BASE_URL": "http://xrl.alibaba-inc.com", + "ROCK_CLUSTER": "vpc-sg-a", + "INSTANCE_ID": "codereview-20789198", + "DATASET": "terminal-bench/aone-bench-java100", + "SPLIT": "test", + "HARBOR_AGENT": "claude-code", + "HARBOR_MAIN_IMAGE": "rock-registry.ap-southeast-1.cr.aliyuncs.com/harbor/harbor:086a7b5822fc09891b190e18d", + "PROXY_IMAGE": "agent-platform-staging-registry-vpc.ap-southeast-1.cr.aliyuncs.com/eflops/proxy-hub:bailian-usage-dev", + "JOB_TIMEOUT": "9000", +} + +# 必填凭证(缺一不可) +REQUIRED = [ + "ROCK_TOKEN", + "MODEL", + "MODEL_BASE_URL", + "MODEL_API_KEY", + "OSS_BUCKET", + "OSS_ENDPOINT", + "OSS_REGION", + "OSS_ACCESS_KEY_ID", + "OSS_ACCESS_KEY_SECRET", +] + + +def cfg(key: str) -> str: + """取环境变量,回落到 DEFAULTS。""" + return os.environ.get(key, DEFAULTS.get(key, "")) + + +def check_required() -> None: + missing = [k for k in REQUIRED if not os.environ.get(k)] + if missing: + print("缺少必填环境变量:") + for k in missing: + print(f" - {k}") + print("\n请先 export 这些变量(见本文件顶部 docstring)。") + sys.exit(1) + + +def build_config() -> ComposeJobConfig: + # 容器内 env(harbor main.sh + proxy sidecar 都从这里读) + container_env = { + # 模型 + "MODEL": cfg("MODEL"), + "MODEL_BASE_URL": cfg("MODEL_BASE_URL"), + "MODEL_API_KEY": cfg("MODEL_API_KEY"), + # harbor 任务(对应 AP -p 参数) + "HARBOR_AGENT": cfg("HARBOR_AGENT"), + "INSTANCE_ID": cfg("INSTANCE_ID"), + "DATASET": cfg("DATASET"), + "SPLIT": cfg("SPLIT"), + "DATASET_TYPE": "local", + "HARBOR_ENV": "docker", + "N_ATTEMPTS": "1", + "N_CONCURRENT": "1", + "TIMEOUT_MULTIPLIER": "3.0", + "MAX_RETRIES": "3", + "MAX_ITERATIONS": "200", + "AGENT_VERSION": "2.1.87", + "AGENT_TIMEOUT_MULTIPLIER": "8.0", + "RETRY_INCLUDE": "NonZeroAgentExitCodeError", + "FORCE_PROXY": "true", + "PROVIDER": "anthropic", + "TEMPERATURE": "1.0", + "INTERLEAVED_THINKING": "true", + "THINKING_TYPE": "adaptive", + "REASONING_EFFORT": "high", + "CONTEXT_1M": "true", + "SKIP_CONFIRM": "true", + "OUTPUT_DIR": 
"/tmp/output", + "SHARED_DIR": "/tmp/shared", + # OSS 凭证(harbor 下载 dataset 必需) + "OSS_BUCKET": cfg("OSS_BUCKET"), + "OSS_ENDPOINT": cfg("OSS_ENDPOINT"), + "OSS_REGION": cfg("OSS_REGION"), + "OSS_ACCESS_KEY_ID": cfg("OSS_ACCESS_KEY_ID"), + "OSS_ACCESS_KEY_SECRET": cfg("OSS_ACCESS_KEY_SECRET"), + } + + return ComposeJobConfig( + job_name="harbor-compose-demo", + timeout=int(cfg("JOB_TIMEOUT")), + # 主容器入口脚本(harbor runner,从 Agent-Hub 复制并适配) + script_path=str(HERE / "main.sh"), + environment=EnvironmentConfig( + # 外层沙箱镜像须自带 docker 工具链(不要用 docker:27-dind,kata 下缺 containerd) + image=cfg("HARBOR_MAIN_IMAGE"), + base_url=cfg("ROCK_BASE_URL"), + cluster=cfg("ROCK_CLUSTER"), + extra_headers={"XRL-Authorization": f"Bearer {cfg('ROCK_TOKEN')}"}, + use_kata_runtime=True, + startup_timeout=1200, + memory="32g", + cpus=16, + uploads=[ + (str(HERE / "main.sh"), "/rock/scripts/main.sh"), + (str(HERE / "sidecars"), "/rock/scripts/sidecars"), + ], + env=container_env, + ), + compose=ComposeSpec( + main=MainContainerSpec( + image=cfg("HARBOR_MAIN_IMAGE"), + privileged=True, + env=container_env, + # 复用外层 dockerd(挂载外层 docker socket),避免主容器内再起第三层 dockerd + # —— 第三层 dockerd 在 kata 下会失败("Docker daemon failed to start") + volume_mounts=[ + VolumeMount( + name="docker-sock", + mount_path="/var/run/docker.sock", + host_path="/var/run/docker.sock", + ) + ], + ), + sidecars=[ + SidecarSpec( + name="proxy", + image=cfg("PROXY_IMAGE"), + script_path="/rock/scripts/sidecars/proxy-sidecar.sh", + env=container_env, + health=HealthSpec(port=8082, timeout_sec=120), + ), + ], + ), + ) + + +async def main() -> None: + config = build_config() + logger.info("Submitting harbor task via ComposeJobConfig (job_name=%s) ...", config.job_name) + logger.info(" backend=%s cluster=%s", config.environment.base_url, config.environment.cluster) + logger.info(" dataset=%s split=%s agent=%s", cfg("DATASET"), cfg("SPLIT"), cfg("HARBOR_AGENT")) + + # size=1:单 trial(避免 ScatterOperator 共享 config 引用的竞态) + result = await 
Job(config, operator=ScatterOperator(size=1)).run() + + logger.info("=== RESULT ===") + logger.info("exit_code=%s status=%s score=%s", result.exit_code, result.status, result.score) + for t in result.trial_results: + logger.info(" trial=%s exit=%s score=%s", t.task_name, t.exit_code, t.score) + if t.exception_info: + logger.info(" error: %s: %s", t.exception_info.exception_type, t.exception_info.exception_message) + + +if __name__ == "__main__": + check_required() + asyncio.run(main()) From d9b8b6162aa972e6f22291a72b4d245b1649515b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=82=E6=B4=9B?= Date: Thu, 11 Jun 2026 12:33:55 +0800 Subject: [PATCH 3/4] refactor(sdk): simplify ComposeJobConfig to v2 (job_config + docker-compose.yaml) Migrate from custom compose block with hand-written runner.sh orchestration to standard docker-compose.yaml with native `docker compose up`. ROCK now only bootstraps DinD dockerd and delegates all container orchestration to compose, drastically reducing config surface and runner.sh complexity. 
Co-Authored-By: Claude Code AI-Model: claude-opus-4-6 AI-Contributed/Feature: 1791/1791 AI-Contributed/UT: 1342/1342 --- docs/compose-job-config-design.md | 520 ++++++++++++++ examples/job/compose/README.md | 159 ++--- examples/job/compose/compose_demo.py | 16 +- examples/job/compose/docker-compose.yaml | 96 +++ examples/job/compose/harbor_compose_demo.py | 89 +-- examples/job/compose/job_config.yaml.template | 83 +-- examples/job/compose/main.sh | 18 +- rock/sdk/job/__init__.py | 4 +- rock/sdk/job/compose/config.py | 284 +------- rock/sdk/job/compose/trial.py | 516 +++----------- rock/sdk/job/config.py | 4 +- tests/unit/sdk/job/test_compose_config.py | 649 ++++-------------- tests/unit/sdk/job/test_config.py | 61 ++ tests/unit/sdk/job/test_trial_compose.py | 632 +++++------------ 14 files changed, 1246 insertions(+), 1885 deletions(-) create mode 100644 docs/compose-job-config-design.md create mode 100644 examples/job/compose/docker-compose.yaml diff --git a/docs/compose-job-config-design.md b/docs/compose-job-config-design.md new file mode 100644 index 0000000000..a0f6b60bcf --- /dev/null +++ b/docs/compose-job-config-design.md @@ -0,0 +1,520 @@ +# ComposeJobConfig 设计方案(v2 · 标准 docker-compose) + +> 为 ROCK SDK 设计支持多容器场景的 `ComposeJobConfig`。 +> **v2 重构核心**:放弃 v1 的自定义 `compose:` 块,改为 +> **`job_config.yaml`(job 元信息 + DinD 外层沙箱)+ 用户标准 `docker-compose.yaml`(容器编排)** 的双文件方式。 +> 容器编排完全回归 Docker Compose 原生语义,ROCK 不再重新发明 init/sidecar/resources/secret 的声明式字段。 +> +> 本文档为设计方案,**不含实现代码**。 + +--- + +## 0. 
设计概要(TL;DR) + +| 决策点 | v1(旧) | v2(本方案) | +|--------|---------|-------------| +| 容器编排载体 | job_config.yaml 内自定义 `compose:` 块 | **用户独立的标准 `docker-compose.yaml`** | +| 编排执行 | ComposeTrial 渲染 runner.sh,手写 `docker run` 串编排 | **`docker compose up`** 原生编排 | +| 主容器标识 | service 名固定 `main` + `--network-alias main` | **service 名硬约定为 `main`**,`--exit-code-from main` 取退出码 | +| init 容器 | `compose.init_containers[]` + runner Phase 2 串行 | **compose `depends_on: service_completed_successfully`** 原生 | +| sidecar | `compose.sidecars[]` + runner Phase 3 后台 | **compose 普通 service** + `depends_on: service_healthy` | +| 资源限制 | `ResourceSpec`(自定义四字段) | **compose `deploy.resources`** 原生 | +| secret 注入 | `secret_env`(K8s 风格自定义) | **compose `environment` / `env_file`** 原生 | +| OSS 依赖下载 | `compose.main.oss_deps[]` | **compose 里写成 init service**(用户自己拉),或主容器脚本内拉 | +| 健康探测 | `HealthSpec` + runner Phase 4 busybox nc | **compose `healthcheck`** 原生 | +| 容器间网络 | runner 建 `docker network` + `--network-alias` | **compose 默认 network**(service 名即 DNS 名) | +| 共享卷 | runner 建命名卷 + `main_mount_path` 逻辑映射 | **compose `volumes`** 原生 | +| OSS 产物上传 | `environment.oss_mirror`(复用) | **`environment.oss_mirror`(复用,唯一保留的 ROCK 收尾扩展)** | +| 类型检测特征 | `"compose" in data` | **`"compose_file" in data`**(顶层字符串指针) | +| Trial | ComposeTrial 渲染长 runner.sh | **ComposeTrial 上传 compose 文件 + 极简 runner.sh(仅 dockerd 引导 + `docker compose up`)** | + +**为什么换 v2**:v1 把 docker-compose 已经标准化的概念(depends_on / healthcheck / deploy.resources / networks / volumes)用自定义 pydantic 字段重新发明了一遍,既增加学习成本,又让 ComposeTrial 背上几百行 runner.sh 渲染逻辑。v2 让用户直接写他们已经熟悉的 `docker-compose.yaml`,ROCK 只负责:① 准备 DinD 外层沙箱;② 引导 dockerd;③ `docker compose up`;④ 收退出码 + 可选 OSS 产物上传。 + +核心层次图: + +``` +DinD 外层沙箱 ← environment (SandboxConfig): image=docker:dind, memory, cpus +└── runner.sh (ComposeTrial 生成,极简) + ├── P0: 引导并等待 dockerd 就绪 + ├── P1: docker compose -f docker-compose.yaml up --exit-code-from main --abort-on-container-exit + │ └── compose 原生编排: + │ ├── init services ← 
depends_on: service_completed_successfully + │ ├── sidecar services ← depends_on: service_healthy + │ └── main service ← 决定整体退出码(约定名 main) + └── P2: 收尾——docker compose logs / down,可选 oss_mirror 产物上传 +``` + +--- + +## 1. 用户目录结构 + +``` +my-compose-job/ +├── job_config.yaml # ROCK job 元信息 + DinD 外层沙箱(必须) +├── docker-compose.yaml # 标准 docker-compose 编排(必须,由 compose_file 指向) +├── main.sh # 主容器入口脚本(被 compose 的 main service 引用) +├── init/ +│ └── dependency-init.sh # init service 脚本 +└── sidecars/ + └── proxy-sidecar.sh # sidecar service 脚本 +``` + +两个文件的分工: + +| 文件 | 负责 | 由谁解析 | +|------|------|----------| +| `job_config.yaml` | ROCK 层:job_name/timeout/labels、**DinD 外层沙箱** environment、`compose_file` 指针、可选 `oss_mirror` | ROCK SDK(pydantic `ComposeJobConfig`) | +| `docker-compose.yaml` | 容器层:services、depends_on、healthcheck、deploy.resources、networks、volumes、environment/env_file | **docker compose 自己**(ROCK 不解析其内容) | + +> **关键原则**:ROCK **不解析 `docker-compose.yaml` 的内部结构**,只把它当作一个待上传、待 `docker compose up` 的文件。这意味着用户能用 compose 的全部能力(profiles、多 network、configs、secrets…),ROCK 不构成表达力上限。 + +--- + +## 2. 
`job_config.yaml` 示例 + +继承 BashJobConfig 的 environment 描述外层沙箱;**移除** v1 的 `compose:` 块和顶层 `script_path`(入口已在 compose 的 main service 里),新增 `compose_file` 指针。 + +```yaml +# ComposeJobConfig 示例 — claude-code-swe 多容器场景 + +# ── 继承自 JobConfig ──────────────────────────────────────────── +job_name: claude-code-swe-2026-06-09 +namespace: xrl-sandbox +experiment_id: exp-swe-bench-001 +timeout: 7200 # 整体超时(docker compose up 全生命周期) +labels: + team: xrl + task: swe-bench + +# ── ComposeJobConfig 专有:指向标准 compose 文件(本地相对路径)── +compose_file: ./docker-compose.yaml + +# ── 继承自 EnvironmentConfig(描述 DinD 外层沙箱)────────────── +environment: + image: docker:27-dind # ★ 外层沙箱镜像必须是 DinD + memory: "32g" # 外层沙箱总内存(须 ≥ 内层各容器之和) + cpus: 8 # 外层沙箱总 CPU + cluster: default + startup_timeout: 1800 + use_kata_runtime: false + + # 把 compose 文件 + 脚本目录送进沙箱(compose 文件里用相对/绝对路径引用) + uploads: + - ["./docker-compose.yaml", "/rock/compose/docker-compose.yaml"] + - ["./main.sh", "/rock/compose/main.sh"] + - ["./init", "/rock/compose/init"] + - ["./sidecars", "/rock/compose/sidecars"] + + # 沙箱级环境变量(注入外层,docker compose 可读,用于 compose 内 ${VAR} 插值) + env: + OSS_BUCKET: xrl-artifacts + OSS_ENDPOINT: oss-cn-hangzhou-internal.aliyuncs.com + OSS_ACCESS_KEY_ID: "" + OSS_ACCESS_KEY_SECRET: "" + + # 产物上传(复用现有机制,compose 结束后由 runner P2 执行) + oss_mirror: + enabled: true + oss_bucket: xrl-artifacts +``` + +要点: + +- **没有 `script_path`**:主容器入口完全由 `docker-compose.yaml` 的 main service(`command:` / `entrypoint:` / 镜像默认)决定。 +- **`compose_file`** 是本地相对路径,ROCK 会把它 upload 到沙箱并在 `docker compose -f <沙箱内路径>` 中引用(见 §4)。 +- **`environment.env`** 注入到外层沙箱,`docker compose` 执行时这些值可用于 compose 文件里的 `${VAR}` 插值——这是把外层凭证传给内层 service 的标准通道。 + +--- + +## 3. 
`docker-compose.yaml` 示例(标准语义) + +这是用户写的**纯标准 compose 文件**,ROCK 不解析。下面示例展示如何用原生 compose 表达 v1 那套 init/sidecar/main/资源/健康/共享卷。 + +```yaml +# docker-compose.yaml — claude-code-swe 场景(纯标准 compose) +name: rock-compose-swe + +networks: + default: + name: rock_compose # service 名即 DNS 名,main 用 http://proxy:8082 访问 + +volumes: + shared: # 跨容器共享卷(替代 v1 的命名卷 + main_mount_path) + +services: + # ── init service:主容器前置依赖,跑完即退(service_completed_successfully)── + dependency-init: + image: code-agi-registry/claude-code-swe:20260508 + command: ["bash", "/rock/compose/init/dependency-init.sh"] + volumes: + - shared:/var/lib/dependency + - /rock/compose:/rock/compose:ro + environment: + OSS_BUCKET: "${OSS_BUCKET}" # 来自外层沙箱 environment.env,compose 插值注入 + OSS_ACCESS_KEY_ID: "${OSS_ACCESS_KEY_ID}" + OSS_ACCESS_KEY_SECRET: "${OSS_ACCESS_KEY_SECRET}" + + # ── sidecar service:与 main 并行;service 名 proxy 即 network-alias ── + proxy: + image: agent-platform-registry/claude-code-proxy:latest + command: ["bash", "/rock/compose/sidecars/proxy-sidecar.sh"] + volumes: + - /rock/compose:/rock/compose:ro + deploy: + resources: + limits: + cpus: "1" + memory: 1g + healthcheck: # 原生健康探测,替代 v1 HealthSpec + test: ["CMD", "nc", "-z", "localhost", "8082"] + interval: 5s + timeout: 3s + retries: 12 # ≈ 60s 就绪窗口 + + # ── main service:约定名 main,决定整体退出码 ── + main: + image: code-agi-registry/claude-code-swe:20260508 + command: ["bash", "/rock/compose/main.sh"] + depends_on: + dependency-init: + condition: service_completed_successfully # init 跑完才启动 main + proxy: + condition: service_healthy # proxy 就绪才启动 main + volumes: + - shared:/var/lib/dependency:ro # 读 init 写入的共享卷 + - /rock/compose:/rock/compose:ro + deploy: + resources: + limits: + cpus: "4" + memory: 16g + environment: + DATASET: princeton-nlp/SWE-bench_Verified + SPLIT: test + MODEL: claude-opus-4-8 + ANTHROPIC_BASE_URL: http://proxy:8082 # service 名做主机名 + ANTHROPIC_API_KEY: "${ANTHROPIC_API_KEY}" # 从外层 env / host env 插值 +``` + +**v1 自定义字段 → v2 标准 compose 
的映射**(用户写 compose 时照此翻译): + +| v1 ComposeJobConfig 字段 | v2 docker-compose.yaml | +|--------------------------|------------------------| +| `compose.main` | `services.main` | +| 顶层 `script_path: ./main.sh` | `services.main.command: ["bash", "/rock/compose/main.sh"]` | +| `compose.init_containers[]` | `services.<name>` + `main.depends_on.<name>.condition: service_completed_successfully` | +| `compose.sidecars[]` | `services.<name>`(普通 service) | +| `SidecarSpec.health` | `services.<name>.healthcheck` + `main.depends_on.<name>.condition: service_healthy` | +| `ResourceSpec.cpus/memory/*_limit` | `services.<name>.deploy.resources.{reservations,limits}` | +| `secret_env`(K8s 风格) | `services.<name>.environment` / `env_file`(值用 `${VAR}` 从外层注入) | +| `OssDep.oss_deps[]` | 写成 init service 拉取,或主容器脚本内拉(见 §7.6) | +| `VolumeMount` + `main_mount_path` | 标准命名 `volumes:` + 各 service 自己的 `volumes:` 挂载点 | +| `--network-alias <name>` | compose 默认 network 下 service 名即 DNS 名(零配置) | +| `command` / `args` / `privileged` | `services.<name>.command` / `entrypoint` / `privileged: true` | + +--- + +## 4.
ComposeJobConfig 数据模型(Python) + +v2 模型大幅瘦身——不再有 ResourceSpec / VolumeMount / SecretEnvEntry / OssDep / HealthSpec / 各 ContainerSpec / ComposeSpec。只剩一个顶层 `compose_file` 字符串。 + +```python +from __future__ import annotations + +from datetime import datetime +from pathlib import Path + +from pydantic import ConfigDict, Field, model_validator + +from rock.sdk.job.config import JobConfig + + +class ComposeJobConfig(JobConfig): + """Docker Compose 多容器 Job 配置(v2:标准 compose 文件)。 + + 与 BashJobConfig 平级,直接继承 JobConfig。不再有顶层 script/script_path—— + 主容器入口由 docker-compose.yaml 的 main service 决定。 + + 类型检测特征:YAML 中存在 ``compose_file`` 键即识别为本类型。 + """ + + model_config = ConfigDict(extra="forbid") + + job_name: str = Field(default_factory=lambda: datetime.now().strftime("%Y-%m-%d__%H-%M-%S")) + + # 必填,存在即标识 ComposeJobConfig。本地路径(相对 job_config.yaml)。 + compose_file: str + + # 任一容器退出即停止整组(docker compose up --abort-on-container-exit)。 + # 默认开启;置 false 可让 sidecar 崩溃不阻断 main。 + abort_on_container_exit: bool = True + + @model_validator(mode="after") + def _validate_compose_file(self) -> "ComposeJobConfig": + # 仅做存在性 / 后缀的轻量校验;不解析 compose 内部结构。 + if not self.compose_file: + raise ValueError("compose_file 不能为空") + return self +``` + +> **主 service 名硬约定为 `main`**:runner 用 `--exit-code-from main` 取退出码(见 §5),不引入可配置字段,保持约定优先、零配置。 +> **`abort_on_container_exit`**:默认开启(任一 service 退出即收敛整组);需要"sidecar 崩溃不阻断 main"时显式置 `false`,runner 据此决定是否给 `docker compose up` 加 `--abort-on-container-exit`(见 §6)。 + +### `JobConfig.from_yaml()` 检测分支(修改) + +检测顺序:**Harbor → Compose → Bash**。特征字段从 `"compose"` 改为 **`"compose_file"`**。 + +```python +# JobConfig.from_yaml() 内(lazy import 避免循环依赖) +# 1) Harbor:特征是 required experiment_id +try: + return HarborJobConfig.model_validate(data) +except (ValidationError, ValueError) as exc: + harbor_error = exc + +# 2) Compose:特征是存在 "compose_file" 键(v2 变更点) +if "compose_file" in data: + from rock.sdk.job.compose.config import ComposeJobConfig + try: + return 
ComposeJobConfig.model_validate(data) + except (ValidationError, ValueError) as exc: + compose_error = exc + +# 3) Bash:兜底 +try: + return BashJobConfig.model_validate(data) +except (ValidationError, ValueError) as exc: + bash_error = exc +``` + +**向后兼容**:现有 Bash/Harbor YAML 无 `compose_file` 键,零影响;含 `compose_file` 的 YAML 因 Bash/Harbor 的 `extra="forbid"` 被它们拒绝,唯一落到 ComposeJobConfig。 + +--- + +## 5. ComposeTrial 逻辑设计 + +继承 `AbstractTrial`,三段式接口,末尾 `register_trial(ComposeJobConfig, ComposeTrial)`。相比 v1,runner.sh 从几百行的多 Phase 编排**缩到只剩 dockerd 引导 + `docker compose up`**。 + +```python +class ComposeTrial(AbstractTrial): + """在 DinD 沙箱内用 `docker compose` 编排多容器的 Trial。 + + setup() → 上传 compose 文件 + 脚本 + 凭证,生成极简 runner.sh + build() → 返回 "bash /rock/runner.sh" + collect()→ runner.sh 退出码 = main service 退出码(--exit-code-from main) + """ + _config: ComposeJobConfig +``` + +### 5.1 `on_sandbox_ready()` + +调 `super().on_sandbox_ready(sandbox)` 回填 `namespace` / `experiment_id`。把 **OSS 凭证**解析后写入 `environment.env`,供 ① `docker compose` 插值 ② P2 产物上传使用(同 BashTrial 思路)。 + +### 5.2 `setup()` + +```python +async def setup(self, sandbox: Sandbox) -> None: + await self._upload_files(sandbox) # 上传 environment.uploads(含 compose 文件 + 脚本) + + runner = self._render_runner_sh() # 极简模板,几乎无条件渲染 + await sandbox.fs.write_text("/rock/runner.sh", runner) + await sandbox.arun("chmod +x /rock/runner.sh") +``` + +不再需要 `_materialize_inline_scripts` / `ensure_ossutil` / 各容器 docker run 渲染——这些要么交给 compose,要么交给用户脚本。 + +### 5.3 `build()` + +```python +def build(self) -> str: + return "bash /rock/runner.sh" +``` + +### 5.4 `collect()` + +```python +async def collect(self, sandbox, output, exit_code) -> TrialResult: + exc = None + if exit_code != 0: + exc = ExceptionInfo( + exception_type="ComposeMainServiceFailed", + exception_message=f"main service exited with {exit_code}", + ) + # 各 service 日志由 runner P2 `docker compose logs` 落到 /rock/logs/compose.log + obs = await sandbox.arun("cat /rock/logs/compose.log 
2>/dev/null || true") + return TrialResult( + task_name=self._config.job_name or "", + exception_info=exc, + raw_output=output, + exit_code=exit_code, + ) +``` + +--- + +## 6. runner.sh 生命周期(v2 极简版) + +runner.sh 在 **DinD 外层沙箱** 内运行;退出码 = main service 退出码。整体只有 3 个 Phase。 + +```bash +#!/bin/bash +# runner.sh — ROCK ComposeJob 运行时(v2:委托 docker compose) +set -uo pipefail +COMPOSE_FILE="/rock/compose/docker-compose.yaml" +LOG_DIR="/rock/logs"; mkdir -p "$LOG_DIR" +RUNNER_EXIT=0 + +cleanup_all() { + docker compose -f "$COMPOSE_FILE" logs --no-color > "$LOG_DIR/compose.log" 2>&1 || true + docker compose -f "$COMPOSE_FILE" down -v --remove-orphans >/dev/null 2>&1 || true +} +trap cleanup_all EXIT +trap 'RUNNER_EXIT=143; exit 143' TERM INT + +# ── P0: 引导并等待 dockerd 就绪 ── +echo "[runner] P0: wait docker daemon" +if ! docker info >/dev/null 2>&1; then + if ! pgrep -x dockerd >/dev/null 2>&1; then + PATH=/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin \ + DOCKER_IGNORE_BR_NETFILTER_ERROR=1 nohup dockerd >/var/log/dockerd.log 2>&1 & + fi +fi +for i in $(seq 1 30); do docker info >/dev/null 2>&1 && break; sleep 2; done +docker info >/dev/null 2>&1 || { echo "docker daemon not ready"; exit 1; } + +# 可选:私有 registry 登录(凭证来自 SandboxConfig.registry_*) +if [ -n "${REGISTRY_USERNAME:-}" ]; then + docker login "${REGISTRY_HOST:-}" -u "$REGISTRY_USERNAME" -p "$REGISTRY_PASSWORD" >/dev/null 2>&1 || true +fi + +# ── P1: docker compose up,主 service 退出即收敛 ── +# __ABORT_FLAG__ 由 ComposeTrial 渲染:abort_on_container_exit=True → +# "--abort-on-container-exit",否则为空字符串。 +echo "[runner] P1: docker compose up" +docker compose -f "$COMPOSE_FILE" up \ + __ABORT_FLAG__ \ + --exit-code-from main 2>&1 | tee "$LOG_DIR/up.log" +RUNNER_EXIT=${PIPESTATUS[0]} +echo "[runner] main service exited rc=$RUNNER_EXIT" + +# ── P2: 可选 OSS 产物上传(仅当 environment.oss_mirror.enabled)── +__PHASE2_OSS_UPLOAD__ + +exit "$RUNNER_EXIT" +``` + +各 Phase 要点: + +| Phase | 行为 | 失败语义 | +|-------|------|----------| +| 0 | 
引导 dockerd、等就绪、可选私有 registry 登录 | dockerd 60s 未就绪 → exit 1 | +| 1 | `docker compose up [--abort-on-container-exit] --exit-code-from main` | main service 退出码即整体结果;`abort_on_container_exit=True`(默认)时任一容器退出触发 abort | +| 2 | 可选 OSS 产物上传(ossutil cp) | 收尾失败不改变 RUNNER_EXIT | +| EXIT(trap) | `docker compose logs` 落盘 + `docker compose down -v` 清理 | 始终执行 | + +> `--abort-on-container-exit` 让任一 service 退出即停止整组;`--exit-code-from main` 保证整体退出码取自 main service。init service 跑完会"退出",但因为它是 main 的 `depends_on: service_completed_successfully` 前置、在 main 启动前就结束,配合 compose 的依赖编排不会误触 abort 主流程——这是标准 compose 行为,无需 ROCK 干预。 + +--- + +## 7. 关键机制(v2:几乎全部委托 compose) + +### 7.1 容器间网络 +compose 默认建一个 project network,**service 名即 DNS 名**。main 直接 `http://proxy:8082` 访问 proxy service。零配置,无需 runner 建 network / `--network-alias`。 + +### 7.2 共享卷 +compose 顶层 `volumes:` 声明命名卷,各 service 在自己的 `volumes:` 里挂到各自路径。v1 的 `main_mount_path` 逻辑映射消失——init 写 `/var/lib/dependency`、main 读 `/var/lib/dependency:ro`,各 service 挂载点本就独立,是 compose 原生能力。 + +### 7.3 secret / 凭证注入 +两级: +1. **外层 → compose 插值**:`environment.env` 注入外层沙箱,`docker-compose.yaml` 里用 `${VAR}` 引用,compose 执行时插值。 +2. 
**service 级**:compose 的 `environment:` / `env_file:` 直接写。 +**安全边界**:仍仅防 YAML 明文(值会出现在容器 env / `docker inspect`),不防沙箱内进程读取。需更强隔离用 compose `secrets:` + 文件挂载。 + +### 7.4 资源限制 +compose `deploy.resources.{limits,reservations}.{cpus,memory}` 原生表达。外层 `environment.cpus/memory` 是总上限,内层之和须 ≤ 外层——v2 **不再做 Python 侧 `_resource_budget_check`**(ROCK 不解析 compose 内容),由用户自行保证;如需校验可作为后续可选增强(解析 compose 求和)。 + +### 7.5 DinD 边界问题 +- **daemon 就绪**:P0 引导 + 轮询 `docker info`(kata 后端 dockerd 不自启,需 ROCK 引导,见 P0 注释)。 +- **私有镜像**:P0 `docker login`,凭证来自 `SandboxConfig.registry_username/password`。 +- **容器清理**:`trap EXIT → docker compose down -v --remove-orphans`,比 v1 按 `$$` 过滤 `docker rm` 更干净。 + +### 7.6 init / 依赖编排 +完全交给 compose `depends_on` 的 condition: +- `service_completed_successfully` — init 跑完且成功才启动依赖方;init 失败则 compose 直接报错、main 不启动。 +- `service_healthy` — sidecar healthcheck 通过才启动 main,替代 v1 的 busybox nc 探测。 + +--- + +## 8. 三类 Job 对比表(v2) + +| 维度 | BashJobConfig | HarborJobConfig | ComposeJobConfig (v2) | +|------|--------------|----------------|------------------| +| 容器数量 | 1(沙箱内直跑脚本) | 1 主 + Harbor 内部编排 | N services,DinD 内 `docker compose` 编排 | +| 编排载体 | 顶层 `script_path` | `harbor jobs start -c` | **独立 `docker-compose.yaml`** | +| 主容器入口 | 顶层 `script_path` | harbor CLI | compose **main service**(约定名) | +| 外层沙箱 | `environment` | `environment`(Rock 扩展) | `environment`,**image 须为 dind** | +| 内层容器资源 | 无 | Harbor override | **compose `deploy.resources`** | +| OSS 产物上传 | `environment.oss_mirror` | `environment.oss_mirror` | `environment.oss_mirror`(复用) | +| OSS 依赖下载 | 无内建 | `environment.oss_deps` | **compose init service / 主容器脚本** | +| Secret 注入 | `environment.env`(明文) | `environment.env` | **compose `environment`/`env_file`** + 外层 `${VAR}` 插值 | +| init / 依赖 | 无 | Harbor 内部 | **compose `depends_on` condition** | +| 健康探测 | 无 | Harbor 内部 | **compose `healthcheck`** | +| YAML 特征字段 | 无(兜底) | `experiment_id`(required) | **`compose_file`(存在即识别)** | +| Trial 类 | `BashTrial` | `HarborTrial` | 
`ComposeTrial`(极简 runner) | +| 沙箱内执行 | `sandbox.nohup(script)` | harbor CLI | **`docker compose up --exit-code-from main`** | +| dockerd 依赖 | ❌ | ✅ | ✅ | +| collect 产物 | `TrialResult` | `list[TrialResult]` | `TrialResult`(+`docker compose logs`) | + +--- + +## 9. 集成与边界问题清单(v2) + +| # | 问题 | 处理 | +|---|------|------| +| 1 | `from_yaml` 检测顺序 | Harbor → Compose(`"compose_file" in data`) → Bash;三者 extra=forbid 互斥 | +| 2 | Registry 注册 | `register_trial(ComposeJobConfig, ComposeTrial)`;确保 `compose/trial.py` 被 import 链触发 | +| 3 | timeout 语义 | `JobConfig.timeout` = 整体超时(`docker compose up` 全程);service 级超时用 compose `healthcheck`/脚本自管 | +| 4 | 主 service 识别 | 硬约定为 `main`;`--exit-code-from main` 取退出码,无可配置字段 | +| 5 | compose 文件上传 | 通过 `environment.uploads` 显式上传 compose 文件 + 脚本到 `/rock/compose/`;runner `-f` 引用沙箱内路径 | +| 6 | compose 文件内部 | ROCK **不解析**;表达力 = docker compose 全集;校验交给 `docker compose config`(可选 setup 阶段预检) | +| 7 | init 失败 | compose `service_completed_successfully` 失败 → compose 报错,main 不启动,runner 非 0 退出 | +| 8 | sidecar 崩溃 | 默认 `abort_on_container_exit=True` 让整组停止;需 sidecar 崩溃不阻断 main 时置 `false`,runner 去掉 `--abort-on-container-exit`(此时 main 跑完仍正常收敛) | +| 9 | 两层资源混淆 | v2 不在 Python 侧求和校验(不解析 compose);文档提示用户外层 ≥ 内层之和 | +| 10 | 向后兼容 | 现有 Bash/Harbor YAML 无 `compose_file` 键,零影响 | +| 11 | secret 安全边界 | 文档明确:仅防 YAML 明文,不防沙箱内读取;更强隔离用 compose `secrets:` | +| 12 | 内联脚本 | v2 不再支持 job_config 内联 script;脚本走 uploads + compose `command` 引用,路径统一 `/rock/compose/` | + +--- + +## 10. 
从 v1 迁移指南 + +已按 v1 写过 `compose:` 块的用户,按下表把单文件拆成 `job_config.yaml` + `docker-compose.yaml`: + +| v1 位置 | v2 去向 | +|---------|---------| +| 顶层 `job_name/namespace/experiment_id/timeout/labels` | 留在 `job_config.yaml`(不变) | +| 顶层 `script_path` | 删除;改写为 `docker-compose.yaml` 里 main service 的 `command` | +| `environment.*`(外层沙箱) | 留在 `job_config.yaml`(不变),uploads 增加 compose 文件 | +| `environment.oss_mirror` | 留在 `job_config.yaml`(不变) | +| `compose.main` | `docker-compose.yaml` → `services.main` | +| `compose.init_containers[]` | `services.<name>` + main `depends_on.<name>: service_completed_successfully` | +| `compose.sidecars[]` | `services.<name>`(+ `healthcheck` + main `depends_on.<name>: service_healthy`) | +| `*.resources` | `services.<name>.deploy.resources` | +| `*.secret_env` | `services.<name>.environment`(值用 `${VAR}` 从外层 env 插值) | +| `main.oss_deps[]` | 写成 init service 拉取,或 main 脚本内拉 | +| `volume_mounts` + `main_mount_path` | 顶层 `volumes:` + 各 service `volumes:` 挂载点 | +| 新增 | `job_config.yaml` 顶层加 `compose_file: ./docker-compose.yaml` | + +--- + +## 11. 后续实现建议(非本设计范围) + +1. **改造模块**:`rock/sdk/job/compose/{config.py, trial.py}` —— config 删掉 v1 的全部子模型,trial 删掉 runner 多 Phase 渲染逻辑,runner.sh 模板换成 §6 极简版。 +2. **TDD**: + - `from_yaml` 三类型识别参数化测试(特征字段改 `compose_file`); + - `ComposeJobConfig` 校验测试(`compose_file` 必填、`abort_on_container_exit` 默认 True、extra forbid); + - runner.sh 渲染快照测试(`ABORT_FLAG` / OSS_UPLOAD 两个占位符)。 +3. **集成测试**:标 `@pytest.mark.need_admin`,在真实 DinD 沙箱内跑一个最小 `main + proxy(healthcheck)` compose 用例,验证 `--exit-code-from main` 退出码透传。 +4.
**可选增强**: + - setup 阶段 `docker compose config -q` 预检 compose 文件合法性(fail-fast); + - 解析 compose 求 `deploy.resources` 之和,与外层资源比较给 warning(恢复 v1 的 budget check,但作为可选项)。 diff --git a/examples/job/compose/README.md b/examples/job/compose/README.md index cb618636d0..6d87136e01 100644 --- a/examples/job/compose/README.md +++ b/examples/job/compose/README.md @@ -1,51 +1,72 @@ -# ComposeJobConfig 端到端用例:harbor + cc-proxy +# ComposeJobConfig (v2) 端到端用例:harbor + cc-proxy -本目录展示如何用 `ComposeJobConfig` 在 ROCK DinD 沙箱内运行 harbor 任务 +本目录展示如何用 `ComposeJobConfig` v2 在 ROCK DinD 沙箱内运行 harbor 任务 (claude-code agent 跑 terminal-bench / aone-bench-java100)。 +v2 的核心变化:容器编排从 job_config 内的自定义 `compose:` 块迁移到标准 `docker-compose.yaml`。 +ROCK 不再解析 compose 内部结构,只负责:① 准备 DinD 外层沙箱;② 引导 dockerd; +③ `docker compose up --exit-code-from main`;④ 收退出码 + 可选 OSS 产物上传。 + ## 目录结构 ``` examples/job/compose/ ├── harbor_compose_demo.py # ★ 开箱即用 demo(凭证走环境变量,内置所有真机 fix) -├── .env.example # 凭证模板(cp 成 .env 填值后 source) ├── compose_demo.py # 通用入口(-c 读 YAML,适合自定义 config) -├── job_config.yaml.template # ComposeJobConfig YAML 模板(含占位符) -├── main.sh # 主容器入口脚本(harbor runner,原 Agent-Hub/task/harbor/main.sh + 两层适配) +├── job_config.yaml.template # ComposeJobConfig YAML 模板(v2:只含 compose_file 指针) +├── docker-compose.yaml # ★ 标准 compose 编排(main + proxy sidecar) +├── main.sh # 主容器入口脚本(harbor runner) └── sidecars/ - └── proxy-sidecar.sh # cc-proxy sidecar 脚本(原 Agent-Hub/task/harbor/proxy-sidecar.sh) + └── proxy-sidecar.sh # cc-proxy sidecar 脚本 ``` -## 快速开始(推荐:harbor_compose_demo.py) +两个文件的分工: -这是经过 ROCK 真机端到端验证的脚本,对应你的 AP harbor 命令,所有 dockerd/网络/挂载 -修正都已内置,只需配凭证: +| 文件 | 负责 | 由谁解析 | +|------|------|----------| +| `job_config.yaml` | ROCK 层:job_name/timeout/labels、DinD 外层沙箱 environment、`compose_file` 指针、可选 `oss_mirror` | ROCK SDK | +| `docker-compose.yaml` | 容器层:services、depends_on、healthcheck、deploy.resources、networks、volumes、environment | **docker compose 自己**(ROCK 不解析) | + +## 快速开始(推荐:harbor_compose_demo.py) ```bash cd 
examples/job/compose -cp .env.example .env # 填入 ROCK_TOKEN / MODEL_* / OSS_* 凭证 -source .env +# 配置凭证 +export ROCK_TOKEN='' +export MODEL='claude-opus-4-8' +export MODEL_BASE_URL='https://api.anthropic.com/v1' +export MODEL_API_KEY='sk-ant-...' +export OSS_BUCKET='' +export OSS_ENDPOINT='oss-cn-hangzhou-internal.aliyuncs.com' +export OSS_REGION='cn-hangzhou' +export OSS_ACCESS_KEY_ID='' +export OSS_ACCESS_KEY_SECRET='' + +# 运行 uv run python harbor_compose_demo.py ``` -**OSS 凭证是必需的**:harbor 从 OSS 下载 dataset(`terminal-bench/aone-bench-java100`)。 -AP 平台自动注入 OSS 凭证,SDK 直连模式需你在 `.env` 里显式提供 -(OSS_BUCKET / OSS_ENDPOINT / OSS_REGION / OSS_ACCESS_KEY_ID / OSS_ACCESS_KEY_SECRET)。 +**OSS 凭证是必需的**:harbor 从 OSS 下载 dataset。AP 平台自动注入 OSS 凭证,SDK 直连模式需显式提供。 -任务参数(INSTANCE_ID / DATASET / HARBOR_AGENT 等)都有默认值(对应 AP `-p`), -可用环境变量覆盖,详见 `harbor_compose_demo.py` 顶部 docstring。 +任务参数(INSTANCE_ID / DATASET / HARBOR_AGENT 等)都有默认值(对应 AP `-p`),可用环境变量覆盖, +详见 `harbor_compose_demo.py` 顶部 docstring。 -## 运行方案:runner.sh 在外层沙箱主动启动 dockerd +## v2 运行方案 -> 以下要点均经过 ROCK 真实后端(`xrl.alibaba-inc.com` / `vpc-sg-a`,kata runtime)端到端验证。 +### 两层结构 -外层沙箱镜像**必须自带 `docker` / `dockerd` / `containerd` / `runc`**(例如 harbor runner 镜像)。 +``` +DinD 外层沙箱(外层镜像,自带 docker 工具链) +└── runner.sh(ComposeTrial 生成,极简:dockerd 引导 + docker compose up) + └── docker-compose.yaml(用户标准 compose 编排) + ├── proxy service ← main depends_on(service_started) + └── main service ← 决定整体退出码(约定名 main) +``` -**重要**:不要用 `docker:27-dind` 作外层镜像 —— 实测该镜像变体在 ROCK kata 沙箱内 -缺少 `containerd`,dockerd 无法启动。请用一个预装完整 docker 工具链的业务镜像。 +### dockerd 引导(P0) -**dockerd 不会自动启动**:ROCK kata 沙箱进入时没有运行 dockerd。`ComposeTrial` -生成的 `runner.sh` 会在 P0 阶段主动启动它,并内置两个 kata 环境必需的修正: +ROCK kata 沙箱进入时没有运行 dockerd。`ComposeTrial` 生成的 `runner.sh` 会在 P0 阶段主动启动, +并内置两个 kata 环境必需的修正: ```bash PATH=/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin \ @@ -53,57 +74,38 @@ DOCKER_IGNORE_BR_NETFILTER_ERROR=1 \ nohup dockerd >/var/log/dockerd.log 2>&1 & ``` -- **显式 PATH**:`nohup` 启动的 
dockerd 不继承交互 shell 的 PATH,否则报 - `containerd executable file not found`。 -- **`DOCKER_IGNORE_BR_NETFILTER_ERROR=1`**:kata guest 缺 - `/proc/sys/net/bridge/bridge-nf-call-iptables`,否则 bridge 网络初始化失败。 +- **显式 PATH**:`nohup` 启动的 dockerd 不继承交互 shell 的 PATH,否则报 `containerd executable file not found`。 +- **`DOCKER_IGNORE_BR_NETFILTER_ERROR=1`**:kata guest 缺 `/proc/sys/net/bridge/bridge-nf-call-iptables`, + 否则 bridge 网络初始化失败。 -这些已固化在 `rock/sdk/job/compose/trial.py` 的 runner 模板里,用户无需关心。 +### compose 编排要点 -### 三层 DinD 结构(注意事项) +- **service 名 = DNS 名**:`proxy` service 在同一 compose network 里直接用 `http://proxy:8082` 访问。 +- **依赖编排**:main `depends_on: proxy: condition: service_started`,无需 runner.sh 里的 busybox nc 探测逻辑。 +- **资源限制**:`deploy.resources.limits` 原生表达,外层 cpus/memory ≥ 内层各容器之和。 +- **凭证注入**:`environment.env` 注入外层沙箱,compose 文件里用 `${VAR}` 插值传给内层 service。 -``` -ROCK 外层 kata 沙箱(业务镜像,自带 docker 工具链) -└── runner.sh(ComposeTrial 生成,P0 启动 dockerd,再用 docker CLI 编排) - ├── proxy sidecar 容器(cc-proxy,监听 8082,--network-alias proxy) - └── main 容器(harbor runner,privileged=true,挂载 /rock/scripts) - └── harbor CLI → 在内层再起 task env 容器(第三层) -``` +> ⚠️ **healthcheck + service_healthy 的陷阱(真机验证踩坑)**:本例 proxy-sidecar.sh 会**按 agent 类型自门控**—— +> 对 `agent=claude-code` 它打印 `Proxy not needed ... 
sleeping` 后直接 sleep,**不监听 8082 端口**。 +> 若给 proxy 配端口 healthcheck + main `depends_on: service_healthy`,proxy 永远不健康 → +> main **根本不启动** → `docker compose up` 返回 rc=1。 +> 因此本例改用 `condition: service_started`(只等 proxy 容器起来,不等"健康")。 +> 经验:**只有当 sidecar 真的会进入监听态时才用 service_healthy**;自门控/可空跑的 sidecar 用 service_started。 -**关键约束**: -- 外层沙箱须开启 `use_kata_runtime: true` -- main 容器须 `privileged: true`(harbor 内部起 docker 容器需要) -- runner.sh 自动把外层 `/rock/scripts` 以 `-v /rock/scripts:/rock/scripts:ro` - 挂载进 main / init / sidecar 容器,使各容器能执行上传的脚本 -- 主容器入口固定为 `bash /rock/scripts/main.sh`,因此**业务镜像须自带 `bash`** -- proxy 访问:sidecar 以 `--network-alias proxy` 注册,同一 compose network 内 - main 容器可用 `http://proxy:8082` 直达。注意:原始 Agent-Hub main.sh 用 - `docker network inspect bridge` 取 gateway IP 访问 proxy,这是 K8s 同 Pod 模型; - 在 compose 独立容器模型下若 proxy 不在 main 的 bridge 网络,需改用 `proxy:8082` - alias(见 main.sh 顶部注释) +### 沙箱内文件布局 -## 环境变量 +uploads 后的路径(由 `environment.uploads` 配置): -运行前需导出以下环境变量(或写入 `.env` 后 `source .env`): - -```bash -# 模型 -export MODEL=claude-opus-4-8 -export MODEL_API_KEY= -export MODEL_BASE_URL= - -# ROCK 集群 -export ROCK_TOKEN= - -# OSS 凭证 -export OSS_ACCESS_KEY_ID= -export OSS_ACCESS_KEY_SECRET= -export OSS_REGION=cn-hangzhou -export OSS_ENDPOINT=oss-cn-hangzhou-internal.aliyuncs.com -export OSS_BUCKET= +``` +/rock/compose/docker-compose.yaml ← 来自本地 docker-compose.yaml +/rock/compose/main.sh ← 来自本地 main.sh +/rock/compose/sidecars/ ← 来自本地 sidecars/ +/rock/runner.sh ← ComposeTrial 生成(极简模板) +/rock/logs/compose.log ← runner.sh trap EXIT 收集的 docker compose logs +/rock/logs/up.log ← docker compose up 实时输出(tee) ``` -## 使用步骤 +## 使用步骤(compose_demo.py + YAML) ### 1. 准备配置文件 @@ -117,12 +119,12 @@ cp examples/job/compose/job_config.yaml.template examples/job/compose/job_config | 占位符 | 说明 | |--------|------| | `` | ROCK 集群认证 token | +| `` | harbor runner 镜像(含 harbor CLI + claude-code + docker 工具链) | +| `` | claude-code proxy 镜像 | | `` | 模型名,e.g. 
`claude-opus-4-8` | | `` | 模型 API Key | | `` | 模型 Base URL | | `` | 任务 ID,e.g. `mailman` | -| `` | harbor runner 镜像(含 harbor CLI + claude-code) | -| `` | claude-code proxy 镜像 | | `` | OSS 凭证和 bucket 信息 | ### 2. 运行 @@ -131,19 +133,22 @@ cp examples/job/compose/job_config.yaml.template examples/job/compose/job_config python examples/job/compose/compose_demo.py -c examples/job/compose/job_config.yaml ``` -### 3. 查看结果 - -脚本会打印 `exit_code`、`score` 和各 trial 结果。 -产物(harbor_stdout.txt、result.json、metrics.json 等)通过 `oss_mirror` 上传到 OSS。 - -## proxy sidecar 端口 +## v1 → v2 迁移对照 -`proxy-sidecar.sh`(原 Agent-Hub/task/harbor/proxy-sidecar.sh)监听端口 **8082**。 -配置中 `sidecars[].health.port = 8082`,runner.sh 会在主容器启动前探测该端口就绪。 +| v1 位置 | v2 去向 | +|---------|---------| +| 顶层 `script_path` | 删除;改写为 `docker-compose.yaml` 里 main service 的 `command` | +| `compose.main` | `services.main` | +| `compose.init_containers[]` | `services.` + main `depends_on.: service_completed_successfully` | +| `compose.sidecars[]` | `services.`(监听态 sidecar 用 `healthcheck` + `depends_on: service_healthy`;自门控/可空跑的用 `service_started`) | +| `*.resources` | `services..deploy.resources` | +| `*.secret_env` | `services..environment`(值用 `${VAR}` 从外层 env 插值) | +| 新增 | `job_config.yaml` 顶层加 `compose_file: ./docker-compose.yaml` | ## 与 HarborJobConfig 的对比 本示例展示 ComposeJobConfig 的表达能力。实际上,对于"调 harbor CLI 跑 benchmark"的场景, `HarborJobConfig` 更原生(自带 agents/datasets/verifier 结构化支持)。 -ComposeJobConfig 更适合:自己掌控每个容器镜像和脚本、容器间是简单"主 + sidecar + init"拓扑。 +ComposeJobConfig v2 更适合:自己掌控每个容器镜像和脚本、用标准 compose 语义表达复杂编排 +(init 依赖、healthcheck、资源限制、共享卷、多 network 等)。 diff --git a/examples/job/compose/compose_demo.py b/examples/job/compose/compose_demo.py index 72449d971a..98c3a4a747 100644 --- a/examples/job/compose/compose_demo.py +++ b/examples/job/compose/compose_demo.py @@ -1,15 +1,17 @@ -"""ComposeJobConfig end-to-end demo using ROCK Job SDK. +"""ComposeJobConfig (v2) end-to-end demo using ROCK Job SDK. 
Runs a harbor task (claude-code agent on terminal-bench / aone-bench-java100) -using ``ComposeJobConfig`` — the multi-container compose variant of JobConfig. +using ``ComposeJobConfig`` v2 — container orchestration is fully delegated to +a standard ``docker-compose.yaml`` file. -The outer DinD sandbox (docker:27-dind) provides the Docker daemon. -Inside, runner.sh orchestrates: - - main container → harbor runner (main.sh) - - proxy sidecar → claude-code proxy (port 8082) +Layout inside the DinD sandbox after uploads: + /rock/compose/docker-compose.yaml ← uploaded from compose_file + /rock/compose/main.sh ← harbor runner entry-point + /rock/compose/sidecars/ ← sidecar scripts + /rock/runner.sh ← generated by ComposeTrial (minimal) Usage: - python examples/job/compose/compose_demo.py -c examples/job/compose/job_config.yaml.template + python examples/job/compose/compose_demo.py -c examples/job/compose/job_config.yaml Required environment variables (forwarded into the sandbox via environment.env): MODEL Model name, e.g. claude-opus-4-8 diff --git a/examples/job/compose/docker-compose.yaml b/examples/job/compose/docker-compose.yaml new file mode 100644 index 0000000000..06414f4c82 --- /dev/null +++ b/examples/job/compose/docker-compose.yaml @@ -0,0 +1,96 @@ +# docker-compose.yaml — harbor compose job (v2: standard compose orchestration) +# +# This file is uploaded by ComposeJobConfig (via environment.uploads) to +# /rock/compose/docker-compose.yaml inside the DinD sandbox, then executed +# by runner.sh as: +# docker compose -f /rock/compose/docker-compose.yaml up \ +# --abort-on-container-exit --exit-code-from main +# +# Environment variables with ${VAR} syntax are interpolated from the outer +# DinD sandbox's environment (injected via environment.env in job_config.yaml). +# This is the standard channel for passing credentials into inner containers. + +name: rock-compose-harbor + +networks: + default: + name: rock_compose_harbor # service name = DNS hostname (e.g. 
http://proxy:8082) + +services: + # ── proxy sidecar: claude-code proxy; main waits only for it to be started (no healthcheck, see NOTE below) ── + proxy: + image: ${PROXY_IMAGE} + command: ["bash", "/rock/compose/sidecars/proxy-sidecar.sh"] + volumes: + - /rock/compose:/rock/compose:ro + environment: + MODEL: "${MODEL}" + MODEL_API_KEY: "${MODEL_API_KEY}" + MODEL_BASE_URL: "${MODEL_BASE_URL}" + HARBOR_AGENT: "${HARBOR_AGENT}" + PROVIDER: "${PROVIDER:-anthropic}" + OUTPUT_DIR: /tmp/output + SHARED_DIR: /tmp/shared + deploy: + resources: + limits: + cpus: "1" + memory: 1g + # NOTE: no healthcheck here. proxy-sidecar.sh self-gates by agent type — for + # agent=claude-code it intentionally sleeps without binding :8082 ("Proxy not + # needed ... sleeping"). A port-based healthcheck would therefore never pass and + # block main via depends_on. main only needs the proxy container *started* + # (see main.depends_on: service_started below), not "healthy". + + # ── main service: harbor runner — decides overall exit code ── + main: + image: ${HARBOR_MAIN_IMAGE} + command: ["bash", "/rock/compose/main.sh"] + privileged: true # harbor CLI needs to spawn inner docker containers + depends_on: + proxy: + condition: service_started # proxy self-gates by agent; only need it started + volumes: + - /rock/compose:/rock/compose:ro + - /var/run/docker.sock:/var/run/docker.sock # reuse outer dockerd (avoid 3rd DinD layer) + environment: + # model + MODEL: "${MODEL}" + MODEL_BASE_URL: "http://proxy:8082" # service name is the DNS hostname + MODEL_API_KEY: "${MODEL_API_KEY}" + # harbor task + HARBOR_AGENT: "${HARBOR_AGENT}" + INSTANCE_ID: "${INSTANCE_ID}" + DATASET: "${DATASET}" + DATASET_NAME: "${DATASET_NAME:-${DATASET}}" + DATASET_VERSION: "${DATASET_VERSION:-latest}" + DATASET_TYPE: "${DATASET_TYPE:-registry}" + SPLIT: "${SPLIT:-test}" + HARBOR_ENV: docker + N_ATTEMPTS: "${N_ATTEMPTS:-1}" + N_CONCURRENT: "${N_CONCURRENT:-1}" + TIMEOUT_MULTIPLIER: "${TIMEOUT_MULTIPLIER:-3.0}" + MAX_RETRIES: "${MAX_RETRIES:-3}" + 
MAX_ITERATIONS: "${MAX_ITERATIONS:-200}" + SKIP_CONFIRM: "${SKIP_CONFIRM:-true}" + OUTPUT_DIR: /tmp/output + SHARED_DIR: /tmp/shared + # provider flags + PROVIDER: "${PROVIDER:-anthropic}" + FORCE_PROXY: "${FORCE_PROXY:-true}" + TEMPERATURE: "${TEMPERATURE:-1.0}" + INTERLEAVED_THINKING: "${INTERLEAVED_THINKING:-true}" + THINKING_TYPE: "${THINKING_TYPE:-adaptive}" + REASONING_EFFORT: "${REASONING_EFFORT:-high}" + CONTEXT_1M: "${CONTEXT_1M:-true}" + # OSS credentials (for dataset download and artifact upload) + OSS_BUCKET: "${OSS_BUCKET}" + OSS_ENDPOINT: "${OSS_ENDPOINT}" + OSS_REGION: "${OSS_REGION}" + OSS_ACCESS_KEY_ID: "${OSS_ACCESS_KEY_ID}" + OSS_ACCESS_KEY_SECRET: "${OSS_ACCESS_KEY_SECRET}" + deploy: + resources: + limits: + cpus: "8" + memory: 16g diff --git a/examples/job/compose/harbor_compose_demo.py b/examples/job/compose/harbor_compose_demo.py index 2bb6fec33b..ecf35d38cd 100644 --- a/examples/job/compose/harbor_compose_demo.py +++ b/examples/job/compose/harbor_compose_demo.py @@ -1,8 +1,8 @@ -"""ComposeJobConfig 端到端 demo —— 用 ComposeJobConfig 跑 harbor 任务。 +"""ComposeJobConfig 端到端 demo —— 用 ComposeJobConfig (v2) 跑 harbor 任务。 -这是经过 ROCK 真机后端(kata runtime)端到端验证的脚本: - Job → kata 沙箱 → dockerd → proxy sidecar → health → 主容器(复用外层 dockerd) - → harbor CLI 运行 → 下载 dataset → 跑 agent rollout +v2 变更:容器编排完全迁移到标准 docker-compose.yaml,job_config 只持有 compose_file 指针。 +ROCK 不再解析 compose 内部结构,只负责: + ① 准备 DinD 外层沙箱;② 引导 dockerd;③ docker compose up;④ 收退出码 + 可选 OSS 上传。 对应的 AP 命令(claude-code agent / aone-bench-java100 / glm-5): ap job create harbor --instance-id codereview-20789198 -p '{...}' --runner rock @@ -42,14 +42,7 @@ from rock.sdk.envhub import EnvironmentConfig from rock.sdk.job import Job -from rock.sdk.job.compose.config import ( - ComposeJobConfig, - ComposeSpec, - HealthSpec, - MainContainerSpec, - SidecarSpec, - VolumeMount, -) +from rock.sdk.job.compose.config import ComposeJobConfig from rock.sdk.job.operator import ScatterOperator 
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") @@ -63,10 +56,13 @@ "ROCK_BASE_URL": "http://xrl.alibaba-inc.com", "ROCK_CLUSTER": "vpc-sg-a", "INSTANCE_ID": "codereview-20789198", - "DATASET": "terminal-bench/aone-bench-java100", + "DATASET": "alibaba/aone-bench-java100", + "DATASET_NAME": "alibaba/aone-bench-java100", + "DATASET_VERSION": "latest", + "DATASET_TYPE": "registry", "SPLIT": "test", "HARBOR_AGENT": "claude-code", - "HARBOR_MAIN_IMAGE": "rock-registry.ap-southeast-1.cr.aliyuncs.com/harbor/harbor:086a7b5822fc09891b190e18d", + "HARBOR_MAIN_IMAGE": "rock-registry.ap-southeast-1.cr.aliyuncs.com/harbor/harbor:33180a83", "PROXY_IMAGE": "agent-platform-staging-registry-vpc.ap-southeast-1.cr.aliyuncs.com/eflops/proxy-hub:bailian-usage-dev", "JOB_TIMEOUT": "9000", } @@ -101,8 +97,8 @@ def check_required() -> None: def build_config() -> ComposeJobConfig: - # 容器内 env(harbor main.sh + proxy sidecar 都从这里读) - container_env = { + # 外层沙箱 env(docker compose 执行时可用 ${VAR} 插值注入内层容器) + sandbox_env = { # 模型 "MODEL": cfg("MODEL"), "MODEL_BASE_URL": cfg("MODEL_BASE_URL"), @@ -111,17 +107,16 @@ def build_config() -> ComposeJobConfig: "HARBOR_AGENT": cfg("HARBOR_AGENT"), "INSTANCE_ID": cfg("INSTANCE_ID"), "DATASET": cfg("DATASET"), + "DATASET_NAME": cfg("DATASET_NAME"), + "DATASET_VERSION": cfg("DATASET_VERSION"), "SPLIT": cfg("SPLIT"), - "DATASET_TYPE": "local", - "HARBOR_ENV": "docker", + "DATASET_TYPE": cfg("DATASET_TYPE"), "N_ATTEMPTS": "1", "N_CONCURRENT": "1", "TIMEOUT_MULTIPLIER": "3.0", "MAX_RETRIES": "3", "MAX_ITERATIONS": "200", - "AGENT_VERSION": "2.1.87", - "AGENT_TIMEOUT_MULTIPLIER": "8.0", - "RETRY_INCLUDE": "NonZeroAgentExitCodeError", + "SKIP_CONFIRM": "true", "FORCE_PROXY": "true", "PROVIDER": "anthropic", "TEMPERATURE": "1.0", @@ -129,10 +124,10 @@ def build_config() -> ComposeJobConfig: "THINKING_TYPE": "adaptive", "REASONING_EFFORT": "high", "CONTEXT_1M": "true", - "SKIP_CONFIRM": "true", - "OUTPUT_DIR": 
"/tmp/output", - "SHARED_DIR": "/tmp/shared", - # OSS 凭证(harbor 下载 dataset 必需) + # 镜像(compose file 里用 ${VAR} 引用,让用户只需改 env 而无需改 compose file) + "HARBOR_MAIN_IMAGE": cfg("HARBOR_MAIN_IMAGE"), + "PROXY_IMAGE": cfg("PROXY_IMAGE"), + # OSS 凭证(harbor 下载 dataset 必需 + compose.main 产物上传) "OSS_BUCKET": cfg("OSS_BUCKET"), "OSS_ENDPOINT": cfg("OSS_ENDPOINT"), "OSS_REGION": cfg("OSS_REGION"), @@ -143,10 +138,12 @@ def build_config() -> ComposeJobConfig: return ComposeJobConfig( job_name="harbor-compose-demo", timeout=int(cfg("JOB_TIMEOUT")), - # 主容器入口脚本(harbor runner,从 Agent-Hub 复制并适配) - script_path=str(HERE / "main.sh"), + # v2: compose_file 指向本地 docker-compose.yaml(相对路径) + compose_file=str(HERE / "docker-compose.yaml"), + abort_on_container_exit=True, environment=EnvironmentConfig( - # 外层沙箱镜像须自带 docker 工具链(不要用 docker:27-dind,kata 下缺 containerd) + # 外层沙箱镜像:必须自带 docker 工具链(不要用 docker:27-dind,kata 下缺 containerd) + # 这里复用 harbor runner 镜像(自带完整 docker 工具链 + harbor CLI + bash) image=cfg("HARBOR_MAIN_IMAGE"), base_url=cfg("ROCK_BASE_URL"), cluster=cfg("ROCK_CLUSTER"), @@ -155,45 +152,23 @@ def build_config() -> ComposeJobConfig: startup_timeout=1200, memory="32g", cpus=16, + # v2 uploads: compose 文件 + 脚本目录一起上传到 /rock/compose/ uploads=[ - (str(HERE / "main.sh"), "/rock/scripts/main.sh"), - (str(HERE / "sidecars"), "/rock/scripts/sidecars"), - ], - env=container_env, - ), - compose=ComposeSpec( - main=MainContainerSpec( - image=cfg("HARBOR_MAIN_IMAGE"), - privileged=True, - env=container_env, - # 复用外层 dockerd(挂载外层 docker socket),避免主容器内再起第三层 dockerd - # —— 第三层 dockerd 在 kata 下会失败("Docker daemon failed to start") - volume_mounts=[ - VolumeMount( - name="docker-sock", - mount_path="/var/run/docker.sock", - host_path="/var/run/docker.sock", - ) - ], - ), - sidecars=[ - SidecarSpec( - name="proxy", - image=cfg("PROXY_IMAGE"), - script_path="/rock/scripts/sidecars/proxy-sidecar.sh", - env=container_env, - health=HealthSpec(port=8082, timeout_sec=120), - ), + (str(HERE / 
"docker-compose.yaml"), "/rock/compose/docker-compose.yaml"), + (str(HERE / "main.sh"), "/rock/compose/main.sh"), + (str(HERE / "sidecars"), "/rock/compose/sidecars"), ], + env=sandbox_env, ), ) async def main() -> None: config = build_config() - logger.info("Submitting harbor task via ComposeJobConfig (job_name=%s) ...", config.job_name) + logger.info("Submitting harbor task via ComposeJobConfig v2 (job_name=%s) ...", config.job_name) logger.info(" backend=%s cluster=%s", config.environment.base_url, config.environment.cluster) logger.info(" dataset=%s split=%s agent=%s", cfg("DATASET"), cfg("SPLIT"), cfg("HARBOR_AGENT")) + logger.info(" compose_file=%s", config.compose_file) # size=1:单 trial(避免 ScatterOperator 共享 config 引用的竞态) result = await Job(config, operator=ScatterOperator(size=1)).run() diff --git a/examples/job/compose/job_config.yaml.template b/examples/job/compose/job_config.yaml.template index 2f46e41197..861a8a1e0b 100644 --- a/examples/job/compose/job_config.yaml.template +++ b/examples/job/compose/job_config.yaml.template @@ -1,7 +1,7 @@ -# ComposeJobConfig YAML template — harbor task (claude-code / terminal-bench / aone-bench-java100) +# ComposeJobConfig YAML template (v2) — harbor task (claude-code / terminal-bench / aone-bench-java100) # -# 方案: 主容器自带 dockerd(DinD 沙箱 image=docker:27-dind 天然提供守护进程) -# 内层编排: runner.sh 在外层沙箱内用 docker CLI 拉起 main + proxy sidecar +# v2 方案: job_config.yaml(ROCK 层元信息 + DinD 外层沙箱)+ docker-compose.yaml(容器编排)双文件。 +# ROCK 不解析 compose 内部结构,只把 compose_file 指向的文件上传进沙箱后执行 docker compose up。 # # 使用前: 将所有 <...> 占位符替换为真实值,或通过上层脚本批量渲染。 # 用法: python examples/job/compose/compose_demo.py -c examples/job/compose/job_config.yaml @@ -10,14 +10,14 @@ job_name: harbor-compose-demo namespace: xrl-sandbox # experiment_id 不填(ComposeJobConfig 无需 Harbor 的 experiment_id 字段) -timeout: 8000 # 整体超时(秒);harbor run 内部超时由 HARBOR_* env 控制 +timeout: 8000 # 整体超时(秒);docker compose up 全生命周期 -# ── 主容器入口(继承 BashJobConfig)───────────────────────────────── 
-script_path: ./main.sh # 对应 compose.main service;由 ComposeTrial 上传到 /rock/scripts/main.sh +# ── ComposeJobConfig 专有:指向标准 compose 文件(本地相对路径)────── +compose_file: ./docker-compose.yaml +abort_on_container_exit: true # 任一 service 退出即停止整组(默认 true) # ── DinD 外层沙箱(EnvironmentConfig)────────────────────────────── -# image 必须自带 docker/dockerd/containerd/runc 工具链(不要用 docker:27-dind, -# 实测它在 ROCK kata 沙箱里缺 containerd)。runner.sh 会在 P0 主动启动 dockerd。 +# image 必须自带 docker/dockerd/containerd/runc 工具链。runner.sh 会在 P0 阶段主动启动 dockerd。 # 这里直接复用 harbor runner 镜像(自带完整 docker 工具链 + harbor CLI + bash)。 environment: image: "" # 自带 docker 工具链的业务镜像 @@ -27,33 +27,34 @@ environment: XRL-Authorization: "Bearer " use_kata_runtime: true # 生产环境推荐开启 kata,强隔离 startup_timeout: 1800 # 沙箱启动超时(秒) - cpus: 32 # 外层沙箱总 CPU(须 ≥ 内层各容器之和:main 8 + proxy 1 + 余量) + cpus: 16 # 外层沙箱总 CPU(须 ≥ 内层各容器之和) memory: "32g" # 外层沙箱总内存 - # 把本地脚本目录上传进沙箱(runner.sh 引用这些路径) + # 把 compose 文件 + 脚本目录上传进沙箱(runner.sh 引用 /rock/compose/ 路径) uploads: - - ["./main.sh", "/rock/scripts/main.sh"] - - ["./sidecars", "/rock/scripts/sidecars"] + - ["./docker-compose.yaml", "/rock/compose/docker-compose.yaml"] + - ["./main.sh", "/rock/compose/main.sh"] + - ["./sidecars", "/rock/compose/sidecars"] - # 沙箱级环境变量(注入外层沙箱,runner.sh 可读,同时透传给各内层容器) + # 沙箱级环境变量(注入外层沙箱;docker compose 执行时这些值可用于 compose 文件里的 ${VAR} 插值) env: - # 模型相关(harbor runner 所需) + # 镜像(compose file 里用 ${VAR} 引用) + HARBOR_MAIN_IMAGE: "" + PROXY_IMAGE: "" + + # 模型相关 MODEL: "" # e.g. claude-opus-4-8 MODEL_API_KEY: "" MODEL_BASE_URL: "" # e.g. https://api.anthropic.com # Harbor agent 配置 - HARBOR_AGENT: claude-code # 固定 claude-code - INSTANCE_ID: "" # 任务 ID,e.g. 
mailman - DATASET: terminal-bench # 或 aone-bench-java100 + HARBOR_AGENT: claude-code + INSTANCE_ID: "" + DATASET: terminal-bench SPLIT: test - DATASET_TYPE: local # local / registry - HARBOR_ENV: docker # DinD 下用 docker 模式 - - # DinD:主容器自带 dockerd,DOCKER_HOST 指向外层 DinD daemon - DOCKER_HOST: "tcp://localhost:2375" + DATASET_TYPE: registry - # OSS 凭证(数据集下载 + 产物上传) + # OSS 凭证(数据集下载) OSS_BUCKET: "" OSS_ENDPOINT: "" # e.g. oss-cn-hangzhou-internal.aliyuncs.com OSS_REGION: "" # e.g. cn-hangzhou @@ -67,42 +68,12 @@ environment: MAX_RETRIES: "3" MAX_ITERATIONS: "500" SKIP_CONFIRM: "true" - HARBOR_JOB_RESULT_PATH: "/tmp/shared/rollout_result" - OUTPUT_DIR: "/tmp/output" - SHARED_DIR: "/tmp/shared" - # cc-proxy 配置(proxy sidecar 的端口由 health.port 声明) + # cc-proxy 配置 PROVIDER: anthropic - FORCE_PROXY: "true" # 强制走 cc-proxy sidecar + FORCE_PROXY: "true" - # 产物上传(复用现有机制) + # 产物上传(复用现有机制,docker compose 结束后由 runner P2 执行) oss_mirror: enabled: true oss_bucket: "" - -# ── Compose 内层编排(DinD 内,由 runner.sh 用 docker CLI 拉起)── -compose: - # 主容器:harbor runner - # script_path 即顶层 ./main.sh(ComposeTrial 上传后路径 /rock/scripts/main.sh) - main: - image: "" # harbor runner 镜像,含 harbor CLI 及 claude-code - # e.g. code-agi-sg-docker-registry-vpc.ap-southeast-1.cr.aliyuncs.com/eflops/claude-code:bailian-xxx - privileged: true # harbor 在内层需要起 docker 容器,须特权 - resources: - cpus: 8 - memory: "16g" - - # Sidecar 容器:与主容器并行运行 - sidecars: - # cc-proxy sidecar:主容器通过 docker bridge gateway(http://${DOCKER_GATEWAY}:8082)访问 - # name=proxy 作为 docker network-alias,健康探测确保 proxy 就绪后再启动主容器 - - name: proxy - image: "" # claude-code proxy 镜像 - # e.g. 
code-agi-sg-docker-registry-vpc.ap-southeast-1.cr.aliyuncs.com/eflops/claude-code-proxy:latest - script_path: /rock/scripts/sidecars/proxy-sidecar.sh - resources: - cpus: 1 - memory: "1g" - health: - port: 8082 # proxy-sidecar.sh 中 PORT=8082 - timeout_sec: 60 diff --git a/examples/job/compose/main.sh b/examples/job/compose/main.sh index ec7625d820..c0c2c7317e 100644 --- a/examples/job/compose/main.sh +++ b/examples/job/compose/main.sh @@ -76,6 +76,9 @@ fi echo "Starting Harbor agent execution..." echo "Harbor Agent: ${HARBOR_AGENT}" +echo "DEBUG: DATASET_TYPE env var is: [${DATASET_TYPE}]" +echo "DEBUG: DATASET_NAME env var is: [${DATASET_NAME}]" +echo "DEBUG: DATASET_VERSION env var is: [${DATASET_VERSION}]" echo "Dataset Type: ${DATASET_TYPE}" if [ "${DATASET_TYPE}" = "registry" ]; then echo "Dataset Name: ${DATASET_NAME}" @@ -484,7 +487,8 @@ if [ "${HARBOR_AGENT}" = "claude-code" ] && [ "${USE_PROXY}" = "1" ]; then echo "Waiting for claude-code proxy..." if command -v curl > /dev/null 2>&1; then while true; do - curl -s --connect-timeout 1 http://localhost:8082 > /dev/null 2>&1 && break + # ComposeJobConfig: proxy sidecar is on the same docker network with alias=proxy + curl -s --connect-timeout 1 http://proxy:8082 > /dev/null 2>&1 && break sleep 1 done else @@ -496,7 +500,8 @@ if [ "${HARBOR_AGENT}" = "openclaw" ] && [ "${USE_PROXY}" = "1" ]; then echo "Waiting for claude-code proxy..." 
if command -v curl > /dev/null 2>&1; then while true; do - curl -s --connect-timeout 1 http://localhost:8082 > /dev/null 2>&1 && break + # ComposeJobConfig: proxy sidecar is on the same docker network with alias=proxy + curl -s --connect-timeout 1 http://proxy:8082 > /dev/null 2>&1 && break sleep 1 done else @@ -668,7 +673,9 @@ if [ "${DATASET_TYPE}" = "registry" ]; then HARBOR_ARGS+=(-d "${DATASET_NAME}@${DATASET_VERSION}") HARBOR_ARGS+=(-t "${INSTANCE_ID}") else + # local dataset type: pass both problem ID and dataset name HARBOR_ARGS+=(-p "${INSTANCE_ID}") + [ -n "${DATASET}" ] && HARBOR_ARGS+=(--dataset "${DATASET}") fi HARBOR_ARGS+=(-a "${HARBOR_AGENT}") @@ -813,9 +820,10 @@ fi HARBOR_ARGS+=(--no-delete) # Skip harbor run interactive confirmation when SKIP_CONFIRM=true (mirrors Agent-Service `skip_confirm` field) -if [ "${SKIP_CONFIRM:-}" = "true" ]; then - HARBOR_ARGS+=(-y) -fi +# NOTE: -y may not be supported in all harbor versions; commented out for compatibility +# if [ "${SKIP_CONFIRM:-}" = "true" ]; then +# HARBOR_ARGS+=(-y) +# fi echo "======= RUNNING HARBOR COMMAND =======" echo "harbor run ${HARBOR_ARGS[*]}" diff --git a/rock/sdk/job/__init__.py b/rock/sdk/job/__init__.py index 4aeb1e59f2..aa3292c305 100644 --- a/rock/sdk/job/__init__.py +++ b/rock/sdk/job/__init__.py @@ -1,9 +1,9 @@ # Auto-register BashTrial (safe: no bench dependency). # HarborTrial is registered by rock.sdk.bench.__init__ to avoid a circular # import when rock.sdk.job is triggered mid-bench-load. -# ComposeTrial is registered by rock.sdk.job.compose.trial (loaded by teammate B). -import rock.sdk.job.trial.bash # noqa: F401 +# ComposeTrial is registered by rock.sdk.job.compose.trial (loaded on import). 
import rock.sdk.job.compose.trial # noqa: F401 +import rock.sdk.job.trial.bash # noqa: F401 from rock.sdk.job.api import Job from rock.sdk.job.compose.config import ComposeJobConfig from rock.sdk.job.config import BashJobConfig, JobConfig diff --git a/rock/sdk/job/compose/config.py b/rock/sdk/job/compose/config.py index 5ef78d7a50..49e16fd7cc 100644 --- a/rock/sdk/job/compose/config.py +++ b/rock/sdk/job/compose/config.py @@ -1,289 +1,51 @@ -"""ComposeJobConfig — Docker Compose multi-container job configuration. +"""ComposeJobConfig — Docker Compose multi-container job configuration (v2). -Extends BashJobConfig with a ``compose`` block that describes the inner -DinD container orchestration (main + sidecars + init containers). +Replaces v1 custom ``compose:`` block with a pointer to a standard +``docker-compose.yaml`` file. ROCK does not parse the compose file's +internal structure — orchestration is fully delegated to docker compose. -Type detection signal: ``"compose" in yaml_data`` +Type detection signal: ``"compose_file" in yaml_data`` """ from __future__ import annotations -import re from datetime import datetime import yaml -from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator +from pydantic import ConfigDict, Field, model_validator -from rock.logger import init_logger -from rock.sdk.job.config import BashJobConfig +from rock.sdk.job.config import JobConfig -logger = init_logger(__name__) -# Regex for valid container names (used as docker --network-alias) -_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9-]*$") +class ComposeJobConfig(JobConfig): + """Docker Compose multi-container Job configuration (v2: standard compose file). + Directly inherits JobConfig (not BashJobConfig). There is no top-level + ``script`` / ``script_path`` — the main container entry-point is defined + inside the docker-compose.yaml ``main`` service. 
-# ── Sub-models: inner container specs ──────────────────────────────────────── - - -class ResourceSpec(BaseModel): - """Single inner-container resource declaration. - - request / limit dual-value (aligns with K8s requests/limits semantics). - In single-host docker mode: - cpus → --cpus (used as hard limit when cpu_limit absent) - cpu_limit → --cpus (hard upper bound, takes priority) - memory → --memory-reservation (soft limit) - memory_limit → --memory (hard upper bound) - Setting only cpus/memory is the common case (treated as hard limit). - """ - - model_config = ConfigDict(extra="forbid") - - cpus: float | None = None - memory: str | None = None - cpu_limit: float | None = None - memory_limit: str | None = None - - -class VolumeMount(BaseModel): - """Container volume mount. - - By default ``name`` refers to the shared named volume used for cross-container - (init → main) data passing, mounted at ``mount_path``. - - When ``host_path`` is set, the mount instead bind-mounts a real path from the - OUTER sandbox into the container at ``mount_path`` — e.g. to expose the outer - docker socket (``host_path: /var/run/docker.sock``) so the container reuses the - outer dockerd instead of starting its own (avoids the 3rd DinD layer). 
- """ - - model_config = ConfigDict(extra="forbid") - - name: str - mount_path: str - main_mount_path: str | None = None - host_path: str | None = None - read_only: bool = False - - -class SecretEnvEntry(BaseModel): - """Source declaration for a single secret environment variable (K8s Secret style).""" - - model_config = ConfigDict(extra="forbid") - - secret_name: str - secret_key: str - - -class OssDep(BaseModel): - """A single dependency to download from OSS before running.""" - - model_config = ConfigDict(extra="forbid") - - key: str - target_path: str - extract: bool = False - - -class HealthSpec(BaseModel): - """Sidecar readiness probe (optional).""" - - model_config = ConfigDict(extra="forbid") - - port: int - timeout_sec: int = 60 - - -class _ContainerBase(BaseModel): - """Common fields for init and sidecar containers. - - Entry-point (choose at most one): - - script Inline shell (runner writes to sandbox then runs with ``bash``) - - script_path Path inside sandbox (``bash ``) - - command/args Override image ENTRYPOINT (not via bash), e.g. for running - stock images like dockerd: command=["dockerd"], args=["--tls=false"] - All three absent → use image's own ENTRYPOINT/CMD. - """ - - model_config = ConfigDict(extra="forbid") - - name: str - image: str - script: str | None = None - script_path: str | None = None - command: list[str] | None = None - args: list[str] | None = None - env: dict[str, str] = Field(default_factory=dict) - secret_env: dict[str, SecretEnvEntry] = Field(default_factory=dict) - resources: ResourceSpec | None = None - privileged: bool = False - - @field_validator("name") - @classmethod - def _validate_name(cls, v: str) -> str: - if not _NAME_RE.match(v): - raise ValueError( - f"Container name '{v}' is invalid. Must match ^[a-z0-9][a-z0-9-]*$ (used as docker --network-alias)." 
- ) - return v - - @model_validator(mode="after") - def _entrypoint_exclusive(self) -> _ContainerBase: - modes = [bool(self.script), bool(self.script_path), bool(self.command)] - if sum(modes) > 1: - raise ValueError( - f"container '{self.name}': script / script_path / command are mutually exclusive — use at most one" - ) - if self.args and not self.command: - raise ValueError(f"container '{self.name}': args must be used together with command") - return self - - -class InitContainerSpec(_ContainerBase): - """Init container: runs serially before the main container starts.""" - - volume_mounts: list[VolumeMount] = Field(default_factory=list) - - -class SidecarSpec(_ContainerBase): - """Sidecar container: runs in parallel with main; name becomes docker network-alias.""" - - health: HealthSpec | None = None - volume_mounts: list[VolumeMount] = Field(default_factory=list) - - -class MainContainerSpec(BaseModel): - """Main container spec. Entry-point script is provided by ComposeJobConfig top-level script/script_path.""" - - model_config = ConfigDict(extra="forbid") - - image: str - resources: ResourceSpec | None = None - env: dict[str, str] = Field(default_factory=dict) - secret_env: dict[str, SecretEnvEntry] = Field(default_factory=dict) - oss_deps: list[OssDep] = Field(default_factory=list) - volume_mounts: list[VolumeMount] = Field(default_factory=list) - privileged: bool = False - - -class ComposeSpec(BaseModel): - """Top-level compose block: inner docker orchestration inside DinD.""" - - model_config = ConfigDict(extra="forbid") - - main: MainContainerSpec - init_containers: list[InitContainerSpec] = Field(default_factory=list) - sidecars: list[SidecarSpec] = Field(default_factory=list) - - @model_validator(mode="after") - def _unique_names(self) -> ComposeSpec: - names = [c.name for c in self.init_containers] + [s.name for s in self.sidecars] - if len(names) != len(set(names)): - raise ValueError("compose: init_containers / sidecars names must be globally unique") 
- return self - - -# ── ComposeJobConfig ────────────────────────────────────────────────────────── - - -class ComposeJobConfig(BashJobConfig): - """Docker Compose multi-container Job configuration. - - Inherits from BashJobConfig: - - script / script_path at the top level describe the main container entry-point - - environment describes the outer DinD sandbox - - Adds a top-level ``compose`` block describing the inner container orchestration. - Type-detection signal: presence of ``compose`` key in YAML data. + Type detection signal: presence of ``compose_file`` key in YAML data. """ model_config = ConfigDict(extra="forbid") job_name: str = Field(default_factory=lambda: datetime.now().strftime("%Y-%m-%d__%H-%M-%S")) - compose: ComposeSpec # required; its presence identifies ComposeJobConfig + # Required. Identifies ComposeJobConfig. Local path (relative to job_config.yaml). + compose_file: str - @model_validator(mode="after") - def _proxy_conflict_check(self) -> ComposeJobConfig: - """Disallow environment.proxy together with a sidecar named 'proxy' (double-proxy).""" - if self.environment.proxy and self.environment.proxy.enabled: - if any(s.name == "proxy" for s in self.compose.sidecars): - raise ValueError( - "environment.proxy and a sidecar named 'proxy' cannot both be enabled. " - "Choose one: use the proxy sidecar container, or use the sandbox model-service." - ) - return self + # When True, any service exit stops the whole group (docker compose up --abort-on-container-exit). + # Set False to allow sidecar crashes without blocking main. 
+ abort_on_container_exit: bool = True @model_validator(mode="after") - def _resource_budget_check(self) -> ComposeJobConfig: - """Warn (not fail) when inner container resources exceed the outer sandbox budget.""" - try: - self._check_resource_budget() - except Exception: - pass # never fail validation due to budget check errors + def _validate_compose_file(self) -> ComposeJobConfig: + if not self.compose_file: + raise ValueError("compose_file must not be empty") return self - def _check_resource_budget(self) -> None: - """Internal helper: accumulate inner cpus/memory and warn if they exceed outer sandbox.""" - - outer_cpus: float | None = getattr(self.environment, "cpus", None) - outer_memory_str: str | None = getattr(self.environment, "memory", None) - - def parse_memory_gb(s: str | None) -> float | None: - if s is None: - return None - s = s.strip().lower() - if s.endswith("gi"): - return float(s[:-2]) - if s.endswith("g"): - return float(s[:-1]) - if s.endswith("mi"): - return float(s[:-2]) / 1024 - if s.endswith("m"): - return float(s[:-1]) / 1024 - return None - - def container_cpus(r: ResourceSpec | None) -> float: - if r is None: - return 0.0 - return r.cpu_limit or r.cpus or 0.0 - - def container_mem(r: ResourceSpec | None) -> float: - if r is None: - return 0.0 - return parse_memory_gb(r.memory_limit) or parse_memory_gb(r.memory) or 0.0 - - all_specs: list[ResourceSpec | None] = ( - [self.compose.main.resources] - + [c.resources for c in self.compose.init_containers] - + [s.resources for s in self.compose.sidecars] - ) - - total_cpus = sum(container_cpus(r) for r in all_specs) - total_mem = sum(container_mem(r) for r in all_specs) - - if outer_cpus is not None and total_cpus > 0 and total_cpus > outer_cpus: - logger.warning( - "ComposeJobConfig resource budget: inner containers total cpus=%.1f " - "exceeds outer sandbox cpus=%.1f — may cause OOM or throttling.", - total_cpus, - outer_cpus, - ) - - if outer_memory_str is not None and total_mem > 0: - 
outer_mem = parse_memory_gb(outer_memory_str) - if outer_mem is not None and total_mem > outer_mem: - logger.warning( - "ComposeJobConfig resource budget: inner containers total memory=%.1fGi " - "exceeds outer sandbox memory=%.1fGi — may cause OOM.", - total_mem, - outer_mem, - ) - @classmethod def from_yaml(cls, path: str) -> ComposeJobConfig: - """Load a ComposeJobConfig from YAML file.""" + """Load a ComposeJobConfig from a YAML file.""" with open(path) as f: - data = yaml.safe_load(f) - return cls.model_validate(data) + return cls.model_validate(yaml.safe_load(f)) diff --git a/rock/sdk/job/compose/trial.py b/rock/sdk/job/compose/trial.py index 00b07c6ef3..3a81edc555 100644 --- a/rock/sdk/job/compose/trial.py +++ b/rock/sdk/job/compose/trial.py @@ -1,26 +1,20 @@ -"""ComposeTrial — Docker Compose multi-container job trial inside a DinD sandbox. +"""ComposeTrial — multi-container Docker Compose job inside a DinD sandbox (v2). -The outer sandbox is a Docker-in-Docker (DinD) environment. This trial -generates a ``runner.sh`` that orchestrates inner containers (init → sidecars → -main) entirely through the ``docker`` CLI available in the outer sandbox. +v2 delegates all container orchestration to ``docker compose up`` — +no more hand-written docker run phases. 
+ +setup() → upload files (compose + scripts) via environment.uploads; render minimal runner.sh +build() → "bash /rock/runner.sh" +collect()→ exit code = main service exit code (--exit-code-from main) """ from __future__ import annotations import os -import shlex from typing import TYPE_CHECKING from rock.logger import init_logger -from rock.sdk.job.compose.config import ( - ComposeJobConfig, - InitContainerSpec, - MainContainerSpec, - OssDep, - ResourceSpec, - SecretEnvEntry, - SidecarSpec, -) +from rock.sdk.job.compose.config import ComposeJobConfig from rock.sdk.job.result import ExceptionInfo, TrialResult from rock.sdk.job.trial.abstract import AbstractTrial from rock.sdk.job.trial.registry import register_trial @@ -38,359 +32,82 @@ "oss_bucket", ) -# ── runner.sh skeleton ──────────────────────────────────────────────────────── +# ── runner.sh template (v2 minimal) ────────────────────────────────────────── # Placeholders use __UPPER__ style to avoid collision with bash ${var} syntax. # NEVER use str.format() on this template — it contains {} in bash constructs. -_RUNNER_SKELETON = r"""#!/bin/bash -set +e - +_RUNNER_TEMPLATE = r"""#!/bin/bash +# runner.sh — ROCK ComposeJob runtime (v2: delegates to docker compose) +set -uo pipefail +COMPOSE_FILE="/rock/compose/docker-compose.yaml" +LOG_DIR="/rock/logs"; mkdir -p "$LOG_DIR" RUNNER_EXIT=0 -# ──────────────────────────────────────────────────────────────────────────── -# cleanup_all — invoked by trap EXIT -# ──────────────────────────────────────────────────────────────────────────── cleanup_all() { - echo "[rock-compose] Cleaning up containers / network / volume ..." 
- docker rm -f rock-main-$$ 2>/dev/null || true -__SIDECAR_CLEANUP__ - docker network rm rock_compose_$$ 2>/dev/null || true - docker volume rm rock_shared_$$ 2>/dev/null || true + docker compose -f "$COMPOSE_FILE" logs --no-color > "$LOG_DIR/compose.log" 2>&1 || true + docker compose -f "$COMPOSE_FILE" down -v --remove-orphans >/dev/null 2>&1 || true } - trap cleanup_all EXIT +trap 'RUNNER_EXIT=143; exit 143' TERM INT -# ──────────────────────────────────────────────────────────────────────────── -# P0 — start dockerd (ROCK kata sandbox does NOT auto-start it), then wait ready -# ──────────────────────────────────────────────────────────────────────────── -# NOTE: in a ROCK kata DinD sandbox dockerd is NOT running on entry. We must -# start it ourselves. Gotchas learned from real runs on the kata backend: +# ── P0: bootstrap dockerd and wait for it to be ready ── +# NOTE: In a ROCK kata DinD sandbox, dockerd is NOT running on entry. We must +# start it ourselves. Two kata-environment gotchas learned from real runs: # 1. nohup'd dockerd does not inherit the interactive shell PATH, so it fails # with "containerd executable file not found" — we export PATH explicitly. # 2. the kata guest lacks /proc/sys/net/bridge/bridge-nf-call-iptables, so # dockerd's default bridge network init fails unless we set # DOCKER_IGNORE_BR_NETFILTER_ERROR=1. -echo "[rock-compose] P0: starting dockerd ..." +echo "[runner] P0: wait docker daemon" if ! docker info >/dev/null 2>&1; then - if ! pgrep -x dockerd >/dev/null 2>&1; then - PATH=/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin \ - DOCKER_IGNORE_BR_NETFILTER_ERROR=1 \ - nohup dockerd >/var/log/dockerd.log 2>&1 & - fi + if ! 
pgrep -x dockerd >/dev/null 2>&1; then + PATH=/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin \ + DOCKER_IGNORE_BR_NETFILTER_ERROR=1 nohup dockerd >/var/log/dockerd.log 2>&1 & + fi fi -for i in $(seq 1 60); do - if docker info >/dev/null 2>&1; then - echo "[rock-compose] dockerd is ready" - break - fi - sleep 2 - if [ "$i" -eq 60 ]; then - echo "[rock-compose] ERROR: dockerd not ready after 120s" - echo "[rock-compose] --- dockerd.log tail ---" - tail -20 /var/log/dockerd.log 2>/dev/null || true - exit 1 - fi -done - -docker network create rock_compose_$$ 2>/dev/null || true -docker volume create rock_shared_$$ 2>/dev/null || true +for i in $(seq 1 30); do docker info >/dev/null 2>&1 && break; sleep 2; done +docker info >/dev/null 2>&1 || { echo "docker daemon not ready after 60s"; exit 1; } +# Optional: private registry login (credentials from SandboxConfig.registry_*) __REGISTRY_LOGIN__ -mkdir -p /rock/logs - -# ──────────────────────────────────────────────────────────────────────────── -# P1 — OSS dependency download (conditional) -# ──────────────────────────────────────────────────────────────────────────── -__OSS_DEPS__ - -# ──────────────────────────────────────────────────────────────────────────── -# P2 — init containers (serial) -# ──────────────────────────────────────────────────────────────────────────── -echo "[rock-compose] P2: starting init containers ..." -__INIT_CONTAINERS__ - -# ──────────────────────────────────────────────────────────────────────────── -# P3 — sidecar containers (parallel, detached) -# ──────────────────────────────────────────────────────────────────────────── -echo "[rock-compose] P3: starting sidecar containers ..." -__SIDECAR_CONTAINERS__ - -# ──────────────────────────────────────────────────────────────────────────── -# P4 — health probes for sidecars that declare health -# ──────────────────────────────────────────────────────────────────────────── -echo "[rock-compose] P4: health probes ..." 
-__HEALTH_PROBES__ - -# ──────────────────────────────────────────────────────────────────────────── -# P5 — main container (foreground) -# ──────────────────────────────────────────────────────────────────────────── -echo "[rock-compose] P5: starting main container ..." -__MAIN_CONTAINER__ +# ── P1: docker compose up — main service exit code drives the overall result ── +echo "[runner] P1: docker compose up" +docker compose -f "$COMPOSE_FILE" up \ + __ABORT_FLAG__ \ + --exit-code-from main 2>&1 | tee "$LOG_DIR/up.log" RUNNER_EXIT=${PIPESTATUS[0]} -echo "[rock-compose] main exited with code $RUNNER_EXIT" - -# ──────────────────────────────────────────────────────────────────────────── -# P6 — collect sidecar logs + optional OSS mirror upload -# ──────────────────────────────────────────────────────────────────────────── -echo "[rock-compose] P6: collecting logs ..." -__COLLECT_SIDECAR_LOGS__ -__OSS_MIRROR_UPLOAD__ - -# ──────────────────────────────────────────────────────────────────────────── -# P7 — explicit exit with main's exit code (cleanup via trap) -# ──────────────────────────────────────────────────────────────────────────── -exit $RUNNER_EXIT -""" +echo "[runner] main service exited rc=$RUNNER_EXIT" +# ── P2: optional OSS artifact upload (only when environment.oss_mirror.enabled) ── +__PHASE2_OSS_UPLOAD__ -# ── helpers ─────────────────────────────────────────────────────────────────── - +exit "$RUNNER_EXIT" +""" -def _resource_args(res: ResourceSpec | None) -> list[str]: - """Convert ResourceSpec to docker run resource flag strings.""" - args: list[str] = [] - if res is None: - return args - cpu = res.cpu_limit if res.cpu_limit is not None else res.cpus - if cpu is not None: - args.append(f"--cpus {cpu}") - if res.memory is not None: - args.append(f"--memory-reservation {shlex.quote(res.memory)}") - mem_limit = res.memory_limit if res.memory_limit is not None else None - if mem_limit is not None: - args.append(f"--memory {shlex.quote(mem_limit)}") - 
return args +# ── render helpers ──────────────────────────────────────────────────────────── -def _volume_args(volume_mounts) -> list[str]: - """Render -v flags for volume_mounts. - host_path set → bind-mount real outer path; otherwise use the shared named volume. - """ - args: list[str] = [] - for vm in volume_mounts: - suffix = ":ro" if vm.read_only else "" - if vm.host_path: - args.append(f"-v {shlex.quote(vm.host_path)}:{shlex.quote(vm.mount_path)}{suffix}") - else: - args.append(f"-v rock_shared_$$:{shlex.quote(vm.mount_path)}{suffix}") - return args - - -def _env_args(env: dict[str, str], secret_env: dict[str, SecretEnvEntry]) -> list[str]: - """Build -e flags for plain env and secret_env (shell variable references).""" - args: list[str] = [] - for k, v in env.items(): - args.append(f"-e {shlex.quote(k)}={shlex.quote(v)}") - for k in secret_env: - # Render secret as a shell variable reference — value is never embedded literally. - args.append(f'-e {shlex.quote(k)}="${{{k}}}"') - return args - - -def _entrypoint_args(spec) -> tuple[list[str], str]: - """Return (flag_args, positional_cmd_str) for a container spec. - - Returns: - flag_args — e.g. ["--entrypoint bash"] or [] - positional — positional command string after the image, e.g. 
"bash /rock/scripts/name.sh" - """ - flag_args: list[str] = [] - positional = "" - - if spec.command: - # Override entrypoint with command[0]; remaining command args + spec.args → positional - flag_args.append(f"--entrypoint {shlex.quote(spec.command[0])}") - remainder = list(spec.command[1:]) + (spec.args or []) - if remainder: - positional = " ".join(shlex.quote(a) for a in remainder) - elif spec.script_path: - positional = f"bash {shlex.quote(spec.script_path)}" - elif spec.script: - # Inline script was written to /rock/scripts/.sh during setup - positional = f"bash /rock/scripts/{spec.name}.sh" - # else: use image's own ENTRYPOINT/CMD — no flags or positional needed - - return flag_args, positional - - -def _render_oss_deps(oss_deps: list[OssDep]) -> str: - """Render P1 OSS dependency download block.""" - if not oss_deps: - return "# (no oss_deps)" - lines = ['echo "[rock-compose] P1: downloading OSS dependencies ..."'] - for dep in oss_deps: - key_q = shlex.quote(dep.key) - target_q = shlex.quote(dep.target_path) - if dep.extract: - lines.append( - f"ossutil cp {key_q} /tmp/_rock_dep_archive && " - f"mkdir -p {target_q} && " - f"tar -xf /tmp/_rock_dep_archive -C {target_q}" - ) - else: - lines.append(f"ossutil cp {key_q} {target_q}") - return "\n".join(lines) - - -def _render_init_containers(init_containers: list[InitContainerSpec]) -> str: - """Render P2 init container serial execution block.""" - if not init_containers: - return "# (no init containers)" - lines = [] - for ic in init_containers: - run_parts = [ - "docker run --rm", - "--network rock_compose_$$", - f"--network-alias {shlex.quote(ic.name)}", - "-v rock_shared_$$:/rock/shared", - "-v /rock/scripts:/rock/scripts:ro", - ] - run_parts.extend(_resource_args(ic.resources)) - if ic.privileged: - run_parts.append("--privileged") - run_parts.extend(_env_args(ic.env, ic.secret_env)) - run_parts.extend(_volume_args(ic.volume_mounts)) - flag_a, pos = _entrypoint_args(ic) - run_parts.extend(flag_a) - 
run_parts.append(shlex.quote(ic.image)) - if pos: - run_parts.append(pos) - - cmd = " \\\n ".join(run_parts) - lines.append(f'echo "[rock-compose] init: {ic.name}"') - lines.append(cmd) - lines.append(f'if [ $? -ne 0 ]; then echo "[rock-compose] init container {ic.name} failed"; exit 1; fi') - return "\n".join(lines) - - -def _render_sidecar_containers(sidecars: list[SidecarSpec]) -> str: - """Render P3 sidecar container launch block (detached).""" - if not sidecars: - return "# (no sidecars)" - lines = [] - for sc in sidecars: - run_parts = [ - "docker run -d", - f"--name rock-sidecar-{sc.name}-$$", - "--network rock_compose_$$", - f"--network-alias {shlex.quote(sc.name)}", - "-v rock_shared_$$:/rock/shared", - "-v /rock/scripts:/rock/scripts:ro", - ] - run_parts.extend(_resource_args(sc.resources)) - if sc.privileged: - run_parts.append("--privileged") - run_parts.extend(_env_args(sc.env, sc.secret_env)) - run_parts.extend(_volume_args(sc.volume_mounts)) - flag_a, pos = _entrypoint_args(sc) - run_parts.extend(flag_a) - run_parts.append(shlex.quote(sc.image)) - if pos: - run_parts.append(pos) - - cmd = " \\\n ".join(run_parts) - lines.append(f'echo "[rock-compose] sidecar: {sc.name}"') - lines.append(cmd) - return "\n".join(lines) - - -def _render_health_probes(sidecars: list[SidecarSpec]) -> str: - """Render P4 health probe block for sidecars that declare health.""" - health_sidecars = [sc for sc in sidecars if sc.health is not None] - if not health_sidecars: - return "# (no health probes)" - lines = [] - for sc in health_sidecars: - h = sc.health - if h is None: - continue # type narrowing guard (already filtered above) - timeout = h.timeout_sec - port = h.port - lines.append(f'echo "[rock-compose] health probe: {sc.name}:{port} (timeout {timeout}s)"') - lines.append("_rock_health_ok=0") - lines.append(f"for _i in $(seq 1 {timeout}); do") - lines.append( - f" if docker run --rm --network rock_compose_$$ busybox " - f"nc -z {shlex.quote(sc.name)} {port} 
2>/dev/null; then" - ) - lines.append(f' echo "[rock-compose] sidecar {sc.name} is ready"; _rock_health_ok=1; break; fi') - lines.append(" sleep 1") - lines.append("done") - lines.append( - f'if [ "$_rock_health_ok" -eq 0 ]; then echo "[rock-compose] ERROR: {sc.name} not ready after {timeout}s"; exit 1; fi' - ) - return "\n".join(lines) - - -def _render_main_container(main: MainContainerSpec) -> str: - """Render P5 main container execution (foreground, tee logs).""" - run_parts = [ - "docker run --name rock-main-$$", - "--network rock_compose_$$", - "--network-alias main", - "-v rock_shared_$$:/rock/shared", - # Mount the outer-sandbox scripts dir so the container can run main.sh. - # /rock/scripts lives in the OUTER sandbox; inner containers need it bind-mounted. - "-v /rock/scripts:/rock/scripts:ro", - ] - run_parts.extend(_resource_args(main.resources)) - if main.privileged: - run_parts.append("--privileged") - run_parts.extend(_env_args(main.env, main.secret_env)) - run_parts.extend(_volume_args(main.volume_mounts)) - run_parts.append(shlex.quote(main.image)) - # Main entrypoint is always bash /rock/scripts/main.sh (script/script_path uploaded there) - run_parts.append("bash /rock/scripts/main.sh") - - cmd = " \\\n ".join(run_parts) - return f"{cmd} 2>&1 | tee /rock/logs/main.log" - - -def _render_collect_sidecar_logs(sidecars: list[SidecarSpec]) -> str: - """Render P6 sidecar log collection and stop.""" - if not sidecars: - return "# (no sidecars to collect)" - lines = [] - for sc in sidecars: - lines.append(f"docker logs rock-sidecar-{sc.name}-$$ > /rock/logs/{sc.name}.log 2>&1 || true") - lines.append(f"docker stop rock-sidecar-{sc.name}-$$ 2>/dev/null || true") - return "\n".join(lines) - - -def _render_sidecar_cleanup(sidecars: list[SidecarSpec]) -> str: - """Render per-sidecar docker rm -f lines for cleanup_all.""" - if not sidecars: - return "" - lines = [f" docker rm -f rock-sidecar-{sc.name}-$$ 2>/dev/null || true" for sc in sidecars] - return 
"\n".join(lines) - - -def _render_oss_mirror_upload(config: ComposeJobConfig) -> str: - """Render P6 OSS mirror upload block (conditional).""" - mirror = config.environment.oss_mirror - if mirror is None or not mirror.enabled: - return "# (no oss mirror upload)" +def _render_registry_login(config: ComposeJobConfig) -> str: + """Render optional docker login block (conditional at runtime on $REGISTRY_USERNAME).""" return ( - 'echo "[rock-compose] uploading artifacts to OSS ..."\n' - 'ossutil cp /rock/logs/ "oss://$OSS_BUCKET/$ROCK_OSS_PREFIX/" \\\n' - " --recursive -f \\\n" - ' || echo "[rock-compose] oss upload failed (rc=$?), ignored" >&2' + 'if [ -n "${REGISTRY_USERNAME:-}" ]; then\n' + ' docker login "${REGISTRY_HOST:-}" -u "$REGISTRY_USERNAME" -p "$REGISTRY_PASSWORD" >/dev/null 2>&1 || true\n' + "fi" ) -def _render_registry_login(config: ComposeJobConfig) -> str: - """Render optional docker login using registry credentials from env.""" - env = config.environment.env - registry = env.get("ROCK_REGISTRY_HOST", "") - if not registry: - return "# (no registry login)" +def _render_oss_upload(config: ComposeJobConfig) -> str: + """Render P2 OSS artifact upload block (empty comment when not enabled).""" + mirror = config.environment.oss_mirror + if mirror is None or not mirror.enabled: + return "# (oss_mirror not enabled — skip upload)" return ( - f'echo "[rock-compose] logging in to registry {registry} ..."\n' - f"docker login {shlex.quote(registry)} \\\n" - f' -u "$ROCK_REGISTRY_USER" \\\n' - f' -p "$ROCK_REGISTRY_PASSWORD" || true' + 'echo "[runner] P2: uploading artifacts to OSS ..."\n' + 'ossutil cp "$LOG_DIR/" "oss://$OSS_BUCKET/$ROCK_OSS_PREFIX/" \\\n' + " --recursive -f \\\n" + ' || echo "[runner] oss upload failed (rc=$?), ignored" >&2' ) @@ -398,18 +115,18 @@ def _render_registry_login(config: ComposeJobConfig) -> str: class ComposeTrial(AbstractTrial): - """Docker Compose multi-container trial. + """Docker Compose multi-container trial (v2). 
+ + Uploads the compose file + scripts, renders a minimal runner.sh that + bootstraps dockerd and delegates all orchestration to ``docker compose up``. - Manages inner container orchestration inside a DinD outer sandbox via - a generated ``runner.sh`` script. + setup() → _upload_files() + render/write runner.sh + build() → "bash /rock/runner.sh" + collect()→ exit_code != 0 → ExceptionInfo(ComposeMainServiceFailed); reads compose.log """ _config: ComposeJobConfig - def __init__(self, config: ComposeJobConfig): - super().__init__(config) - self._ossutil_ready: bool = False - def _oss_mirror_enabled(self) -> bool: mirror = self._config.environment.oss_mirror return mirror is not None and mirror.enabled @@ -417,7 +134,7 @@ def _oss_mirror_enabled(self) -> bool: def _prepare_oss_session_env(self) -> None: """Resolve OSS credentials and inject ROCK_* keys into environment.env. - Follows the same resolution order as BashTrial: + Resolution order (same as BashTrial): 1. OssMirrorConfig field 2. environment.env 3. host os.environ @@ -447,119 +164,68 @@ def _prepare_oss_session_env(self) -> None: ) async def on_sandbox_ready(self, sandbox: Sandbox) -> None: - """Backfill namespace/experiment_id then prepare OSS session env.""" + """Backfill namespace/experiment_id, then prepare OSS session env if needed.""" await super().on_sandbox_ready(sandbox) if self._oss_mirror_enabled(): self._prepare_oss_session_env() async def setup(self, sandbox: Sandbox) -> None: - """Upload files, write inline container scripts, and render runner.sh. + """Upload compose file + scripts; render and write minimal runner.sh. Deliberately does NOT call super().setup() to skip _setup_proxy — - DinD compose jobs manage their own networking and proxy sidecar. - However, we call _upload_files() directly to handle environment.uploads. + DinD compose jobs manage their own networking and proxy sidecar via + docker-compose.yaml. We call _upload_files() directly. 
""" - # Upload user-specified files (skip _setup_proxy) await self._upload_files(sandbox) - compose = self._config.compose - - # Write inline scripts for init/sidecar containers that have script= set - all_containers: list = list(compose.init_containers) + list(compose.sidecars) - for ctr in all_containers: - if ctr.script: - await sandbox.write_file_by_path(ctr.script, f"/rock/scripts/{ctr.name}.sh") - - # Write main container script (from top-level script / script_path) - main_script = self._config.script or "" - if self._config.script_path: - from pathlib import Path - - main_script = Path(self._config.script_path).read_text() - await sandbox.write_file_by_path(main_script, "/rock/scripts/main.sh") - - # Ensure ossutil available if any oss_deps declared - if compose.main.oss_deps: - self._ossutil_ready = await sandbox.fs.ensure_ossutil() - if not self._ossutil_ready: - logger.warning("ossutil install failed — OSS deps download may fail") - - # Render and write runner.sh - runner_content = self._render_runner_sh() - await sandbox.write_file_by_path(runner_content, "/rock/runner.sh") + runner = self._render_runner_sh() + await sandbox.write_file_by_path(runner, "/rock/runner.sh") + await sandbox.arun("chmod +x /rock/runner.sh") def build(self) -> str: return "bash /rock/runner.sh" async def collect(self, sandbox: Sandbox, output: str, exit_code: int) -> TrialResult: - """Collect result: on failure, capture container logs for diagnostics.""" - exception_info = None + """Collect result: on failure wrap exit code into ComposeMainServiceFailed. + + docker compose logs are captured by runner.sh's trap EXIT into + /rock/logs/compose.log — read it here for diagnostics. 
+ """ + exc: ExceptionInfo | None = None if exit_code != 0: - exception_info = ExceptionInfo( - exception_type="ComposeMainContainerFailed", - exception_message=f"Compose main container exited with code {exit_code}", + exc = ExceptionInfo( + exception_type="ComposeMainServiceFailed", + exception_message=f"main service exited with {exit_code}", ) - compose = self._config.compose - - # Collect main container log - main_log_obs = await sandbox.arun("cat /rock/logs/main.log 2>/dev/null || true") - if main_log_obs.output: - logger.info("[rock-compose] main log:\n%s", main_log_obs.output) - - # Collect sidecar logs - for sc in compose.sidecars: - sc_log_obs = await sandbox.arun(f"cat /rock/logs/{sc.name}.log 2>/dev/null || true") - if sc_log_obs.output: - logger.info("[rock-compose] sidecar %s log:\n%s", sc.name, sc_log_obs.output) - - # Collect init container logs (best-effort, may not exist) - for ic in compose.init_containers: - ic_log_obs = await sandbox.arun(f"cat /rock/logs/{ic.name}.log 2>/dev/null || true") - if ic_log_obs.output: - logger.info("[rock-compose] init %s log:\n%s", ic.name, ic_log_obs.output) + compose_log_obs = await sandbox.arun("cat /rock/logs/compose.log 2>/dev/null || true") + if compose_log_obs.output: + logger.info("[compose-trial] compose log:\n%s", compose_log_obs.output) return TrialResult( task_name=self._config.job_name or "", - exception_info=exception_info, + exception_info=exc, raw_output=output, exit_code=exit_code, ) def _render_runner_sh(self) -> str: - """Render the complete runner.sh from the compose config. + """Render runner.sh from the template using str.replace only. - Uses str.replace on __PLACEHOLDER__ tokens — never str.format() — - to safely handle bash ${var}, ${PIPESTATUS[0]}, and {} literals. + Uses __PLACEHOLDER__ tokens — never str.format() — to safely handle + bash ${var}, ${PIPESTATUS[0]}, and {} literals inside the template. 
""" - compose = self._config.compose + runner = _RUNNER_TEMPLATE - runner = _RUNNER_SKELETON - - # P0 registry login + # P0: registry login (always rendered; conditional at runtime) runner = runner.replace("__REGISTRY_LOGIN__", _render_registry_login(self._config)) - # P1 OSS deps (from main container spec) - runner = runner.replace("__OSS_DEPS__", _render_oss_deps(compose.main.oss_deps)) - - # P2 init containers - runner = runner.replace("__INIT_CONTAINERS__", _render_init_containers(compose.init_containers)) - - # P3 sidecars - runner = runner.replace("__SIDECAR_CONTAINERS__", _render_sidecar_containers(compose.sidecars)) - - # P4 health probes - runner = runner.replace("__HEALTH_PROBES__", _render_health_probes(compose.sidecars)) - - # P5 main container - runner = runner.replace("__MAIN_CONTAINER__", _render_main_container(compose.main)) - - # P6 collect sidecar logs + OSS mirror upload - runner = runner.replace("__COLLECT_SIDECAR_LOGS__", _render_collect_sidecar_logs(compose.sidecars)) - runner = runner.replace("__OSS_MIRROR_UPLOAD__", _render_oss_mirror_upload(self._config)) + # P1: abort flag + abort_flag = "--abort-on-container-exit" if self._config.abort_on_container_exit else "" + runner = runner.replace("__ABORT_FLAG__", abort_flag) - # cleanup_all sidecar removal - runner = runner.replace("__SIDECAR_CLEANUP__", _render_sidecar_cleanup(compose.sidecars)) + # P2: OSS upload + runner = runner.replace("__PHASE2_OSS_UPLOAD__", _render_oss_upload(self._config)) return runner diff --git a/rock/sdk/job/config.py b/rock/sdk/job/config.py index 94545abc97..8543e65c21 100644 --- a/rock/sdk/job/config.py +++ b/rock/sdk/job/config.py @@ -87,8 +87,8 @@ def from_yaml(cls, path: str) -> JobConfig: except (ValidationError, ValueError) as exc: harbor_error = exc - # Compose: detected by the presence of a "compose" key in the YAML data. - if "compose" in data: + # Compose: detected by the presence of a "compose_file" key in the YAML data. 
+ if "compose_file" in data: from rock.sdk.job.compose.config import ComposeJobConfig try: diff --git a/tests/unit/sdk/job/test_compose_config.py b/tests/unit/sdk/job/test_compose_config.py index 345d5128f5..4eafd868b5 100644 --- a/tests/unit/sdk/job/test_compose_config.py +++ b/tests/unit/sdk/job/test_compose_config.py @@ -1,623 +1,212 @@ -"""Tests for rock.sdk.job.compose.config — ComposeJobConfig and sub-models.""" +"""Tests for rock.sdk.job.compose.config — ComposeJobConfig v2 (standard docker-compose).""" from __future__ import annotations import textwrap -from unittest.mock import patch import pytest -from rock.sdk.job.compose.config import ( - ComposeJobConfig, - ComposeSpec, - HealthSpec, - InitContainerSpec, - MainContainerSpec, - OssDep, - ResourceSpec, - SecretEnvEntry, - SidecarSpec, - VolumeMount, -) -from rock.sdk.job.config import BashJobConfig, JobConfig +from rock.sdk.job.compose.config import ComposeJobConfig +from rock.sdk.job.config import JobConfig # --------------------------------------------------------------------------- -# ResourceSpec +# Inheritance # --------------------------------------------------------------------------- -class TestResourceSpec: - def test_defaults(self): - r = ResourceSpec() - assert r.cpus is None - assert r.memory is None - assert r.cpu_limit is None - assert r.memory_limit is None +class TestInheritance: + def test_inherits_job_config(self): + assert issubclass(ComposeJobConfig, JobConfig) - def test_all_fields(self): - r = ResourceSpec(cpus=4.0, memory="12g", cpu_limit=8.0, memory_limit="24g") - assert r.cpus == 4.0 - assert r.memory == "12g" - assert r.cpu_limit == 8.0 - assert r.memory_limit == "24g" + def test_does_not_inherit_bash_job_config(self): + from rock.sdk.job.config import BashJobConfig - def test_extra_forbid(self): - with pytest.raises(Exception): - ResourceSpec(cpus=1, unknown_field="x") + assert not issubclass(ComposeJobConfig, BashJobConfig) # 
--------------------------------------------------------------------------- -# VolumeMount +# compose_file — required & validation # --------------------------------------------------------------------------- -class TestVolumeMount: - def test_defaults(self): - v = VolumeMount(name="vol", mount_path="/data") - assert v.name == "vol" - assert v.mount_path == "/data" - assert v.main_mount_path is None - - def test_all_fields(self): - v = VolumeMount(name="vol", mount_path="/data", main_mount_path="/main-data") - assert v.main_mount_path == "/main-data" - - def test_extra_forbid(self): +class TestComposeFile: + def test_compose_file_required(self): + """compose_file is a required field; omitting it raises.""" with pytest.raises(Exception): - VolumeMount(name="v", mount_path="/p", extra_field="x") - - -# --------------------------------------------------------------------------- -# SecretEnvEntry -# --------------------------------------------------------------------------- + ComposeJobConfig() + def test_compose_file_empty_raises(self): + """compose_file must not be empty string.""" + with pytest.raises(Exception, match="compose_file"): + ComposeJobConfig(compose_file="") -class TestSecretEnvEntry: - def test_fields(self): - s = SecretEnvEntry(secret_name="my-secret", secret_key="api-key") - assert s.secret_name == "my-secret" - assert s.secret_key == "api-key" + def test_compose_file_valid(self): + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml") + assert cfg.compose_file == "./docker-compose.yaml" - def test_extra_forbid(self): - with pytest.raises(Exception): - SecretEnvEntry(secret_name="s", secret_key="k", bad="x") + def test_compose_file_absolute_path_allowed(self): + cfg = ComposeJobConfig(compose_file="/rock/compose/docker-compose.yaml") + assert cfg.compose_file == "/rock/compose/docker-compose.yaml" # --------------------------------------------------------------------------- -# OssDep +# abort_on_container_exit — default True # 
--------------------------------------------------------------------------- -class TestOssDep: - def test_defaults(self): - o = OssDep(key="path/to/obj", target_path="/data/") - assert o.key == "path/to/obj" - assert o.target_path == "/data/" - assert o.extract is False +class TestAbortOnContainerExit: + def test_default_true(self): + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml") + assert cfg.abort_on_container_exit is True - def test_extract_true(self): - o = OssDep(key="archive.tar.gz", target_path="/out/", extract=True) - assert o.extract is True + def test_explicit_false(self): + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml", abort_on_container_exit=False) + assert cfg.abort_on_container_exit is False - def test_extra_forbid(self): - with pytest.raises(Exception): - OssDep(key="k", target_path="/t", bad="x") + def test_explicit_true(self): + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml", abort_on_container_exit=True) + assert cfg.abort_on_container_exit is True # --------------------------------------------------------------------------- -# HealthSpec +# extra="forbid" — v1 fields must be rejected # --------------------------------------------------------------------------- -class TestHealthSpec: - def test_defaults(self): - h = HealthSpec(port=8080) - assert h.port == 8080 - assert h.timeout_sec == 60 - - def test_custom_timeout(self): - h = HealthSpec(port=9090, timeout_sec=120) - assert h.timeout_sec == 120 - - def test_extra_forbid(self): +class TestExtraForbid: + def test_v1_compose_field_rejected(self): + """v1-era top-level 'compose' block must be rejected.""" with pytest.raises(Exception): - HealthSpec(port=80, extra="x") - - -# --------------------------------------------------------------------------- -# InitContainerSpec / SidecarSpec (via _ContainerBase) -# --------------------------------------------------------------------------- - - -class TestContainerBase: - """Tests for _ContainerBase validators via 
InitContainerSpec (concrete subclass).""" - - def test_valid_name_pattern(self): - c = InitContainerSpec(name="my-container", image="ubuntu:22.04") - assert c.name == "my-container" - - def test_valid_name_alphanumeric_only(self): - c = InitContainerSpec(name="abc123", image="ubuntu:22.04") - assert c.name == "abc123" - - def test_invalid_name_uppercase(self): - with pytest.raises(Exception, match="invalid"): - InitContainerSpec(name="MyContainer", image="ubuntu:22.04") - - def test_invalid_name_starts_with_dash(self): - with pytest.raises(Exception, match="invalid"): - InitContainerSpec(name="-bad", image="ubuntu:22.04") - - def test_invalid_name_underscore(self): - with pytest.raises(Exception, match="invalid"): - InitContainerSpec(name="my_container", image="ubuntu:22.04") - - def test_defaults(self): - c = InitContainerSpec(name="init", image="ubuntu:22.04") - assert c.script is None - assert c.script_path is None - assert c.command is None - assert c.args is None - assert c.env == {} - assert c.secret_env == {} - assert c.resources is None - assert c.privileged is False - assert c.volume_mounts == [] - - def test_script_only(self): - c = InitContainerSpec(name="init", image="ubuntu:22.04", script="echo hi") - assert c.script == "echo hi" - - def test_script_path_only(self): - c = InitContainerSpec(name="init", image="ubuntu:22.04", script_path="/run.sh") - assert c.script_path == "/run.sh" - - def test_command_only(self): - c = InitContainerSpec(name="init", image="ubuntu:22.04", command=["dockerd"]) - assert c.command == ["dockerd"] - - def test_command_with_args(self): - c = InitContainerSpec(name="init", image="ubuntu:22.04", command=["dockerd"], args=["--tls=false"]) - assert c.command == ["dockerd"] - assert c.args == ["--tls=false"] - - def test_entrypoint_exclusive_script_and_script_path(self): - with pytest.raises(Exception, match="mutually exclusive"): - InitContainerSpec(name="init", image="ubuntu:22.04", script="echo hi", script_path="/run.sh") - - 
def test_entrypoint_exclusive_script_and_command(self): - with pytest.raises(Exception, match="mutually exclusive"): - InitContainerSpec(name="init", image="ubuntu:22.04", script="echo hi", command=["bash"]) - - def test_entrypoint_exclusive_all_three(self): - with pytest.raises(Exception, match="mutually exclusive"): - InitContainerSpec( - name="init", - image="ubuntu:22.04", - script="echo hi", - script_path="/run.sh", - command=["bash"], + ComposeJobConfig( + compose_file="./docker-compose.yaml", + compose={"main": {"image": "main:latest"}}, ) - def test_args_without_command_raises(self): - with pytest.raises(Exception, match="args must be used together with command"): - InitContainerSpec(name="init", image="ubuntu:22.04", args=["--flag"]) - - def test_extra_forbid(self): - with pytest.raises(Exception): - InitContainerSpec(name="init", image="ubuntu:22.04", unknown="x") - - def test_privileged_default_false(self): - c = InitContainerSpec(name="init", image="ubuntu:22.04") - assert c.privileged is False - - def test_privileged_true(self): - c = InitContainerSpec(name="init", image="ubuntu:22.04", privileged=True) - assert c.privileged is True - - -class TestSidecarSpec: - def test_defaults(self): - s = SidecarSpec(name="proxy", image="ubuntu:22.04") - assert s.health is None - assert s.volume_mounts == [] - - def test_health_field(self): - s = SidecarSpec(name="proxy", image="ubuntu:22.04", health=HealthSpec(port=8082)) - assert s.health.port == 8082 - - def test_extra_forbid(self): + def test_script_path_rejected(self): + """script_path (BashJobConfig field) must be rejected.""" with pytest.raises(Exception): - SidecarSpec(name="s", image="img", bad_field="x") - - -# --------------------------------------------------------------------------- -# MainContainerSpec -# --------------------------------------------------------------------------- - - -class TestMainContainerSpec: - def test_required_image(self): - m = MainContainerSpec(image="myregistry/main:latest") 
- assert m.image == "myregistry/main:latest" - - def test_defaults(self): - m = MainContainerSpec(image="img") - assert m.resources is None - assert m.env == {} - assert m.secret_env == {} - assert m.oss_deps == [] - assert m.volume_mounts == [] - assert m.privileged is False - - def test_extra_forbid(self): - with pytest.raises(Exception): - MainContainerSpec(image="img", unknown="x") - - def test_privileged(self): - m = MainContainerSpec(image="img", privileged=True) - assert m.privileged is True - - -# --------------------------------------------------------------------------- -# ComposeSpec -# --------------------------------------------------------------------------- - - -class TestComposeSpec: - def test_minimal(self): - cs = ComposeSpec(main=MainContainerSpec(image="main:latest")) - assert cs.init_containers == [] - assert cs.sidecars == [] - - def test_unique_names_ok(self): - cs = ComposeSpec( - main=MainContainerSpec(image="main:latest"), - init_containers=[InitContainerSpec(name="init1", image="img")], - sidecars=[SidecarSpec(name="sidecar1", image="img")], - ) - assert len(cs.init_containers) == 1 - assert len(cs.sidecars) == 1 - - def test_duplicate_names_raises(self): - with pytest.raises(Exception, match="unique"): - ComposeSpec( - main=MainContainerSpec(image="main:latest"), - init_containers=[InitContainerSpec(name="dup", image="img")], - sidecars=[SidecarSpec(name="dup", image="img")], + ComposeJobConfig( + compose_file="./docker-compose.yaml", + script_path="./main.sh", ) - def test_duplicate_among_sidecars_raises(self): - with pytest.raises(Exception, match="unique"): - ComposeSpec( - main=MainContainerSpec(image="main:latest"), - sidecars=[ - SidecarSpec(name="dup", image="img"), - SidecarSpec(name="dup", image="img2"), - ], + def test_script_rejected(self): + """script (BashJobConfig field) must be rejected.""" + with pytest.raises(Exception): + ComposeJobConfig( + compose_file="./docker-compose.yaml", + script="echo hello", ) - def 
test_extra_forbid(self): + def test_unknown_field_rejected(self): with pytest.raises(Exception): - ComposeSpec(main=MainContainerSpec(image="img"), unknown="x") + ComposeJobConfig(compose_file="./docker-compose.yaml", unknown_field="x") # --------------------------------------------------------------------------- -# ComposeJobConfig +# Inherited JobConfig fields # --------------------------------------------------------------------------- -class TestComposeJobConfig: - def _minimal_compose(self): - return {"main": {"image": "main:latest"}} - - def test_inherits_bash_job_config(self): - assert issubclass(ComposeJobConfig, BashJobConfig) - - def test_inherits_job_config(self): - assert issubclass(ComposeJobConfig, JobConfig) - - def test_minimal_config(self): - cfg = ComposeJobConfig(compose={"main": {"image": "main:latest"}}) - assert isinstance(cfg.compose, ComposeSpec) - assert cfg.compose.main.image == "main:latest" - +class TestInheritedFields: def test_job_name_default_datetime(self): import re - cfg = ComposeJobConfig(compose={"main": {"image": "main:latest"}}) + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml") assert re.match(r"\d{4}-\d{2}-\d{2}__\d{2}-\d{2}-\d{2}", cfg.job_name) - def test_explicit_job_name_preserved(self): - cfg = ComposeJobConfig(job_name="my-job", compose={"main": {"image": "main:latest"}}) - assert cfg.job_name == "my-job" + def test_explicit_job_name(self): + cfg = ComposeJobConfig(job_name="my-compose-job", compose_file="./docker-compose.yaml") + assert cfg.job_name == "my-compose-job" - def test_extra_forbid(self): - with pytest.raises(Exception): - ComposeJobConfig(compose={"main": {"image": "img"}}, unknown_field="x") + def test_namespace_default_none(self): + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml") + assert cfg.namespace is None - def test_compose_required(self): - with pytest.raises(Exception): - ComposeJobConfig() + def test_namespace_set(self): + cfg = 
ComposeJobConfig(compose_file="./docker-compose.yaml", namespace="xrl-sandbox") + assert cfg.namespace == "xrl-sandbox" - def test_proxy_conflict_check_raises(self): - with pytest.raises(Exception, match="proxy"): - ComposeJobConfig( - compose={ - "main": {"image": "main:latest"}, - "sidecars": [{"name": "proxy", "image": "proxy:latest"}], - }, - environment={ - "proxy": {"enabled": True}, - }, - ) + def test_timeout_default(self): + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml") + assert cfg.timeout == 7200 - def test_proxy_conflict_check_ok_when_proxy_disabled(self): - """No conflict when environment.proxy.enabled=False.""" - - # Should not raise - cfg = ComposeJobConfig( - compose={ - "main": {"image": "main:latest"}, - "sidecars": [{"name": "proxy", "image": "proxy:latest"}], - }, - environment={ - "proxy": {"enabled": False}, - }, - ) - assert cfg is not None - - def test_proxy_conflict_check_ok_when_no_proxy_sidecar(self): - """No conflict when sidecar is not named 'proxy'.""" - - cfg = ComposeJobConfig( - compose={ - "main": {"image": "main:latest"}, - "sidecars": [{"name": "notproxy", "image": "proxy:latest"}], - }, - environment={ - "proxy": {"enabled": True}, - }, - ) - assert cfg is not None + def test_timeout_custom(self): + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml", timeout=3600) + assert cfg.timeout == 3600 - def test_resource_budget_check_logs_warning_when_over(self): - """_resource_budget_check logs a warning when inner cpus exceed outer.""" - import rock.sdk.job.compose.config as compose_module + def test_labels_default_empty(self): + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml") + assert cfg.labels == {} - with patch.object(compose_module.logger, "warning") as mock_warn: - ComposeJobConfig( - environment={"cpus": 2, "memory": "4g"}, - compose={ - "main": {"image": "main:latest", "resources": {"cpus": 4}}, - "sidecars": [{"name": "side", "image": "side:latest", "resources": {"cpus": 4}}], - }, - ) - # 
Warning should have been called (inner total cpus > outer) - assert mock_warn.called + def test_labels_set(self): + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml", labels={"team": "xrl", "task": "swe"}) + assert cfg.labels == {"team": "xrl", "task": "swe"} - def test_resource_budget_check_no_warning_within_budget(self): - """No warning when inner resources are within outer sandbox budget.""" - import rock.sdk.job.compose.config as compose_module + def test_environment_accessible(self): + from rock.sdk.envhub import EnvironmentConfig - with patch.object(compose_module.logger, "warning") as mock_warn: - ComposeJobConfig( - environment={"cpus": 16, "memory": "32g"}, - compose={ - "main": {"image": "main:latest", "resources": {"cpus": 4, "memory": "8g"}}, - "sidecars": [{"name": "side", "image": "side:latest", "resources": {"cpus": 1, "memory": "2g"}}], - }, - ) - assert not mock_warn.called - - def test_full_config(self): - """Test a full ComposeJobConfig round-trip.""" - cfg = ComposeJobConfig( - job_name="test-job", - compose={ - "main": { - "image": "main:latest", - "resources": {"cpus": 4, "memory": "16g"}, - "env": {"KEY": "VALUE"}, - "oss_deps": [{"key": "path/to/obj", "target_path": "/data/", "extract": True}], - }, - "init_containers": [ - { - "name": "init1", - "image": "init:latest", - "script_path": "/init.sh", - "volume_mounts": [{"name": "vol", "mount_path": "/data", "main_mount_path": "/main-data"}], - } - ], - "sidecars": [ - { - "name": "sidecar1", - "image": "sidecar:latest", - "script": "echo hello", - "health": {"port": 8080}, - } - ], - }, - ) - assert cfg.job_name == "test-job" - assert cfg.compose.main.image == "main:latest" - assert cfg.compose.main.resources.cpus == 4 - assert cfg.compose.main.resources.memory == "16g" - assert cfg.compose.main.env == {"KEY": "VALUE"} - assert cfg.compose.main.oss_deps[0].key == "path/to/obj" - assert cfg.compose.main.oss_deps[0].extract is True - assert cfg.compose.init_containers[0].name == 
"init1" - assert cfg.compose.init_containers[0].script_path == "/init.sh" - assert cfg.compose.init_containers[0].volume_mounts[0].name == "vol" - assert cfg.compose.sidecars[0].name == "sidecar1" - assert cfg.compose.sidecars[0].health.port == 8080 + cfg = ComposeJobConfig(compose_file="./docker-compose.yaml") + assert isinstance(cfg.environment, EnvironmentConfig) # --------------------------------------------------------------------------- -# ComposeJobConfig.from_yaml +# from_yaml # --------------------------------------------------------------------------- -class TestComposeJobConfigFromYaml: +class TestFromYaml: def test_from_yaml_minimal(self, tmp_path): - content = textwrap.dedent( - """\ - compose: - main: - image: main:latest - """ - ) + content = "compose_file: ./docker-compose.yaml\n" p = tmp_path / "cfg.yaml" p.write_text(content) cfg = ComposeJobConfig.from_yaml(str(p)) assert isinstance(cfg, ComposeJobConfig) - assert cfg.compose.main.image == "main:latest" - - def test_from_yaml_file_not_found(self): - with pytest.raises(FileNotFoundError): - ComposeJobConfig.from_yaml("/nonexistent/path.yaml") + assert cfg.compose_file == "./docker-compose.yaml" def test_from_yaml_full(self, tmp_path): content = textwrap.dedent( """\ - job_name: my-compose-job + job_name: swe-bench-job + namespace: xrl-sandbox timeout: 3600 - compose: - main: - image: myregistry/main:latest - resources: - cpus: 4 - memory: "16g" - env: - DATASET: my-dataset - oss_deps: - - key: path/to/archive.tar.gz - target_path: /data/ - extract: true - init_containers: - - name: init1 - image: init:latest - script_path: /init.sh - sidecars: - - name: proxy - image: proxy:latest - script_path: /proxy.sh - health: - port: 8082 - timeout_sec: 60 + compose_file: ./docker-compose.yaml + abort_on_container_exit: false + labels: + team: xrl + task: swe-bench + environment: + image: docker:27-dind + memory: "32g" + cpus: 8 """ ) p = tmp_path / "cfg.yaml" p.write_text(content) cfg = 
ComposeJobConfig.from_yaml(str(p)) - assert cfg.job_name == "my-compose-job" + assert cfg.job_name == "swe-bench-job" + assert cfg.namespace == "xrl-sandbox" assert cfg.timeout == 3600 - assert cfg.compose.main.resources.cpus == 4 - assert cfg.compose.main.oss_deps[0].extract is True - assert cfg.compose.init_containers[0].name == "init1" - assert cfg.compose.sidecars[0].name == "proxy" - assert cfg.compose.sidecars[0].health.port == 8082 - - -# --------------------------------------------------------------------------- -# JobConfig.from_yaml — three-way auto-detection -# --------------------------------------------------------------------------- - - -class TestJobConfigFromYamlThreeWay: - """JobConfig.from_yaml dispatches to the correct subclass including ComposeJobConfig.""" - - def test_bash_script_detected(self, tmp_path): - yaml_content = "script: echo hello\ntimeout: 60\n" - p = tmp_path / "cfg.yaml" - p.write_text(yaml_content) - cfg = JobConfig.from_yaml(str(p)) - assert isinstance(cfg, BashJobConfig) - assert not isinstance(cfg, ComposeJobConfig) + assert cfg.compose_file == "./docker-compose.yaml" + assert cfg.abort_on_container_exit is False + assert cfg.labels == {"team": "xrl", "task": "swe-bench"} - def test_harbor_experiment_id_detected(self, tmp_path): - from rock.sdk.bench.models.job.config import HarborJobConfig - - yaml_content = "experiment_id: exp-1\nagents:\n - name: my-agent\n" - p = tmp_path / "cfg.yaml" - p.write_text(yaml_content) - cfg = JobConfig.from_yaml(str(p)) - assert isinstance(cfg, HarborJobConfig) - - def test_compose_key_detected(self, tmp_path): - yaml_content = textwrap.dedent( - """\ - compose: - main: - image: main:latest - """ - ) - p = tmp_path / "cfg.yaml" - p.write_text(yaml_content) - cfg = JobConfig.from_yaml(str(p)) - assert isinstance(cfg, ComposeJobConfig) - - def test_compose_with_script_path(self, tmp_path): - """script_path at top-level (from BashJobConfig) + compose block → ComposeJobConfig.""" - yaml_content = 
textwrap.dedent( - """\ - script_path: ./main.sh - compose: - main: - image: main:latest - """ - ) - p = tmp_path / "cfg.yaml" - p.write_text(yaml_content) - cfg = JobConfig.from_yaml(str(p)) - assert isinstance(cfg, ComposeJobConfig) - assert cfg.script_path == "./main.sh" + def test_from_yaml_file_not_found(self): + with pytest.raises(FileNotFoundError): + ComposeJobConfig.from_yaml("/nonexistent/path.yaml") - def test_compose_with_full_config(self, tmp_path): - yaml_content = textwrap.dedent( - """\ - job_name: swe-job - script_path: ./main.sh - timeout: 7200 - compose: - main: - image: myregistry/main:latest - resources: - cpus: 4 - memory: "16g" - sidecars: - - name: proxy - image: proxy:latest - script_path: /proxy.sh - """ - ) + def test_from_yaml_missing_compose_file_raises(self, tmp_path): + """YAML without compose_file should fail.""" + content = "job_name: my-job\ntimeout: 300\n" p = tmp_path / "cfg.yaml" - p.write_text(yaml_content) - cfg = JobConfig.from_yaml(str(p)) - assert isinstance(cfg, ComposeJobConfig) - assert cfg.job_name == "swe-job" - assert cfg.compose.sidecars[0].name == "proxy" + p.write_text(content) + with pytest.raises(Exception): + ComposeJobConfig.from_yaml(str(p)) - def test_invalid_compose_raises_value_error(self, tmp_path): - """A YAML with 'compose' key but an invalid compose structure raises a descriptive error.""" - yaml_content = textwrap.dedent( - """\ - compose: - main: {} - """ - ) - # compose.main is missing the required 'image' field → ComposeJobConfig fails - # BashJobConfig rejects 'compose' (extra=forbid) → also fails + def test_from_yaml_empty_compose_file_raises(self, tmp_path): + """YAML with compose_file: '' should fail.""" + content = "compose_file: ''\n" p = tmp_path / "cfg.yaml" - p.write_text(yaml_content) - with pytest.raises(ValueError, match="does not match any known job type"): - JobConfig.from_yaml(str(p)) - - def test_bash_direct_from_yaml_unaffected(self, tmp_path): - """BashJobConfig.from_yaml() still 
works independently.""" - yaml_content = "script: ls -la\ntimeout: 120\n" - p = tmp_path / "bash.yaml" - p.write_text(yaml_content) - cfg = BashJobConfig.from_yaml(str(p)) - assert isinstance(cfg, BashJobConfig) - assert cfg.script == "ls -la" + p.write_text(content) + with pytest.raises(Exception): + ComposeJobConfig.from_yaml(str(p)) diff --git a/tests/unit/sdk/job/test_config.py b/tests/unit/sdk/job/test_config.py index 631d576eef..709ea358f7 100644 --- a/tests/unit/sdk/job/test_config.py +++ b/tests/unit/sdk/job/test_config.py @@ -668,6 +668,67 @@ def test_harbor_from_yaml_direct_still_works(self, tmp_path): assert isinstance(cfg, HarborJobConfig) assert cfg.n_attempts == 2 + def test_auto_detect_compose_by_compose_file(self, tmp_path): + """compose_file present → ComposeJobConfig detected.""" + from rock.sdk.job.compose.config import ComposeJobConfig + + yaml_content = "compose_file: ./docker-compose.yaml\n" + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + cfg = JobConfig.from_yaml(str(p)) + assert isinstance(cfg, ComposeJobConfig) + assert cfg.compose_file == "./docker-compose.yaml" + + def test_auto_detect_compose_no_compose_file_not_detected(self, tmp_path): + """Without compose_file key, BashJobConfig is returned instead.""" + yaml_content = "script: echo hello\ntimeout: 60\n" + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + cfg = JobConfig.from_yaml(str(p)) + from rock.sdk.job.compose.config import ComposeJobConfig + + assert not isinstance(cfg, ComposeJobConfig) + assert isinstance(cfg, BashJobConfig) + + def test_auto_detect_compose_with_full_config(self, tmp_path): + """compose_file + other job fields → ComposeJobConfig with all fields populated.""" + import textwrap + + from rock.sdk.job.compose.config import ComposeJobConfig + + yaml_content = textwrap.dedent( + """\ + job_name: swe-job + timeout: 7200 + compose_file: ./docker-compose.yaml + abort_on_container_exit: false + """ + ) + p = tmp_path / "cfg.yaml" + 
p.write_text(yaml_content) + cfg = JobConfig.from_yaml(str(p)) + assert isinstance(cfg, ComposeJobConfig) + assert cfg.job_name == "swe-job" + assert cfg.compose_file == "./docker-compose.yaml" + assert cfg.abort_on_container_exit is False + + def test_auto_detect_compose_invalid_compose_file_raises(self, tmp_path): + """compose_file key present but empty → validation error surfaced.""" + yaml_content = "compose_file: ''\n" + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + with pytest.raises(ValueError, match="does not match any known job type"): + JobConfig.from_yaml(str(p)) + + def test_bash_with_compose_file_rejected(self, tmp_path): + """compose_file is not a Bash/Harbor field; mixing with bash-only fields raises.""" + yaml_content = "compose_file: ./docker-compose.yaml\nscript: echo hello\n" + p = tmp_path / "cfg.yaml" + p.write_text(yaml_content) + # compose_file triggers ComposeJobConfig path; script is extra→forbidden + with pytest.raises(ValueError, match="does not match any known job type"): + JobConfig.from_yaml(str(p)) + # --------------------------------------------------------------------------- # OssMirrorConfig on base EnvironmentConfig diff --git a/tests/unit/sdk/job/test_trial_compose.py b/tests/unit/sdk/job/test_trial_compose.py index 8320ba2d84..ecc4a78a6d 100644 --- a/tests/unit/sdk/job/test_trial_compose.py +++ b/tests/unit/sdk/job/test_trial_compose.py @@ -1,4 +1,4 @@ -"""Tests for rock.sdk.job.compose.trial — ComposeTrial.""" +"""Tests for rock.sdk.job.compose.trial — ComposeTrial v2 (docker compose up).""" from __future__ import annotations @@ -6,37 +6,29 @@ from rock.sdk.envhub import EnvironmentConfig from rock.sdk.envhub.config import OssMirrorConfig -from rock.sdk.job.compose.config import ( - ComposeJobConfig, - ComposeSpec, - HealthSpec, - InitContainerSpec, - MainContainerSpec, - OssDep, - ResourceSpec, - SecretEnvEntry, - SidecarSpec, -) -from rock.sdk.job.compose.trial import ( - ComposeTrial, - _entrypoint_args, - 
_env_args, - _render_oss_deps, - _resource_args, -) +from rock.sdk.job.result import TrialResult from rock.sdk.job.trial.registry import _create_trial -# ── Fixtures ────────────────────────────────────────────────────────────────── +def _make_config( + compose_file: str = "./docker-compose.yaml", + abort_on_container_exit: bool = True, + job_name: str = "test-job", + oss_mirror: OssMirrorConfig | None = None, +): + """Build a minimal v2 ComposeJobConfig.""" + # Import here to allow config-dev to have their impl ready first + from rock.sdk.job.compose.config import ComposeJobConfig -def _minimal_main(image="ubuntu:22.04") -> MainContainerSpec: - return MainContainerSpec(image=image) + env_kwargs = {} + if oss_mirror is not None: + env_kwargs["oss_mirror"] = oss_mirror - -def _minimal_compose(script="echo hello") -> ComposeJobConfig: return ComposeJobConfig( - script=script, - compose=ComposeSpec(main=_minimal_main()), + job_name=job_name, + compose_file=compose_file, + abort_on_container_exit=abort_on_container_exit, + environment=EnvironmentConfig(**env_kwargs), ) @@ -49,7 +41,6 @@ def _mock_sandbox() -> AsyncMock: obs.output = "" sb.arun = AsyncMock(return_value=obs) sb.fs = AsyncMock() - sb.fs.ensure_ossutil = AsyncMock(return_value=True) sb.fs.upload_dir = AsyncMock(return_value=MagicMock(exit_code=0)) sb.write_file_by_path = AsyncMock(return_value=MagicMock(success=True)) return sb @@ -60,7 +51,9 @@ def _mock_sandbox() -> AsyncMock: class TestRegistration: def test_compose_config_creates_compose_trial(self): - cfg = _minimal_compose() + from rock.sdk.job.compose.trial import ComposeTrial + + cfg = _make_config() trial = _create_trial(cfg) assert isinstance(trial, ComposeTrial) @@ -70,532 +63,245 @@ def test_compose_config_creates_compose_trial(self): class TestBuild: def test_build_returns_bash_runner(self): - cfg = _minimal_compose() + from rock.sdk.job.compose.trial import ComposeTrial + + cfg = _make_config() trial = ComposeTrial(cfg) assert trial.build() 
== "bash /rock/runner.sh" -# ── setup() ─────────────────────────────────────────────────────────────────── +# ── runner.sh rendering ─────────────────────────────────────────────────────── -class TestSetup: - async def test_setup_writes_runner_sh(self): - cfg = _minimal_compose() +class TestRunnerRendering: + def _get_runner(self, **kwargs) -> str: + from rock.sdk.job.compose.trial import ComposeTrial + + cfg = _make_config(**kwargs) trial = ComposeTrial(cfg) - sb = _mock_sandbox() + return trial._render_runner_sh() - await trial.setup(sb) + def test_runner_contains_docker_compose_up(self): + runner = self._get_runner() + assert "docker compose" in runner + assert "up" in runner - # write_file_by_path should have been called with /rock/runner.sh - paths = [call.args[1] for call in sb.write_file_by_path.call_args_list] - assert "/rock/runner.sh" in paths + def test_runner_contains_exit_code_from_main(self): + runner = self._get_runner() + assert "--exit-code-from main" in runner - async def test_setup_writes_main_script(self): - cfg = _minimal_compose(script="echo main-script") - trial = ComposeTrial(cfg) - sb = _mock_sandbox() + def test_abort_flag_when_abort_on_container_exit_true(self): + runner = self._get_runner(abort_on_container_exit=True) + assert "--abort-on-container-exit" in runner - await trial.setup(sb) + def test_no_abort_flag_when_abort_on_container_exit_false(self): + runner = self._get_runner(abort_on_container_exit=False) + assert "--abort-on-container-exit" not in runner - # main.sh should be written - paths = [call.args[1] for call in sb.write_file_by_path.call_args_list] - assert "/rock/scripts/main.sh" in paths - - # Find main.sh content - for call in sb.write_file_by_path.call_args_list: - if call.args[1] == "/rock/scripts/main.sh": - assert "echo main-script" in call.args[0] - break - - async def test_setup_writes_inline_init_script(self): - cfg = ComposeJobConfig( - script="echo main", - compose=ComposeSpec( - main=_minimal_main(), - 
init_containers=[InitContainerSpec(name="setup", image="alpine", script="echo setup-init")], - ), - ) - trial = ComposeTrial(cfg) - sb = _mock_sandbox() + def test_compose_file_path_in_runner(self): + """Runner must reference the fixed in-sandbox path for compose file.""" + runner = self._get_runner() + assert "/rock/compose/docker-compose.yaml" in runner - await trial.setup(sb) + def test_p0_starts_dockerd_with_kata_fixes(self): + """P0 must start dockerd with kata-environment fixes: + explicit PATH (so nohup'd dockerd finds containerd) and + DOCKER_IGNORE_BR_NETFILTER_ERROR=1 (kata guest lacks br_netfilter).""" + runner = self._get_runner() + assert "nohup dockerd" in runner + assert "DOCKER_IGNORE_BR_NETFILTER_ERROR=1" in runner + assert "PATH=/usr/local/bin" in runner - paths = [call.args[1] for call in sb.write_file_by_path.call_args_list] - assert "/rock/scripts/setup.sh" in paths + def test_pipestatus_preserved(self): + """str.replace approach must keep ${PIPESTATUS[0]} intact (not broken by str.format).""" + runner = self._get_runner() + assert "${PIPESTATUS[0]}" in runner - async def test_setup_runner_contains_docker_network_create(self): - cfg = _minimal_compose() - trial = ComposeTrial(cfg) - sb = _mock_sandbox() + def test_cleanup_trap_runs_compose_logs(self): + """trap EXIT must capture docker compose logs.""" + runner = self._get_runner() + assert "docker compose" in runner + assert "logs" in runner + assert "compose.log" in runner + + def test_cleanup_trap_runs_compose_down(self): + """trap EXIT must tear down compose stack.""" + runner = self._get_runner() + assert "docker compose" in runner + assert "down" in runner + + def test_no_oss_upload_when_mirror_disabled(self): + runner = self._get_runner() + # Should not contain ossutil commands when oss_mirror not enabled + assert "ossutil" not in runner + + def test_oss_upload_when_mirror_enabled(self): + mirror = OssMirrorConfig( + enabled=True, + oss_bucket="my-bucket", + 
oss_endpoint="oss-cn-hangzhou-internal.aliyuncs.com", + oss_region="cn-hangzhou", + ) + runner = self._get_runner(oss_mirror=mirror) + assert "ossutil" in runner - await trial.setup(sb) + def test_optional_registry_login_rendered(self): + """Registry login block must be present (even if conditional at runtime).""" + runner = self._get_runner() + # Must contain conditional registry login + assert "REGISTRY_USERNAME" in runner + assert "docker login" in runner - runner_content = None - for call in sb.write_file_by_path.call_args_list: - if call.args[1] == "/rock/runner.sh": - runner_content = call.args[0] - break + def test_runner_shebang(self): + runner = self._get_runner() + assert runner.startswith("#!/bin/bash") - assert runner_content is not None - assert "docker network create rock_compose_$$" in runner_content - assert "main.sh" in runner_content - async def test_setup_runner_contains_pipestatus(self): - """${PIPESTATUS[0]} must be present verbatim (not broken by str.format).""" - cfg = _minimal_compose() - trial = ComposeTrial(cfg) - sb = _mock_sandbox() +# ── setup() ─────────────────────────────────────────────────────────────────── - await trial.setup(sb) - runner_content = None - for call in sb.write_file_by_path.call_args_list: - if call.args[1] == "/rock/runner.sh": - runner_content = call.args[0] - break - - assert runner_content is not None - assert "${PIPESTATUS[0]}" in runner_content - - async def test_setup_ensures_ossutil_when_oss_deps(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=MainContainerSpec( - image="ubuntu:22.04", - oss_deps=[OssDep(key="oss://bucket/data.tar.gz", target_path="/data")], - ) - ), - ) +class TestSetup: + async def test_setup_writes_runner_sh(self): + from rock.sdk.job.compose.trial import ComposeTrial + + cfg = _make_config() trial = ComposeTrial(cfg) sb = _mock_sandbox() await trial.setup(sb) - sb.fs.ensure_ossutil.assert_called_once() + paths = [call.args[1] for call in 
sb.write_file_by_path.call_args_list] + assert "/rock/runner.sh" in paths + + async def test_setup_calls_upload_files(self): + """setup() must call _upload_files to handle environment.uploads.""" + from rock.sdk.job.compose.trial import ComposeTrial - async def test_setup_no_ossutil_when_no_oss_deps(self): - cfg = _minimal_compose() + cfg = _make_config() trial = ComposeTrial(cfg) sb = _mock_sandbox() - await trial.setup(sb) + # Patch _upload_files to track calls + upload_called = [] - sb.fs.ensure_ossutil.assert_not_called() + async def fake_upload(sandbox): + upload_called.append(True) - async def test_setup_reads_script_path(self, tmp_path): - script_file = tmp_path / "main.sh" - script_file.write_text("echo from-file") - - cfg = ComposeJobConfig( - script_path=str(script_file), - compose=ComposeSpec(main=_minimal_main()), - ) - trial = ComposeTrial(cfg) - sb = _mock_sandbox() + trial._upload_files = fake_upload await trial.setup(sb) - # Ensure content from file was written to /rock/scripts/main.sh - for call in sb.write_file_by_path.call_args_list: - if call.args[1] == "/rock/scripts/main.sh": - assert "echo from-file" in call.args[0] - break + assert len(upload_called) == 1 # ── collect() ──────────────────────────────────────────────────────────────── class TestCollect: - async def test_collect_success(self): - cfg = ComposeJobConfig( - script="echo hi", - job_name="myjob", - compose=ComposeSpec(main=_minimal_main()), - ) + async def test_collect_success_returns_trial_result(self): + from rock.sdk.job.compose.trial import ComposeTrial + + cfg = _make_config(job_name="my-job") trial = ComposeTrial(cfg) sb = _mock_sandbox() result = await trial.collect(sb, output="ok\n", exit_code=0) + assert isinstance(result, TrialResult) assert result.exception_info is None - assert result.task_name == "myjob" + assert result.task_name == "my-job" assert result.status == "completed" assert result.exit_code == 0 - async def test_collect_failure_sets_compose_exception(self): - 
cfg = ComposeJobConfig( - script="exit 1", - job_name="myjob", - compose=ComposeSpec(main=_minimal_main()), - ) + async def test_collect_failure_sets_compose_main_service_failed(self): + from rock.sdk.job.compose.trial import ComposeTrial + + cfg = _make_config(job_name="fail-job") trial = ComposeTrial(cfg) sb = _mock_sandbox() - result = await trial.collect(sb, output="", exit_code=1) + result = await trial.collect(sb, output="", exit_code=2) assert result.exception_info is not None - assert result.exception_info.exception_type == "ComposeMainContainerFailed" + assert result.exception_info.exception_type == "ComposeMainServiceFailed" + assert "2" in result.exception_info.exception_message assert result.status == "failed" - assert result.exit_code == 1 + assert result.exit_code == 2 + + async def test_collect_reads_compose_log(self): + from rock.sdk.job.compose.trial import ComposeTrial - async def test_collect_reads_main_log(self): - cfg = _minimal_compose() + cfg = _make_config() trial = ComposeTrial(cfg) sb = _mock_sandbox() obs = MagicMock() - obs.output = "some log output" + obs.output = "compose log content" sb.arun = AsyncMock(return_value=obs) result = await trial.collect(sb, output="", exit_code=0) - # arun should have been called with cat for main log calls = [str(call) for call in sb.arun.call_args_list] - assert any("main.log" in c for c in calls) + assert any("compose.log" in c for c in calls) assert result is not None - async def test_collect_reads_sidecar_log(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=_minimal_main(), - sidecars=[SidecarSpec(name="proxy", image="nginx:latest")], - ), - ) + async def test_collect_exit_code_1_is_failure(self): + from rock.sdk.job.compose.trial import ComposeTrial + + cfg = _make_config() trial = ComposeTrial(cfg) sb = _mock_sandbox() - obs = MagicMock() - obs.output = "proxy log" - sb.arun = AsyncMock(return_value=obs) - - await trial.collect(sb, output="", exit_code=0) - - calls 
= [str(call) for call in sb.arun.call_args_list] - assert any("proxy.log" in c for c in calls) + result = await trial.collect(sb, output="", exit_code=1) -# ── runner.sh rendering ─────────────────────────────────────────────────────── + assert result.exception_info is not None + assert result.exception_info.exception_type == "ComposeMainServiceFailed" + async def test_collect_returns_single_trial_result_not_list(self): + from rock.sdk.job.compose.trial import ComposeTrial -class TestRunnerRendering: - def _get_runner(self, cfg: ComposeJobConfig) -> str: + cfg = _make_config() trial = ComposeTrial(cfg) - return trial._render_runner_sh() - - def test_p0_starts_dockerd_with_kata_fixes(self): - """P0 must actively start dockerd with the fixes learned from real kata runs: - explicit PATH (so nohup'd dockerd finds containerd) and - DOCKER_IGNORE_BR_NETFILTER_ERROR=1 (kata guest lacks br_netfilter).""" - cfg = ComposeJobConfig(script="echo hi", compose=ComposeSpec(main=_minimal_main())) - runner = self._get_runner(cfg) - assert "nohup dockerd" in runner - assert "DOCKER_IGNORE_BR_NETFILTER_ERROR=1" in runner - assert "PATH=/usr/local/bin" in runner - - def test_main_mounts_scripts_dir(self): - """Inner main container must bind-mount the outer /rock/scripts so main.sh exists.""" - cfg = ComposeJobConfig(script="echo hi", compose=ComposeSpec(main=_minimal_main())) - runner = self._get_runner(cfg) - assert "-v /rock/scripts:/rock/scripts:ro" in runner - assert "bash /rock/scripts/main.sh" in runner - - def test_init_container_has_rm_flag(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=_minimal_main(), - init_containers=[InitContainerSpec(name="init1", image="alpine", script="echo init")], - ), - ) - runner = self._get_runner(cfg) - assert "docker run --rm" in runner - assert "init1" in runner - # init containers also need the scripts dir mounted - assert "-v /rock/scripts:/rock/scripts:ro" in runner - - def 
test_sidecar_has_d_flag_and_network_alias(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=_minimal_main(), - sidecars=[SidecarSpec(name="proxy", image="nginx:latest")], - ), - ) - runner = self._get_runner(cfg) - assert "docker run -d" in runner - assert "--network-alias proxy" in runner - - def test_health_probe_triggers_nc(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=_minimal_main(), - sidecars=[SidecarSpec(name="proxy", image="nginx:latest", health=HealthSpec(port=8080))], - ), - ) - runner = self._get_runner(cfg) - assert "nc -z" in runner - assert "proxy" in runner - assert "8080" in runner - - def test_no_health_probe_when_no_health(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=_minimal_main(), - sidecars=[SidecarSpec(name="proxy", image="nginx:latest")], - ), - ) - runner = self._get_runner(cfg) - assert "nc -z" not in runner - - def test_privileged_flag(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=MainContainerSpec(image="ubuntu:22.04", privileged=True), - ), - ) - runner = self._get_runner(cfg) - # main container section should have --privileged - assert "--privileged" in runner - - def test_init_privileged(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=_minimal_main(), - init_containers=[InitContainerSpec(name="priv-init", image="alpine", privileged=True)], - ), - ) - runner = self._get_runner(cfg) - assert "--privileged" in runner - - def test_command_produces_entrypoint(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=_minimal_main(), - init_containers=[ - InitContainerSpec(name="custom", image="alpine", command=["sh"], args=["-c", "echo custom"]) - ], - ), - ) - runner = self._get_runner(cfg) - assert "--entrypoint" in runner - - def test_secret_env_rendered_as_shell_var_reference(self): - cfg = ComposeJobConfig( - script="echo hi", - 
compose=ComposeSpec( - main=MainContainerSpec( - image="ubuntu:22.04", - secret_env={"MY_SECRET": SecretEnvEntry(secret_name="my-secret", secret_key="key")}, - ) - ), - ) - runner = self._get_runner(cfg) - # Secret should be a shell variable reference, not the actual secret value - assert "${MY_SECRET}" in runner - assert "secret_key" not in runner - assert "my-secret" not in runner - - def test_oss_deps_extract_branch_generates_tar(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=MainContainerSpec( - image="ubuntu:22.04", - oss_deps=[OssDep(key="oss://bucket/data.tar.gz", target_path="/data", extract=True)], - ) - ), - ) - runner = self._get_runner(cfg) - assert "tar -xf" in runner - assert "ossutil cp" in runner - - def test_oss_deps_no_extract_no_tar(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=MainContainerSpec( - image="ubuntu:22.04", - oss_deps=[OssDep(key="oss://bucket/model.bin", target_path="/model/model.bin")], - ) - ), - ) - runner = self._get_runner(cfg) - assert "ossutil cp" in runner - assert "tar -xf" not in runner - - def test_pipestatus_preserved(self): - """str.replace approach must keep ${PIPESTATUS[0]} intact.""" - runner = self._get_runner(_minimal_compose()) - assert "${PIPESTATUS[0]}" in runner + sb = _mock_sandbox() - def test_main_script_path_in_runner(self): - runner = self._get_runner(_minimal_compose()) - assert "bash /rock/scripts/main.sh" in runner + result = await trial.collect(sb, output="ok", exit_code=0) - def test_resource_spec_cpus_in_runner(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec(main=MainContainerSpec(image="ubuntu:22.04", resources=ResourceSpec(cpus=4.0))), - ) - runner = self._get_runner(cfg) - assert "--cpus" in runner - assert "4.0" in runner - - def test_resource_spec_memory_in_runner(self): - cfg = ComposeJobConfig( - script="echo hi", - compose=ComposeSpec( - main=MainContainerSpec( - image="ubuntu:22.04", - 
resources=ResourceSpec(memory="8g", memory_limit="16g"), - ) - ), - ) - runner = self._get_runner(cfg) - assert "--memory-reservation" in runner - assert "--memory" in runner - - -# ── Helper function unit tests ──────────────────────────────────────────────── - - -class TestResourceArgs: - def test_none_returns_empty(self): - assert _resource_args(None) == [] - - def test_cpus(self): - r = ResourceSpec(cpus=2.0) - args = _resource_args(r) - assert "--cpus 2.0" in args - - def test_cpu_limit_overrides_cpus(self): - r = ResourceSpec(cpus=2.0, cpu_limit=4.0) - args = _resource_args(r) - assert "--cpus 4.0" in args - assert "--cpus 2.0" not in args - - def test_memory(self): - r = ResourceSpec(memory="4g") - args = _resource_args(r) - assert "--memory-reservation 4g" in args - - def test_memory_limit(self): - r = ResourceSpec(memory_limit="8g") - args = _resource_args(r) - assert "--memory 8g" in args - - -class TestEnvArgs: - def test_plain_env(self): - args = _env_args({"FOO": "bar"}, {}) - assert "-e FOO=bar" in args - - def test_secret_env_shell_var(self): - secret = SecretEnvEntry(secret_name="my-secret", secret_key="my-key") - args = _env_args({}, {"MY_SECRET": secret}) - # Must reference ${MY_SECRET} not the literal key value - env_arg = next(a for a in args if "MY_SECRET" in a) - assert "${MY_SECRET}" in env_arg - assert "my-key" not in env_arg - assert "my-secret" not in env_arg - - -class TestEntrypointArgs: - def test_command_sets_entrypoint(self): - spec = MagicMock() - spec.command = ["dockerd"] - spec.args = ["--tls=false"] - spec.script_path = None - spec.script = None - flag_args, pos = _entrypoint_args(spec) - assert any("--entrypoint" in f for f in flag_args) - assert "--tls=false" in pos - - def test_script_path(self): - spec = MagicMock() - spec.command = None - spec.script_path = "/my/script.sh" - spec.script = None - flag_args, pos = _entrypoint_args(spec) - assert flag_args == [] - assert "bash" in pos - assert "/my/script.sh" in pos - - def 
test_inline_script_uses_name(self): - spec = MagicMock() - spec.command = None - spec.script_path = None - spec.script = "echo hi" - spec.name = "mycontainer" - flag_args, pos = _entrypoint_args(spec) - assert flag_args == [] - assert "bash /rock/scripts/mycontainer.sh" in pos - - def test_no_entrypoint_options(self): - spec = MagicMock() - spec.command = None - spec.script_path = None - spec.script = None - flag_args, pos = _entrypoint_args(spec) - assert flag_args == [] - assert pos == "" - - -class TestRenderOssDeps: - def test_empty(self): - result = _render_oss_deps([]) - assert "no oss_deps" in result - - def test_plain_dep(self): - dep = OssDep(key="oss://b/file.bin", target_path="/data/file.bin") - result = _render_oss_deps([dep]) - assert "ossutil cp" in result - assert "tar" not in result - - def test_extract_dep(self): - dep = OssDep(key="oss://b/data.tar.gz", target_path="/data", extract=True) - result = _render_oss_deps([dep]) - assert "tar -xf" in result - assert "ossutil cp" in result + assert isinstance(result, TrialResult) + assert not isinstance(result, list) # ── on_sandbox_ready hook ───────────────────────────────────────────────────── class TestOnSandboxReady: - async def test_backfills_namespace(self): - cfg = _minimal_compose() + async def test_backfills_namespace_and_experiment_id(self): + from rock.sdk.job.compose.trial import ComposeTrial + + cfg = _make_config() trial = ComposeTrial(cfg) sb = MagicMock() - sb._namespace = "test-ns" - sb._experiment_id = "test-exp" + sb._namespace = "ns-123" + sb._experiment_id = "exp-456" await trial.on_sandbox_ready(sb) - assert cfg.namespace == "test-ns" - assert cfg.experiment_id == "test-exp" + assert cfg.namespace == "ns-123" + assert cfg.experiment_id == "exp-456" async def test_oss_mirror_env_prepared_when_enabled(self, monkeypatch): + from rock.sdk.job.compose.trial import ComposeTrial + + # Clear any OSS env vars to test pure config path for k in list(__import__("os").environ): if 
k.startswith("OSS"): monkeypatch.delenv(k, raising=False) - cfg = ComposeJobConfig( - script="echo hi", - job_name="myjob", - compose=ComposeSpec(main=_minimal_main()), - environment=EnvironmentConfig( - oss_mirror=OssMirrorConfig( - enabled=True, - oss_bucket="b", - oss_endpoint="ep", - oss_region="rg", - ), + cfg = _make_config( + job_name="oss-job", + oss_mirror=OssMirrorConfig( + enabled=True, + oss_bucket="my-bucket", + oss_endpoint="oss-cn-hangzhou.aliyuncs.com", + oss_region="cn-hangzhou", ), ) trial = ComposeTrial(cfg) @@ -605,6 +311,6 @@ async def test_oss_mirror_env_prepared_when_enabled(self, monkeypatch): await trial.on_sandbox_ready(sb) - assert cfg.environment.env.get("OSS_BUCKET") == "b" + assert cfg.environment.env.get("OSS_BUCKET") == "my-bucket" assert "ROCK_ARTIFACT_DIR" in cfg.environment.env - assert cfg.environment.env["ROCK_OSS_PREFIX"] == "artifacts/ns1/exp1/myjob" + assert cfg.environment.env["ROCK_OSS_PREFIX"] == "artifacts/ns1/exp1/oss-job" From 0400514158c6d7ae9034bb9cd3fe642e931c012a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=82=E6=B4=9B?= Date: Sun, 14 Jun 2026 23:04:23 +0800 Subject: [PATCH 4/4] feat(compose-example): add reasoning env vars to proxy service Co-Authored-By: Claude Code AI-Model: claude-opus-4-6 AI-Contributed/Feature: 0/6 AI-Contributed/UT: 0/0 --- examples/job/compose/docker-compose.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/job/compose/docker-compose.yaml b/examples/job/compose/docker-compose.yaml index 06414f4c82..22cf5cda71 100644 --- a/examples/job/compose/docker-compose.yaml +++ b/examples/job/compose/docker-compose.yaml @@ -29,6 +29,12 @@ services: MODEL_BASE_URL: "${MODEL_BASE_URL}" HARBOR_AGENT: "${HARBOR_AGENT}" PROVIDER: "${PROVIDER:-anthropic}" + FORCE_PROXY: "${FORCE_PROXY:-true}" + TEMPERATURE: "${TEMPERATURE:-1.0}" + INTERLEAVED_THINKING: "${INTERLEAVED_THINKING:-true}" + THINKING_TYPE: "${THINKING_TYPE:-adaptive}" + REASONING_EFFORT: "${REASONING_EFFORT:-high}" + 
CONTEXT_1M: "${CONTEXT_1M:-true}" OUTPUT_DIR: /tmp/output SHARED_DIR: /tmp/shared deploy: