From 06ed4a0e542da3fe0ed88194836e19edde891faa Mon Sep 17 00:00:00 2001 From: junxin Date: Thu, 18 Jun 2026 11:28:35 +0800 Subject: [PATCH 1/3] feat(k8s): transport image auth to k8s --- rock/config.py | 5 ++ rock/sandbox/operator/k8s/provider.py | 52 +++++++++++++- rock/sandbox/operator/k8s/template_loader.py | 7 ++ .../sandbox/operator/test_k8s_provider.py | 68 +++++++++++++++++++ .../operator/test_k8s_template_loader.py | 29 ++++++++ 5 files changed, 160 insertions(+), 1 deletion(-) diff --git a/rock/config.py b/rock/config.py index f2bd988734..91dcf3297f 100644 --- a/rock/config.py +++ b/rock/config.py @@ -294,6 +294,11 @@ class K8sConfig: # Watch configuration resync_period: int = 60 # How often (seconds) to perform a full re-list + # Image auth encryption key (32 bytes) used to encrypt registry credentials + # passed to batch-agent via BatchSandbox annotations. Falls back to the + # ROCK_IMAGE_AUTH_KEY environment variable if not set here. + image_auth_key: str | None = None + # ============================================================================ # DEPRECATED: The following fields are deprecated and will be removed in a # future version. Do NOT use them in new code. 
diff --git a/rock/sandbox/operator/k8s/provider.py b/rock/sandbox/operator/k8s/provider.py index a3aaa31ba2..9080b61271 100644 --- a/rock/sandbox/operator/k8s/provider.py +++ b/rock/sandbox/operator/k8s/provider.py @@ -1,11 +1,14 @@ """K8s provider implementations for managing sandbox resources.""" +import base64 import json +import os import re from abc import ABC, abstractmethod from typing import Any, Protocol import yaml +from cryptography.hazmat.primitives.ciphers.aead import AESGCM from kubernetes import client from kubernetes import config as k8s_config @@ -195,6 +198,7 @@ def __init__(self, k8s_config: K8sConfig): self._k8s_api: K8sApiClient | None = None self._initialized = False self._nacos_provider = None + self._image_auth_key = self._load_image_auth_key(k8s_config) # Initialize template loader with config templates self._template_loader = K8sTemplateLoader( @@ -203,6 +207,27 @@ def __init__(self, k8s_config: K8sConfig): ) logger.info(f"Available K8S templates: {', '.join(self._template_loader.available_templates)}") + def _load_image_auth_key(self, k8s_config: K8sConfig) -> bytes | None: + """Load image auth encryption key from config or environment. + + Args: + k8s_config: K8s configuration. + + Returns: + 32-byte key or None if not configured. + """ + key_str = k8s_config.image_auth_key or os.environ.get("ROCK_IMAGE_AUTH_KEY", "") + if not key_str: + return None + key = key_str.encode("utf-8") + if len(key) != 32: + logger.warning( + f"ROCK_IMAGE_AUTH_KEY / K8sConfig.image_auth_key must be 32 bytes, got {len(key)}; " + "image auth encryption disabled" + ) + return None + return key + def set_nacos_provider(self, nacos_provider): """Set Nacos config provider for dynamic pool configuration. 
@@ -572,7 +597,8 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) -> # Template mode: build from template template_name = self._get_template_name(config) - # Build manifest using template loader + # Build manifest using template loader. Auth info is passed to the + # template so the template itself decides where to render it. manifest = self._template_loader.build_manifest( template_name=template_name, sandbox_id=sandbox_id, @@ -583,6 +609,7 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) -> num_gpus=config.num_gpus, accelerator_type=config.accelerator_type, limit_cpus=config.limit_cpus, + encrypted_image_auth=self._encrypt_image_auth(config), ) logger.info( @@ -591,6 +618,29 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) -> ) return manifest + def _encrypt_image_auth(self, config: DockerDeploymentConfig) -> str | None: + """Encrypt registry credentials into the pouch auth format. + + Returns: + Encrypted auth string, or None if no key/credentials are available. + """ + if not self._image_auth_key: + return None + if not config.registry_username or not config.registry_password: + return None + + try: + auth_plaintext = base64.b64encode( + json.dumps({"username": config.registry_username, "password": config.registry_password}).encode() + ).decode("ascii") + aesgcm = AESGCM(self._image_auth_key) + nonce = os.urandom(12) + ciphertext = aesgcm.encrypt(nonce, auth_plaintext.encode(), None) + return base64.b64encode(nonce + ciphertext).decode("ascii") + except Exception as e: + logger.warning(f"Failed to encrypt image auth for {config.container_name}: {e}") + return None + async def _create(self, config: DockerDeploymentConfig) -> tuple[str, str]: """Create a BatchSandbox resource without waiting for IP allocation. 
diff --git a/rock/sandbox/operator/k8s/template_loader.py b/rock/sandbox/operator/k8s/template_loader.py index 3f6a4a1a97..701a653da9 100644 --- a/rock/sandbox/operator/k8s/template_loader.py +++ b/rock/sandbox/operator/k8s/template_loader.py @@ -63,6 +63,7 @@ def build_manifest( num_gpus: int | None = None, accelerator_type: str | None = None, limit_cpus: float | None = None, + encrypted_image_auth: str | None = None, ) -> dict[str, Any]: """Build a complete BatchSandbox manifest from template. @@ -87,6 +88,9 @@ def build_manifest( overcommit baseline. A ``limit_cpus > cpus`` value lets the container burst above its reservation, mirroring the Ray path's ``--cpus`` flag. + Image auth variables are passed to the template so the template itself + can decide where to render them (e.g. encrypted-image-auth annotation). + Args: template_name: Name of the template to use. sandbox_id: Sandbox identifier (auto-generated if missing). @@ -99,6 +103,8 @@ def build_manifest( limit_cpus: CPU hard cap for overcommit (rendered via {{ limit_cpus }}). When None, falls back to ``cpus`` to keep requests.cpu == limits.cpu. + encrypted_image_auth: Pre-encrypted pouch auth string + (rendered via {{ encrypted_image_auth }}). Returns: Complete BatchSandbox manifest. 
@@ -132,6 +138,7 @@ def build_manifest( "num_gpus": num_gpus if num_gpus is not None else "", "accelerator_type": accelerator_type if accelerator_type is not None else "", "limit_cpus": str(effective_limit_cpus) if effective_limit_cpus is not None else "", + "encrypted_image_auth": encrypted_image_auth if encrypted_image_auth is not None else "", } rendered = render_node(config, self._jinja_env, ctx) diff --git a/tests/unit/sandbox/operator/test_k8s_provider.py b/tests/unit/sandbox/operator/test_k8s_provider.py index 364ea589c9..09719b7997 100644 --- a/tests/unit/sandbox/operator/test_k8s_provider.py +++ b/tests/unit/sandbox/operator/test_k8s_provider.py @@ -534,3 +534,71 @@ async def test_limit_cpus_defaults_to_cpus_when_none(self): container = manifest["spec"]["template"]["spec"]["containers"][0] assert container["resources"]["requests"]["cpu"] == "4.0" assert container["resources"]["limits"]["cpu"] == "4.0" + + +IMAGE_AUTH_TEMPLATE = { + "default": { + "namespace": "rock-test", + "ports": {"proxy": 8000, "server": 8080, "ssh": 22}, + "template": { + "metadata": { + "labels": {"app": "test"}, + "annotations": { + "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth": "{{ encrypted_image_auth }}" + }, + }, + "spec": {"containers": [{"name": "main", "image": "python:3.11"}]}, + }, + } +} + + +def _make_provider_with_templates(templates: dict) -> BatchSandboxProvider: + return BatchSandboxProvider( + k8s_config=K8sConfig( + kubeconfig_path=None, + templates=templates, + template_map={}, + ) + ) + + +class TestBuildBatchSandboxManifestImageAuth: + """build_manifest must pass encrypted image auth to the template so the + template can render it into the desired annotation.""" + + async def test_renders_encrypted_image_auth_from_template(self, monkeypatch): + monkeypatch.setenv("ROCK_IMAGE_AUTH_KEY", "0" * 32) + provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE) + config = make_config() + config.registry_username = "user" + config.registry_password = 
"pass" + + manifest = await provider._build_batchsandbox_manifest(config) + + annotations = manifest["spec"]["template"]["metadata"]["annotations"] + assert "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth" in annotations + encrypted = annotations["apps.batch.scheduling.alibabacloud.com/encrypted-image-auth"] + assert len(encrypted) > 0 # base64 encoded ciphertext + + async def test_no_encrypted_auth_when_key_missing(self, monkeypatch): + monkeypatch.delenv("ROCK_IMAGE_AUTH_KEY", raising=False) + provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE) + config = make_config() + config.registry_username = "user" + config.registry_password = "pass" + + manifest = await provider._build_batchsandbox_manifest(config) + + annotations = manifest["spec"]["template"]["metadata"].get("annotations", {}) + assert "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth" not in annotations + + async def test_no_encrypted_auth_when_credentials_missing(self, monkeypatch): + monkeypatch.setenv("ROCK_IMAGE_AUTH_KEY", "0" * 32) + provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE) + config = make_config() + + manifest = await provider._build_batchsandbox_manifest(config) + + annotations = manifest["spec"]["template"]["metadata"].get("annotations", {}) + assert "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth" not in annotations diff --git a/tests/unit/sandbox/operator/test_k8s_template_loader.py b/tests/unit/sandbox/operator/test_k8s_template_loader.py index 86394df1b6..def6b934bb 100644 --- a/tests/unit/sandbox/operator/test_k8s_template_loader.py +++ b/tests/unit/sandbox/operator/test_k8s_template_loader.py @@ -348,3 +348,32 @@ def test_build_manifest_drops_disk_when_none(self): container = manifest["spec"]["template"]["spec"]["containers"][0] assert "ephemeral-storage" not in container["resources"]["requests"] assert "ephemeral-storage" not in container["resources"]["limits"] + + def 
test_build_manifest_passes_encrypted_image_auth(self): + """Test that encrypted_image_auth is exposed to the template.""" + templates = { + "default": { + "ports": {"proxy": 8000, "server": 8080, "ssh": 22}, + "template": { + "metadata": { + "annotations": { + "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth": "{{ encrypted_image_auth }}", + } + }, + "spec": {"containers": [{"name": "main", "image": "python:3.11"}]}, + }, + } + } + loader = K8sTemplateLoader(templates=templates, default_namespace="rock-test") + + manifest = loader.build_manifest( + template_name="default", + sandbox_id="test-sandbox", + image="python:3.11", + cpus=2.0, + memory="4Gi", + encrypted_image_auth="dGVzdC1lbmNyeXB0ZWQ=", + ) + + annotations = manifest["spec"]["template"]["metadata"]["annotations"] + assert annotations["apps.batch.scheduling.alibabacloud.com/encrypted-image-auth"] == "dGVzdC1lbmNyeXB0ZWQ=" From 22ca767c8ddf8055b8826df03a195dc77e6e0cca Mon Sep 17 00:00:00 2001 From: junxin Date: Tue, 23 Jun 2026 14:19:14 +0800 Subject: [PATCH 2/3] fix(k8s): return 'public' image auth for anonymous pulls --- rock/sandbox/operator/k8s/provider.py | 9 +++++---- tests/unit/sandbox/operator/test_k8s_provider.py | 16 +++++++++------- .../sandbox/operator/test_k8s_template_loader.py | 4 ++-- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/rock/sandbox/operator/k8s/provider.py b/rock/sandbox/operator/k8s/provider.py index 9080b61271..26220cb3c1 100644 --- a/rock/sandbox/operator/k8s/provider.py +++ b/rock/sandbox/operator/k8s/provider.py @@ -619,15 +619,16 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) -> return manifest def _encrypt_image_auth(self, config: DockerDeploymentConfig) -> str | None: - """Encrypt registry credentials into the pouch auth format. + """Encrypt registry credentials or return 'public' for anonymous pull. Returns: - Encrypted auth string, or None if no key/credentials are available. 
+ Encrypted auth string, 'public' for anonymous pull, or None if + no image auth key is configured. """ if not self._image_auth_key: return None - if not config.registry_username or not config.registry_password: - return None + if not config.registry_username and not config.registry_password: + return "public" try: auth_plaintext = base64.b64encode( diff --git a/tests/unit/sandbox/operator/test_k8s_provider.py b/tests/unit/sandbox/operator/test_k8s_provider.py index 09719b7997..7f7ba2edae 100644 --- a/tests/unit/sandbox/operator/test_k8s_provider.py +++ b/tests/unit/sandbox/operator/test_k8s_provider.py @@ -536,6 +536,8 @@ async def test_limit_cpus_defaults_to_cpus_when_none(self): assert container["resources"]["limits"]["cpu"] == "4.0" +IMAGE_AUTH_ANNOTATION = "example.com/encrypted-image-auth" + IMAGE_AUTH_TEMPLATE = { "default": { "namespace": "rock-test", @@ -544,7 +546,7 @@ async def test_limit_cpus_defaults_to_cpus_when_none(self): "metadata": { "labels": {"app": "test"}, "annotations": { - "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth": "{{ encrypted_image_auth }}" + IMAGE_AUTH_ANNOTATION: "{{ encrypted_image_auth }}" }, }, "spec": {"containers": [{"name": "main", "image": "python:3.11"}]}, @@ -577,8 +579,8 @@ async def test_renders_encrypted_image_auth_from_template(self, monkeypatch): manifest = await provider._build_batchsandbox_manifest(config) annotations = manifest["spec"]["template"]["metadata"]["annotations"] - assert "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth" in annotations - encrypted = annotations["apps.batch.scheduling.alibabacloud.com/encrypted-image-auth"] + assert IMAGE_AUTH_ANNOTATION in annotations + encrypted = annotations[IMAGE_AUTH_ANNOTATION] assert len(encrypted) > 0 # base64 encoded ciphertext async def test_no_encrypted_auth_when_key_missing(self, monkeypatch): @@ -591,14 +593,14 @@ async def test_no_encrypted_auth_when_key_missing(self, monkeypatch): manifest = await 
provider._build_batchsandbox_manifest(config) annotations = manifest["spec"]["template"]["metadata"].get("annotations", {}) - assert "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth" not in annotations + assert IMAGE_AUTH_ANNOTATION not in annotations - async def test_no_encrypted_auth_when_credentials_missing(self, monkeypatch): + async def test_public_auth_when_credentials_missing(self, monkeypatch): monkeypatch.setenv("ROCK_IMAGE_AUTH_KEY", "0" * 32) provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE) config = make_config() manifest = await provider._build_batchsandbox_manifest(config) - annotations = manifest["spec"]["template"]["metadata"].get("annotations", {}) - assert "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth" not in annotations + annotations = manifest["spec"]["template"]["metadata"]["annotations"] + assert annotations[IMAGE_AUTH_ANNOTATION] == "public" diff --git a/tests/unit/sandbox/operator/test_k8s_template_loader.py b/tests/unit/sandbox/operator/test_k8s_template_loader.py index def6b934bb..8a05f58bb2 100644 --- a/tests/unit/sandbox/operator/test_k8s_template_loader.py +++ b/tests/unit/sandbox/operator/test_k8s_template_loader.py @@ -357,7 +357,7 @@ def test_build_manifest_passes_encrypted_image_auth(self): "template": { "metadata": { "annotations": { - "apps.batch.scheduling.alibabacloud.com/encrypted-image-auth": "{{ encrypted_image_auth }}", + "example.com/image-auth": "{{ encrypted_image_auth }}", } }, "spec": {"containers": [{"name": "main", "image": "python:3.11"}]}, @@ -376,4 +376,4 @@ def test_build_manifest_passes_encrypted_image_auth(self): ) annotations = manifest["spec"]["template"]["metadata"]["annotations"] - assert annotations["apps.batch.scheduling.alibabacloud.com/encrypted-image-auth"] == "dGVzdC1lbmNyeXB0ZWQ=" + assert annotations["example.com/image-auth"] == "dGVzdC1lbmNyeXB0ZWQ=" From d93d00bf00f651118e88fc7e4a8b2a3069bf6b22 Mon Sep 17 00:00:00 2001 From: junxin Date: Thu, 25 Jun 2026 
11:26:12 +0800 Subject: [PATCH 3/3] fix(k8s): let the template default image auth to 'public' --- rock/sandbox/operator/k8s/provider.py | 9 +++++---- tests/unit/sandbox/operator/test_k8s_provider.py | 8 ++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/rock/sandbox/operator/k8s/provider.py b/rock/sandbox/operator/k8s/provider.py index 26220cb3c1..981e2680b5 100644 --- a/rock/sandbox/operator/k8s/provider.py +++ b/rock/sandbox/operator/k8s/provider.py @@ -619,16 +619,17 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) -> return manifest def _encrypt_image_auth(self, config: DockerDeploymentConfig) -> str | None: - """Encrypt registry credentials or return 'public' for anonymous pull. + """Encrypt registry credentials into the pouch auth format. Returns: - Encrypted auth string, 'public' for anonymous pull, or None if - no image auth key is configured. + Encrypted auth string, or None if no key/credentials are available. + The template can use ``{{ encrypted_image_auth | default('public', true) }}`` + to render 'public' when this returns None. 
""" if not self._image_auth_key: return None if not config.registry_username and not config.registry_password: - return "public" + return None try: auth_plaintext = base64.b64encode( diff --git a/tests/unit/sandbox/operator/test_k8s_provider.py b/tests/unit/sandbox/operator/test_k8s_provider.py index 7f7ba2edae..b6ae6ed33a 100644 --- a/tests/unit/sandbox/operator/test_k8s_provider.py +++ b/tests/unit/sandbox/operator/test_k8s_provider.py @@ -546,7 +546,7 @@ async def test_limit_cpus_defaults_to_cpus_when_none(self): "metadata": { "labels": {"app": "test"}, "annotations": { - IMAGE_AUTH_ANNOTATION: "{{ encrypted_image_auth }}" + IMAGE_AUTH_ANNOTATION: "{{ encrypted_image_auth | default('public', true) }}" }, }, "spec": {"containers": [{"name": "main", "image": "python:3.11"}]}, @@ -583,7 +583,7 @@ async def test_renders_encrypted_image_auth_from_template(self, monkeypatch): encrypted = annotations[IMAGE_AUTH_ANNOTATION] assert len(encrypted) > 0 # base64 encoded ciphertext - async def test_no_encrypted_auth_when_key_missing(self, monkeypatch): + async def test_public_auth_when_key_missing(self, monkeypatch): monkeypatch.delenv("ROCK_IMAGE_AUTH_KEY", raising=False) provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE) config = make_config() @@ -592,8 +592,8 @@ async def test_no_encrypted_auth_when_key_missing(self, monkeypatch): manifest = await provider._build_batchsandbox_manifest(config) - annotations = manifest["spec"]["template"]["metadata"].get("annotations", {}) - assert IMAGE_AUTH_ANNOTATION not in annotations + annotations = manifest["spec"]["template"]["metadata"]["annotations"] + assert annotations[IMAGE_AUTH_ANNOTATION] == "public" async def test_public_auth_when_credentials_missing(self, monkeypatch): monkeypatch.setenv("ROCK_IMAGE_AUTH_KEY", "0" * 32)