diff --git a/rock/config.py b/rock/config.py index f2bd988734..91dcf3297f 100644 --- a/rock/config.py +++ b/rock/config.py @@ -294,6 +294,11 @@ class K8sConfig: # Watch configuration resync_period: int = 60 # How often (seconds) to perform a full re-list + # Image auth encryption key (32 bytes) used to encrypt registry credentials + # passed to batch-agent via BatchSandbox annotations. Falls back to the + # ROCK_IMAGE_AUTH_KEY environment variable if not set here. + image_auth_key: str | None = None + # ============================================================================ # DEPRECATED: The following fields are deprecated and will be removed in a # future version. Do NOT use them in new code. diff --git a/rock/sandbox/operator/k8s/provider.py b/rock/sandbox/operator/k8s/provider.py index a3aaa31ba2..981e2680b5 100644 --- a/rock/sandbox/operator/k8s/provider.py +++ b/rock/sandbox/operator/k8s/provider.py @@ -1,11 +1,14 @@ """K8s provider implementations for managing sandbox resources.""" +import base64 import json +import os import re from abc import ABC, abstractmethod from typing import Any, Protocol import yaml +from cryptography.hazmat.primitives.ciphers.aead import AESGCM from kubernetes import client from kubernetes import config as k8s_config @@ -195,6 +198,7 @@ def __init__(self, k8s_config: K8sConfig): self._k8s_api: K8sApiClient | None = None self._initialized = False self._nacos_provider = None + self._image_auth_key = self._load_image_auth_key(k8s_config) # Initialize template loader with config templates self._template_loader = K8sTemplateLoader( @@ -203,6 +207,27 @@ def __init__(self, k8s_config: K8sConfig): ) logger.info(f"Available K8S templates: {', '.join(self._template_loader.available_templates)}") + def _load_image_auth_key(self, k8s_config: K8sConfig) -> bytes | None: + """Load image auth encryption key from config or environment. + + Args: + k8s_config: K8s configuration. + + Returns: + 32-byte key or None if not configured. + """ + key_str = k8s_config.image_auth_key or os.environ.get("ROCK_IMAGE_AUTH_KEY", "") + if not key_str: + return None + key = key_str.encode("utf-8") + if len(key) != 32: + logger.warning( + f"ROCK_IMAGE_AUTH_KEY / K8sConfig.image_auth_key must be 32 bytes, got {len(key)}; " + "image auth encryption disabled" + ) + return None + return key + def set_nacos_provider(self, nacos_provider): """Set Nacos config provider for dynamic pool configuration. @@ -572,7 +597,8 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) -> # Template mode: build from template template_name = self._get_template_name(config) - # Build manifest using template loader + # Build manifest using template loader. Auth info is passed to the + # template so the template itself decides where to render it. manifest = self._template_loader.build_manifest( template_name=template_name, sandbox_id=sandbox_id, @@ -583,6 +609,7 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) -> num_gpus=config.num_gpus, accelerator_type=config.accelerator_type, limit_cpus=config.limit_cpus, + encrypted_image_auth=self._encrypt_image_auth(config), ) logger.info( @@ -591,6 +618,31 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) -> ) return manifest + def _encrypt_image_auth(self, config: DockerDeploymentConfig) -> str | None: + """Encrypt registry credentials into the pouch auth format. + + Returns: + Encrypted auth string, or None if no key/credentials are available. + The template can use ``{{ encrypted_image_auth | default('public', true) }}`` + to render 'public' when this returns None. + """ + if not self._image_auth_key: + return None + if not config.registry_username and not config.registry_password: + return None + + try: + auth_plaintext = base64.b64encode( + json.dumps({"username": config.registry_username, "password": config.registry_password}).encode() + ).decode("ascii") + aesgcm = AESGCM(self._image_auth_key) + nonce = os.urandom(12) + ciphertext = aesgcm.encrypt(nonce, auth_plaintext.encode(), None) + return base64.b64encode(nonce + ciphertext).decode("ascii") + except Exception as e: + logger.warning(f"Failed to encrypt image auth for {config.container_name}: {e}") + return None + async def _create(self, config: DockerDeploymentConfig) -> tuple[str, str]: """Create a BatchSandbox resource without waiting for IP allocation. diff --git a/rock/sandbox/operator/k8s/template_loader.py b/rock/sandbox/operator/k8s/template_loader.py index 3f6a4a1a97..701a653da9 100644 --- a/rock/sandbox/operator/k8s/template_loader.py +++ b/rock/sandbox/operator/k8s/template_loader.py @@ -63,6 +63,7 @@ def build_manifest( num_gpus: int | None = None, accelerator_type: str | None = None, limit_cpus: float | None = None, + encrypted_image_auth: str | None = None, ) -> dict[str, Any]: """Build a complete BatchSandbox manifest from template. @@ -87,6 +88,9 @@ def build_manifest( overcommit baseline. A ``limit_cpus > cpus`` value lets the container burst above its reservation, mirroring the Ray path's ``--cpus`` flag. + Image auth variables are passed to the template so the template itself + can decide where to render them (e.g. encrypted-image-auth annotation). + Args: template_name: Name of the template to use. sandbox_id: Sandbox identifier (auto-generated if missing). @@ -99,6 +103,8 @@ def build_manifest( limit_cpus: CPU hard cap for overcommit (rendered via {{ limit_cpus }}). When None, falls back to ``cpus`` to keep requests.cpu == limits.cpu. + encrypted_image_auth: Pre-encrypted pouch auth string + (rendered via {{ encrypted_image_auth }}). Returns: Complete BatchSandbox manifest. @@ -132,6 +138,7 @@ def build_manifest( "num_gpus": num_gpus if num_gpus is not None else "", "accelerator_type": accelerator_type if accelerator_type is not None else "", "limit_cpus": str(effective_limit_cpus) if effective_limit_cpus is not None else "", + "encrypted_image_auth": encrypted_image_auth if encrypted_image_auth is not None else "", } rendered = render_node(config, self._jinja_env, ctx) diff --git a/tests/unit/sandbox/operator/test_k8s_provider.py b/tests/unit/sandbox/operator/test_k8s_provider.py index 364ea589c9..b6ae6ed33a 100644 --- a/tests/unit/sandbox/operator/test_k8s_provider.py +++ b/tests/unit/sandbox/operator/test_k8s_provider.py @@ -534,3 +534,73 @@ async def test_limit_cpus_defaults_to_cpus_when_none(self): container = manifest["spec"]["template"]["spec"]["containers"][0] assert container["resources"]["requests"]["cpu"] == "4.0" assert container["resources"]["limits"]["cpu"] == "4.0" + + +IMAGE_AUTH_ANNOTATION = "example.com/encrypted-image-auth" + +IMAGE_AUTH_TEMPLATE = { + "default": { + "namespace": "rock-test", + "ports": {"proxy": 8000, "server": 8080, "ssh": 22}, + "template": { + "metadata": { + "labels": {"app": "test"}, + "annotations": { + IMAGE_AUTH_ANNOTATION: "{{ encrypted_image_auth | default('public', true) }}" + }, + }, + "spec": {"containers": [{"name": "main", "image": "python:3.11"}]}, + }, + } +} + + +def _make_provider_with_templates(templates: dict) -> BatchSandboxProvider: + return BatchSandboxProvider( + k8s_config=K8sConfig( + kubeconfig_path=None, + templates=templates, + template_map={}, + ) + ) + + +class TestBuildBatchSandboxManifestImageAuth: + """build_manifest must pass encrypted image auth to the template so the + template can render it into the desired annotation.""" + + async def test_renders_encrypted_image_auth_from_template(self, monkeypatch): + monkeypatch.setenv("ROCK_IMAGE_AUTH_KEY", "0" * 32) + provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE) + config = make_config() + config.registry_username = "user" + config.registry_password = "pass" + + manifest = await provider._build_batchsandbox_manifest(config) + + annotations = manifest["spec"]["template"]["metadata"]["annotations"] + assert IMAGE_AUTH_ANNOTATION in annotations + encrypted = annotations[IMAGE_AUTH_ANNOTATION] + assert len(encrypted) > 0 # base64 encoded ciphertext + + async def test_public_auth_when_key_missing(self, monkeypatch): + monkeypatch.delenv("ROCK_IMAGE_AUTH_KEY", raising=False) + provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE) + config = make_config() + config.registry_username = "user" + config.registry_password = "pass" + + manifest = await provider._build_batchsandbox_manifest(config) + + annotations = manifest["spec"]["template"]["metadata"]["annotations"] + assert annotations[IMAGE_AUTH_ANNOTATION] == "public" + + async def test_public_auth_when_credentials_missing(self, monkeypatch): + monkeypatch.setenv("ROCK_IMAGE_AUTH_KEY", "0" * 32) + provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE) + config = make_config() + + manifest = await provider._build_batchsandbox_manifest(config) + + annotations = manifest["spec"]["template"]["metadata"]["annotations"] + assert annotations[IMAGE_AUTH_ANNOTATION] == "public" diff --git a/tests/unit/sandbox/operator/test_k8s_template_loader.py b/tests/unit/sandbox/operator/test_k8s_template_loader.py index 86394df1b6..8a05f58bb2 100644 --- a/tests/unit/sandbox/operator/test_k8s_template_loader.py +++ b/tests/unit/sandbox/operator/test_k8s_template_loader.py @@ -348,3 +348,32 @@ def test_build_manifest_drops_disk_when_none(self): container = manifest["spec"]["template"]["spec"]["containers"][0] assert "ephemeral-storage" not in container["resources"]["requests"] assert "ephemeral-storage" not in container["resources"]["limits"] + + def test_build_manifest_passes_encrypted_image_auth(self): + """Test that encrypted_image_auth is exposed to the template.""" + templates = { + "default": { + "ports": {"proxy": 8000, "server": 8080, "ssh": 22}, + "template": { + "metadata": { + "annotations": { + "example.com/image-auth": "{{ encrypted_image_auth }}", + } + }, + "spec": {"containers": [{"name": "main", "image": "python:3.11"}]}, + }, + } + } + loader = K8sTemplateLoader(templates=templates, default_namespace="rock-test") + + manifest = loader.build_manifest( + template_name="default", + sandbox_id="test-sandbox", + image="python:3.11", + cpus=2.0, + memory="4Gi", + encrypted_image_auth="dGVzdC1lbmNyeXB0ZWQ=", + ) + + annotations = manifest["spec"]["template"]["metadata"]["annotations"] + assert annotations["example.com/image-auth"] == "dGVzdC1lbmNyeXB0ZWQ="