Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions rock/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,11 @@ class K8sConfig:
# Watch configuration
resync_period: int = 60 # How often (seconds) to perform a full re-list

# Image auth encryption key used to encrypt registry credentials passed to
# batch-agent via BatchSandbox annotations. The key must be exactly 32 bytes
# when UTF-8 encoded; any other length disables image auth encryption.
# Falls back to the ROCK_IMAGE_AUTH_KEY environment variable if not set here.
image_auth_key: str | None = None

# ============================================================================
# DEPRECATED: The following fields are deprecated and will be removed in a
# future version. Do NOT use them in new code.
Expand Down
54 changes: 53 additions & 1 deletion rock/sandbox/operator/k8s/provider.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""K8s provider implementations for managing sandbox resources."""

import base64
import json
import os
import re
from abc import ABC, abstractmethod
from typing import Any, Protocol

import yaml
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from kubernetes import client
from kubernetes import config as k8s_config

Expand Down Expand Up @@ -195,6 +198,7 @@ def __init__(self, k8s_config: K8sConfig):
self._k8s_api: K8sApiClient | None = None
self._initialized = False
self._nacos_provider = None
self._image_auth_key = self._load_image_auth_key(k8s_config)

# Initialize template loader with config templates
self._template_loader = K8sTemplateLoader(
Expand All @@ -203,6 +207,27 @@ def __init__(self, k8s_config: K8sConfig):
)
logger.info(f"Available K8S templates: {', '.join(self._template_loader.available_templates)}")

def _load_image_auth_key(self, k8s_config: K8sConfig) -> bytes | None:
    """Resolve the image auth encryption key.

    ``k8s_config.image_auth_key`` takes precedence. When it is unset or
    empty, the ``ROCK_IMAGE_AUTH_KEY`` environment variable is used instead.

    Args:
        k8s_config: K8s configuration carrying the optional ``image_auth_key``.

    Returns:
        The UTF-8 encoded key when it is exactly 32 bytes long. ``None`` when
        no key is configured, or when the key has the wrong length (a warning
        is logged in that case).
    """
    configured = k8s_config.image_auth_key
    raw_key = configured if configured else os.environ.get("ROCK_IMAGE_AUTH_KEY", "")
    if raw_key:
        key_bytes = raw_key.encode("utf-8")
        if len(key_bytes) == 32:
            return key_bytes
        # Wrong length: report it and leave encryption disabled.
        logger.warning(
            f"ROCK_IMAGE_AUTH_KEY / K8sConfig.image_auth_key must be 32 bytes, got {len(key_bytes)}; "
            "image auth encryption disabled"
        )
    return None

def set_nacos_provider(self, nacos_provider):
"""Set Nacos config provider for dynamic pool configuration.

Expand Down Expand Up @@ -572,7 +597,8 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) ->
# Template mode: build from template
template_name = self._get_template_name(config)

# Build manifest using template loader
# Build manifest using template loader. Auth info is passed to the
# template so the template itself decides where to render it.
manifest = self._template_loader.build_manifest(
template_name=template_name,
sandbox_id=sandbox_id,
Expand All @@ -583,6 +609,7 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) ->
num_gpus=config.num_gpus,
accelerator_type=config.accelerator_type,
limit_cpus=config.limit_cpus,
encrypted_image_auth=self._encrypt_image_auth(config),
)

logger.info(
Expand All @@ -591,6 +618,31 @@ async def _build_batchsandbox_manifest(self, config: DockerDeploymentConfig) ->
)
return manifest

def _encrypt_image_auth(self, config: DockerDeploymentConfig) -> str | None:
    """Encrypt registry credentials into the pouch auth format.

    The credentials are serialized as a JSON object with ``username`` and
    ``password`` keys, base64-encoded, encrypted with AES-GCM under a fresh
    random 12-byte nonce, and returned as base64 of ``nonce + ciphertext``.

    Args:
        config: Deployment config providing ``registry_username``,
            ``registry_password`` and ``container_name``.

    Returns:
        Encrypted auth string, or None when no key is loaded, when neither a
        registry username nor a registry password is set, or when encryption
        fails (a warning is logged in that case).
        The template can use ``{{ encrypted_image_auth | default('public', true) }}``
        to render 'public' when this returns None.
    """
    key = self._image_auth_key
    if not key or not (config.registry_username or config.registry_password):
        return None

    try:
        credentials = {"username": config.registry_username, "password": config.registry_password}
        # base64 output is pure ASCII, so the bytes can be fed to the cipher directly.
        encoded_credentials = base64.b64encode(json.dumps(credentials).encode())
        cipher = AESGCM(key)
        nonce = os.urandom(12)
        sealed = cipher.encrypt(nonce, encoded_credentials, None)
        return base64.b64encode(nonce + sealed).decode("ascii")
    except Exception as e:
        # Best effort: a failure here makes the template fall back to its default.
        logger.warning(f"Failed to encrypt image auth for {config.container_name}: {e}")
        return None

async def _create(self, config: DockerDeploymentConfig) -> tuple[str, str]:
"""Create a BatchSandbox resource without waiting for IP allocation.

Expand Down
7 changes: 7 additions & 0 deletions rock/sandbox/operator/k8s/template_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def build_manifest(
num_gpus: int | None = None,
accelerator_type: str | None = None,
limit_cpus: float | None = None,
encrypted_image_auth: str | None = None,
) -> dict[str, Any]:
"""Build a complete BatchSandbox manifest from template.

Expand All @@ -87,6 +88,9 @@ def build_manifest(
overcommit baseline. A ``limit_cpus > cpus`` value lets the container
burst above its reservation, mirroring the Ray path's ``--cpus`` flag.

Image auth variables are passed to the template so the template itself
can decide where to render them (e.g. encrypted-image-auth annotation).

Args:
template_name: Name of the template to use.
sandbox_id: Sandbox identifier (auto-generated if missing).
Expand All @@ -99,6 +103,8 @@ def build_manifest(
limit_cpus: CPU hard cap for overcommit (rendered via
{{ limit_cpus }}). When None, falls back to ``cpus`` to keep
requests.cpu == limits.cpu.
encrypted_image_auth: Pre-encrypted pouch auth string
(rendered via {{ encrypted_image_auth }}).

Returns:
Complete BatchSandbox manifest.
Expand Down Expand Up @@ -132,6 +138,7 @@ def build_manifest(
"num_gpus": num_gpus if num_gpus is not None else "",
"accelerator_type": accelerator_type if accelerator_type is not None else "",
"limit_cpus": str(effective_limit_cpus) if effective_limit_cpus is not None else "",
"encrypted_image_auth": encrypted_image_auth if encrypted_image_auth is not None else "",
}

rendered = render_node(config, self._jinja_env, ctx)
Expand Down
70 changes: 70 additions & 0 deletions tests/unit/sandbox/operator/test_k8s_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,3 +534,73 @@ async def test_limit_cpus_defaults_to_cpus_when_none(self):
container = manifest["spec"]["template"]["spec"]["containers"][0]
assert container["resources"]["requests"]["cpu"] == "4.0"
assert container["resources"]["limits"]["cpu"] == "4.0"


# Annotation key under which the test template renders the image auth value.
IMAGE_AUTH_ANNOTATION = "example.com/encrypted-image-auth"

# Minimal template set for the image auth tests. The pod annotation renders
# ``encrypted_image_auth`` and falls back to the literal 'public'; the second
# argument ``true`` makes Jinja's ``default`` filter also replace falsy values
# such as the empty string, not only undefined ones.
IMAGE_AUTH_TEMPLATE = {
"default": {
"namespace": "rock-test",
"ports": {"proxy": 8000, "server": 8080, "ssh": 22},
"template": {
"metadata": {
"labels": {"app": "test"},
"annotations": {
IMAGE_AUTH_ANNOTATION: "{{ encrypted_image_auth | default('public', true) }}"
},
},
"spec": {"containers": [{"name": "main", "image": "python:3.11"}]},
},
}
}


def _make_provider_with_templates(templates: dict) -> BatchSandboxProvider:
    """Create a BatchSandboxProvider whose K8sConfig uses the given templates.

    No kubeconfig path is set and the template map is empty.
    """
    k8s_config = K8sConfig(kubeconfig_path=None, templates=templates, template_map={})
    return BatchSandboxProvider(k8s_config=k8s_config)


class TestBuildBatchSandboxManifestImageAuth:
    """build_manifest must pass encrypted image auth to the template so the
    template can render it into the desired annotation.

    When no key or no credentials are available, the template's ``default``
    filter must render the literal 'public' instead.
    """

    async def test_renders_encrypted_image_auth_from_template(self, monkeypatch):
        """With a valid key and credentials, the annotation holds ciphertext."""
        import base64  # local import: the file's import block is not touched here

        monkeypatch.setenv("ROCK_IMAGE_AUTH_KEY", "0" * 32)
        provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE)
        config = make_config()
        config.registry_username = "user"
        config.registry_password = "pass"

        manifest = await provider._build_batchsandbox_manifest(config)

        annotations = manifest["spec"]["template"]["metadata"]["annotations"]
        assert IMAGE_AUTH_ANNOTATION in annotations
        encrypted = annotations[IMAGE_AUTH_ANNOTATION]
        # A bare non-empty check would also pass on the 'public' fallback that
        # the template renders when encryption fails, so rule it out explicitly.
        assert encrypted != "public"
        raw = base64.b64decode(encrypted, validate=True)
        # Payload layout: 12-byte nonce + ciphertext + 16-byte AES-GCM tag.
        assert len(raw) > 12 + 16

    async def test_public_auth_when_key_missing(self, monkeypatch):
        """Without an encryption key, credentials are ignored and 'public' is rendered."""
        monkeypatch.delenv("ROCK_IMAGE_AUTH_KEY", raising=False)
        provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE)
        config = make_config()
        config.registry_username = "user"
        config.registry_password = "pass"

        manifest = await provider._build_batchsandbox_manifest(config)

        annotations = manifest["spec"]["template"]["metadata"]["annotations"]
        assert annotations[IMAGE_AUTH_ANNOTATION] == "public"

    async def test_public_auth_when_credentials_missing(self, monkeypatch):
        """With a key but no registry credentials, 'public' is rendered."""
        monkeypatch.setenv("ROCK_IMAGE_AUTH_KEY", "0" * 32)
        provider = _make_provider_with_templates(IMAGE_AUTH_TEMPLATE)
        config = make_config()

        manifest = await provider._build_batchsandbox_manifest(config)

        annotations = manifest["spec"]["template"]["metadata"]["annotations"]
        assert annotations[IMAGE_AUTH_ANNOTATION] == "public"
29 changes: 29 additions & 0 deletions tests/unit/sandbox/operator/test_k8s_template_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,3 +348,32 @@ def test_build_manifest_drops_disk_when_none(self):
container = manifest["spec"]["template"]["spec"]["containers"][0]
assert "ephemeral-storage" not in container["resources"]["requests"]
assert "ephemeral-storage" not in container["resources"]["limits"]

def test_build_manifest_passes_encrypted_image_auth(self):
    """Test that encrypted_image_auth is exposed to the template."""
    annotation_key = "example.com/image-auth"
    auth_value = "dGVzdC1lbmNyeXB0ZWQ="

    # Template whose only annotation renders the variable under test verbatim.
    pod_template = {
        "metadata": {"annotations": {annotation_key: "{{ encrypted_image_auth }}"}},
        "spec": {"containers": [{"name": "main", "image": "python:3.11"}]},
    }
    template_set = {
        "default": {
            "ports": {"proxy": 8000, "server": 8080, "ssh": 22},
            "template": pod_template,
        }
    }
    loader = K8sTemplateLoader(templates=template_set, default_namespace="rock-test")

    manifest = loader.build_manifest(
        template_name="default",
        sandbox_id="test-sandbox",
        image="python:3.11",
        cpus=2.0,
        memory="4Gi",
        encrypted_image_auth=auth_value,
    )

    rendered = manifest["spec"]["template"]["metadata"]["annotations"]
    assert rendered[annotation_key] == auth_value
Loading