Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
581ebd9
test(cli): scaffold rock job run parser smoke test
dengwx2009 Apr 15, 2026
8f79cff
refactor(cli): add _fail() helper for consistent job-run errors
dengwx2009 Apr 15, 2026
1cee735
refactor(cli): stash run sub-parser on JobCommand for parser.error reuse
dengwx2009 Apr 15, 2026
5971b64
feat(cli): friendly error when rock job run has no input
dengwx2009 Apr 15, 2026
339817a
feat(cli): enforce --config vs --script mutual exclusion
dengwx2009 Apr 15, 2026
743c3ce
feat(cli): route --script/--script-content mutex through _fail
dengwx2009 Apr 15, 2026
18f4af9
feat(cli): reject --type harbor without --config; default --type to None
dengwx2009 Apr 15, 2026
253eb2e
refactor(cli): extract _config_from_flags helper for bash CLI-mode
dengwx2009 Apr 15, 2026
f4f3d67
refactor(cli): add _config_from_yaml helper with type-consistency check
dengwx2009 Apr 15, 2026
ea3b324
refactor(cli): add _apply_overrides helper (shared bash/harbor)
dengwx2009 Apr 15, 2026
d093cc7
feat(cli): clarify rock job run description; --timeout default None
dengwx2009 Apr 15, 2026
67287f6
refactor(cli): unify rock job run flow through mode-agnostic pipeline
dengwx2009 Apr 15, 2026
a392072
test(cli): assert rock job run --help lists both modes
dengwx2009 Apr 15, 2026
ec67f36
fix(cli): rename sub-parser flag to --job_config to avoid top-level c…
dengwx2009 Apr 15, 2026
900bc52
test(cli): remove superseded test_cli_job.py; cover arun dispatch in …
dengwx2009 Apr 15, 2026
b9eb42b
fix(cli): skip base_url/cluster backfill for job command so YAML wins
dengwx2009 Apr 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
494 changes: 494 additions & 0 deletions docs/dev/cli/README.md

Large diffs are not rendered by default.

271 changes: 208 additions & 63 deletions rock/cli/command/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,26 @@
logger = init_logger(__name__)


def _fail(parser: argparse.ArgumentParser, msg: str, *, hint: str | None = None) -> None:
"""Emit a consistent CLI error: message + optional hint + help pointer, then exit 2.

Uses ``parser.error()`` which prints the parser's usage line to stderr, writes
the message, and calls ``sys.exit(2)``. Never returns.
"""
parts = [msg]
if hint:
parts.extend(["", hint])
parts.extend(["", "Run `rock job run --help` for full usage."])
parser.error("\n".join(parts))


class JobCommand(Command):
name = "job"

# Cached reference to the `run` sub-parser; populated by add_parser_to,
# used by _job_run to call parser.error() consistently.
_run_parser: argparse.ArgumentParser | None = None

async def arun(self, args: argparse.Namespace):
if args.job_command == "run":
await self._job_run(args)
Expand All @@ -17,73 +34,64 @@ async def arun(self, args: argparse.Namespace):

async def _job_run(self, args: argparse.Namespace):
# Import lazily to avoid pulling in bench/Harbor modules for bash-only uses
from rock.sdk.bench.models.trial.config import RockEnvironmentConfig
from rock.sdk.job import Job
from rock.sdk.job.config import BashJobConfig

job_type = args.type or "bash"

if job_type == "bash":
if not args.script and not args.script_content:
logger.error("Either --script or --script-content is required for bash type")
return
if args.script and args.script_content:
logger.error("--script and --script-content cannot be used together")
return

env_kwargs = {}
if args.image:
env_kwargs["image"] = args.image
if args.memory:
env_kwargs["memory"] = args.memory
if args.cpus:
env_kwargs["cpus"] = args.cpus
if getattr(args, "base_url", None):
env_kwargs["base_url"] = args.base_url
if getattr(args, "cluster", None):
env_kwargs["cluster"] = args.cluster
if getattr(args, "extra_headers", None):
env_kwargs["extra_headers"] = args.extra_headers
if getattr(args, "xrl_authorization", None):
env_kwargs["xrl_authorization"] = args.xrl_authorization

uploads = []
if args.local_path:
uploads.append((args.local_path, args.target_path))

env = {}
if getattr(args, "env", None):
for item in args.env:
key, _, value = item.partition("=")
env[key] = value

config = BashJobConfig(
script=args.script_content,
script_path=args.script,
environment=RockEnvironmentConfig(
**env_kwargs,
uploads=uploads,
auto_stop=True,
env=env,
parser = self._run_parser

# ── 1. Mode validation ────────────────────────────────────────
has_config = bool(args.job_config)
has_script = bool(args.script or args.script_content)

if not has_config and not has_script:
_fail(
parser,
"Missing job definition. Provide either a YAML config or inline script.",
hint=(
"Examples:\n"
" rock job run --job_config job.yaml # any job type, auto-detected\n"
" rock job run --script path/to/run.sh # bash, script file\n"
' rock job run --script-content "echo hi" # bash, inline snippet'
),
)

if has_config and has_script:
_fail(
parser,
"--job_config is mutually exclusive with --script / --script-content.",
hint=(
"Pick one mode:\n"
" - YAML mode: rock job run --job_config job.yaml\n"
" - flags mode: rock job run --script run.sh"
),
timeout=args.timeout,
)

elif job_type == "harbor":
if not args.config:
logger.error("--config is required for harbor type")
return
from rock.sdk.bench.models.job.config import HarborJobConfig
if args.script and args.script_content:
_fail(
parser,
"--script and --script-content are mutually exclusive (pick a file path OR an inline snippet).",
)

config = HarborJobConfig.from_yaml(args.config)
if args.image:
config.environment.image = args.image
config.environment.auto_stop = True
if args.type == "harbor" and not has_config:
_fail(
parser,
"--type harbor requires --job_config <yaml>.",
hint=(
"Harbor jobs cannot be expressed purely via CLI flags.\n"
"Example:\n"
" rock job run --job_config harbor.yaml"
),
)

# ── 2. Build config ───────────────────────────────────────────
if has_config:
config = self._config_from_yaml(parser, args)
else:
logger.error(f"Unknown job type: {job_type}")
return
config = self._config_from_flags(args)

# ── 3. Apply overrides (shared across both modes) ─────────────
self._apply_overrides(config, args)

# ── 4. Run ────────────────────────────────────────────────────
try:
result = await Job(config).run()
if result.trial_results:
Expand All @@ -95,23 +103,157 @@ async def _job_run(self, args: argparse.Namespace):
except Exception as e:
logger.error(f"Job failed: {e}")

def _apply_overrides(self, config, args: argparse.Namespace) -> None:
"""Apply CLI overrides that are valid in both YAML and flags modes.

Mutates ``config`` in place. Works for both BashJobConfig and HarborJobConfig
because both use ``RockEnvironmentConfig`` for ``environment``.
"""
env = config.environment
if args.image:
env.image = args.image
if args.memory:
env.memory = args.memory
if args.cpus:
env.cpus = args.cpus
if getattr(args, "base_url", None):
env.base_url = args.base_url
if getattr(args, "cluster", None):
env.cluster = args.cluster
if getattr(args, "extra_headers", None):
env.extra_headers = args.extra_headers
if getattr(args, "xrl_authorization", None):
env.xrl_authorization = args.xrl_authorization

for item in args.env or []:
key, _, value = item.partition("=")
env.env[key] = value

if args.local_path:
env.uploads = list(env.uploads) + [(args.local_path, args.target_path)]

if args.timeout is not None:
config.timeout = args.timeout

env.auto_stop = True

def _config_from_yaml(self, parser: argparse.ArgumentParser, args: argparse.Namespace):
"""Load config via JobConfig.from_yaml and enforce --type consistency."""
from pathlib import Path

from rock.sdk.job.config import BashJobConfig, JobConfig

path = args.job_config
if not Path(path).is_file():
_fail(parser, f"--job_config path does not exist: {path}")

try:
config = JobConfig.from_yaml(path)
except ValueError as exc:
# from_yaml raises ValueError with a combined Bash/Harbor error message
_fail(parser, f"Failed to load --job_config {path!r}:\n{exc}")
except Exception as exc: # YAML parse error, IO error, etc.
_fail(parser, f"Failed to load --job_config {path!r}:\n{exc}")

if args.type is not None:
actual_type = "bash" if isinstance(config, BashJobConfig) else "harbor"
if args.type != actual_type:
_fail(
parser,
f"--type {args.type} does not match YAML (detected as {actual_type}).",
hint="Remove --type and let the YAML decide, or pass a matching config file.",
)
return config

def _config_from_flags(self, args: argparse.Namespace):
"""Build a BashJobConfig purely from CLI flags (mode B)."""
from rock.sdk.bench.models.trial.config import RockEnvironmentConfig
from rock.sdk.job.config import BashJobConfig

env: dict[str, str] = {}
for item in args.env or []:
key, _, value = item.partition("=")
env[key] = value

uploads = [(args.local_path, args.target_path)] if args.local_path else []

env_kwargs: dict = {}
if args.image:
env_kwargs["image"] = args.image
if args.memory:
env_kwargs["memory"] = args.memory
if args.cpus:
env_kwargs["cpus"] = args.cpus
if getattr(args, "base_url", None):
env_kwargs["base_url"] = args.base_url
if getattr(args, "cluster", None):
env_kwargs["cluster"] = args.cluster
if getattr(args, "extra_headers", None):
env_kwargs["extra_headers"] = args.extra_headers
if getattr(args, "xrl_authorization", None):
env_kwargs["xrl_authorization"] = args.xrl_authorization

cfg_kwargs: dict = {}
if args.timeout is not None:
cfg_kwargs["timeout"] = args.timeout

return BashJobConfig(
script=args.script_content,
script_path=args.script,
environment=RockEnvironmentConfig(
**env_kwargs,
uploads=uploads,
auto_stop=True,
env=env,
),
**cfg_kwargs,
)

@staticmethod
async def add_parser_to(subparsers: argparse._SubParsersAction):
job_parser = subparsers.add_parser("job", help="Manage sandbox jobs")
job_subparsers = job_parser.add_subparsers(dest="job_command")

run_parser = job_subparsers.add_parser("run", help="Run a job in a sandbox")
run_parser.add_argument("--type", choices=["bash", "harbor"], default="bash", help="Job type (default: bash)")
run_parser = job_subparsers.add_parser(
"run",
help="Run a job in a sandbox",
description=(
"Run a sandbox job in one of two mutually-exclusive modes:\n"
" (1) YAML mode : --job_config <file> (type auto-detected)\n"
" (2) flags mode : --script / --script-content (bash only)"
),
formatter_class=argparse.RawDescriptionHelpFormatter,
)
run_parser.add_argument(
"--type",
choices=["bash", "harbor"],
default=None,
help="Explicit job type (flags mode only; YAML mode auto-detects).",
)
# bash args
run_parser.add_argument("--script", default=None, help="Path to script file")
run_parser.add_argument("--script-content", default=None, help="Inline script content")
# harbor args
run_parser.add_argument("--config", default=None, help="Harbor YAML config path")
# YAML config (mode A) — flag name is --job_config (distinct from the
# top-level --config that points at the CLI INI config). Also accept
# --job-config as the hyphen-form alias.
run_parser.add_argument(
"--job_config",
"--job-config",
dest="job_config",
default=None,
metavar="YAML",
help="Job YAML config path (any job type; auto-detected).",
)
# shared args
run_parser.add_argument("--image", default=None, help="Sandbox image")
run_parser.add_argument("--memory", default=None, help="Memory (e.g. 8g)")
run_parser.add_argument("--cpus", default=None, type=float, help="CPU count")
run_parser.add_argument("--timeout", type=int, default=3600, help="Timeout in seconds")
run_parser.add_argument(
"--timeout",
type=int,
default=None,
help="Timeout in seconds (overrides YAML when given).",
)
run_parser.add_argument("--local-path", default=None, help="Local dir to upload")
run_parser.add_argument("--target-path", default="/root/job", help="Target dir in sandbox")
run_parser.add_argument("--base-url", default=None, help="Admin service base URL")
Expand All @@ -128,3 +270,6 @@ async def add_parser_to(subparsers: argparse._SubParsersAction):
default=None,
help="XRL authorization token",
)

# Stash on the class so _job_run can call parser.error() with the right parser.
JobCommand._run_parser = run_parser
19 changes: 12 additions & 7 deletions rock/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,18 @@ def load_config_from_file(args):
manager = ConfigManager(config_path)
cli_config = manager.get_config()

# If command line arguments are not set, use configuration file
if not args.base_url:
args.base_url = cli_config.base_url
if not args.auth_token and (authorization := cli_config.extra_headers.get("xrl-authorization")):
args.auth_token = authorization
if not args.cluster and (cluster := cli_config.extra_headers.get("cluster")):
args.cluster = cluster
# For the `job` command, the job YAML (--job_config) is the source of truth
# for base_url / cluster / auth_token. Backfilling from the CLI INI here would
# clobber YAML-specified values inside JobCommand._apply_overrides, which
# cannot distinguish user-supplied --base-url from an INI backfill.
# (Users who want INI defaults for a job can pass --base-url explicitly.)
if args.command != "job":
if not args.base_url:
args.base_url = cli_config.base_url
if not args.auth_token and (authorization := cli_config.extra_headers.get("xrl-authorization")):
args.auth_token = authorization
if not args.cluster and (cluster := cli_config.extra_headers.get("cluster")):
args.cluster = cluster

# Process extra_headers, first get from configuration file
extra_headers = cli_config.extra_headers.copy()
Expand Down
Empty file added tests/unit/cli/__init__.py
Empty file.
Empty file.
Loading
Loading