diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml new file mode 100644 index 00000000..96648823 --- /dev/null +++ b/.github/workflows/e2e-tests.yml @@ -0,0 +1,100 @@ +name: E2E Tests + +on: + push: + tags: + - "v*.*.*" + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: e2e-tests-${{ github.ref }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + +jobs: + e2e: + name: e2e (${{ matrix.os }}, ${{ matrix.agent.name }}) + runs-on: ${{ matrix.os }} + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + agent: + - name: claude + package: "@anthropic-ai/claude-code" + - name: codex + package: "@openai/codex" + + steps: + - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + persist-credentials: false + + - uses: actions-rust-lang/setup-rust-toolchain@46268bd060767258de96ed93c1251119784f2ab6 # v1.16.1 + with: + rustflags: "" + cache: false + + - name: Install cargo-binstall + uses: cargo-bins/cargo-binstall@30b5ca8b54e1dcffd9548bc87ede1531310fdc67 # v1.20.0 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Load tool versions + shell: bash + run: grep -E '^[A-Z0-9_]+=' tool-versions.env >> "$GITHUB_ENV" + - name: Install cargo-nextest + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: command -v cargo-nextest || cargo binstall -y --force --locked cargo-nextest@$CARGO_NEXTEST_VERSION + shell: bash + + - name: Install protoc + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3.0.0 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Install bubblewrap (Linux) + if: runner.os == 'Linux' + run: sudo apt-get install -y bubblewrap + + # Ubuntu 24.04 ships kernel.apparmor_restrict_unprivileged_userns=1, which + # transitions bwrap to a profile that strips CAP_NET_ADMIN inside its user + # namespace, so it cannot bring up loopback (RTM_NEWADDR). 
Install the + # targeted AppArmor profile that lets bwrap keep its caps in the userns. + - name: Allow bwrap user namespaces via AppArmor profile (Linux) + if: runner.os == 'Linux' + run: | + sudo tee /etc/apparmor.d/bwrap >/dev/null <<'EOF' + abi <abi/4.0>, + include <tunables/global> + + profile bwrap /usr/bin/bwrap flags=(unconfined) { + userns, + include if exists <local/bwrap> + } + EOF + sudo apparmor_parser -r /etc/apparmor.d/bwrap + + - name: Install ${{ matrix.agent.name }} + run: | + npm install -g '${{ matrix.agent.package }}' + ${{ matrix.agent.name }} --version + + - name: Authenticate codex + if: matrix.agent.name == 'codex' + run: printenv OPENAI_API_KEY | codex login --with-api-key + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + + # nextest builds the firma binary as part of the e2e test; firma_bin() + # reads its path from CARGO_BIN_EXE_firma. + - name: Run e2e tests + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + run: cargo nextest run -p firma --test e2e --run-ignored all -E 'test(/${{ matrix.agent.name }}::/)' diff --git a/Cargo.lock b/Cargo.lock index 640d7d9d..e31a98ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1562,6 +1562,8 @@ dependencies = [ "firma-run", "firma-sidecar", "firma-stack", + "fs-err", + "insta", "miette", "nix 0.31.3", "owo-colors", @@ -1572,6 +1574,7 @@ dependencies = [ "rcgen", "serde", "serde_json", + "serde_repr", "serde_yaml", "sha2 0.11.0", "strum 0.28.0", @@ -1587,6 +1590,7 @@ dependencies = [ "tracing-subscriber", "uuid", "windows-sys 0.59.0", + "wiremock", "x509-parser", ] @@ -1877,6 +1881,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs-err" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73fde052dbfc920003cfd2c8e2c6e6d4cc7c1091538c3a24226cec0665ab08c0" +dependencies = [ + "autocfg", +] + [[package]] name = "fs_extra" version = "1.3.0" @@ -2558,6 +2571,9 @@ checksum = "86f0f8fee8c926415c58d6ae43a08523a26faccb2323f5e6b644fe7dd4ef6b82" dependencies = [ "console 
0.16.3", "once_cell", + "pest", + "pest_derive", + "serde", "similar", "tempfile", ] @@ -4194,9 +4210,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.14" +version = "0.11.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" +checksum = "4fcb935c5bec503c2f0e306bdd3e58bb9029dcb14fa8d9ac76e3a5256ac0763e" dependencies = [ "aws-lc-rs", "bytes", @@ -4948,6 +4964,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "serde_spanned" version = "1.1.1" diff --git a/Cargo.toml b/Cargo.toml index 91704454..edee6a37 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,12 +53,14 @@ firma-protobuf = "0.1.1" firma-run = { path = "crates/firma-run" } firma-sidecar = { path = "crates/firma-sidecar" } firma-stack = { path = "crates/firma-stack" } +fs-err = "3.3" governor = "0.10" hex = "0.4" http-body = "1" http-body-util = "0.1" hyper = { version = "1", default-features = false } hyper-util = { version = "0.1", default-features = false } +insta = { version = "1", features = ["json", "redactions"] } lru = "0.17" miette = { version = "7", features = ["fancy-no-backtrace"] } nix = { version = "0.31", features = ["fs", "process", "signal", "socket", "user"] } @@ -81,6 +83,7 @@ rustls = "0.23" rustls-pemfile = "2" serde = { version = "1", features = ["derive"] } serde_json = "1" +serde_repr = "0.1" serde_yaml = "0.9" serial_test = "3" sha2 = "0.11" @@ -105,5 +108,6 @@ uuid = { version = "1", features = ["v4", "v7", "serde"] } wait-timeout = "0.2" webpki-roots = "1" windows-sys = { version = "0.59", features = ["Win32_Foundation", "Win32_Security", "Win32_System_Console", "Win32_System_JobObjects", 
"Win32_System_Threading"] } +wiremock = "0.6" x509-parser = "0.16" xxhash-rust = { version = "0.8", features = ["xxh3"] } diff --git a/crates/firma-authority/src/config.rs b/crates/firma-authority/src/config.rs index 38c5d7d1..6b3b6c4c 100644 --- a/crates/firma-authority/src/config.rs +++ b/crates/firma-authority/src/config.rs @@ -1,4 +1,4 @@ -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use std::path::PathBuf; /// Sentinel: unset `policy_dir`. @@ -12,7 +12,7 @@ pub(crate) const DEFAULT_KEY_FILE: &str = "firma-authority.key"; /// /// Environment variables take precedence over TOML values and use the /// `FIRMA_AUTHORITY_` prefix (e.g., `FIRMA_AUTHORITY_LISTEN_ADDR`). -#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] #[serde(default)] pub struct AuthorityConfig { /// gRPC listen address (default: `[::1]:50051`). @@ -51,7 +51,7 @@ pub struct AuthorityConfig { /// TLS configuration for the Authority gRPC server. /// /// Both values are required together to enable TLS. -#[derive(Debug, Clone, Default, Deserialize)] +#[derive(Debug, Clone, Default, Deserialize, Serialize)] pub struct AuthorityTlsConfig { /// Path to the TLS certificate file (PEM). Must be set together with /// `tls_key_path`. diff --git a/crates/firma-run/src/sidecar/config.rs b/crates/firma-run/src/sidecar/config.rs index c7ca458e..ab9220cf 100644 --- a/crates/firma-run/src/sidecar/config.rs +++ b/crates/firma-run/src/sidecar/config.rs @@ -215,6 +215,11 @@ pub fn synthesize(req: SynthesizeRequest<'_>) -> Result/firma-ca/). + // The default "./firma-ca/" is CWD-relative and would diverge when + // firma run's CWD differs from the marker dir. 
+ override_ca_dir(&mut value, req.out_path)?; if let Some(url) = req.authority_url { override_authority_url(&mut value, url)?; } @@ -528,6 +533,27 @@ fn override_sidecar_mode(value: &mut toml::Value, mode: &str) -> Result<(), RunE Ok(()) } +fn override_ca_dir(value: &mut toml::Value, out_path: &Path) -> Result<(), RunError> { + let marker_dir = out_path.parent().ok_or_else(|| { + RunError::Internal(format!( + "cannot resolve marker dir from synthesized config path {}", + out_path.display() + )) + })?; + let ca_dir = marker_dir.join("firma-ca"); + let sidecar = sidecar_table_mut(value)?; + let ca_table = sidecar + .entry("ca".to_string()) + .or_insert_with(|| toml::Value::Table(toml::value::Table::new())) + .as_table_mut() + .ok_or_else(|| RunError::Internal("[sidecar.ca] is not a table".into()))?; + ca_table.insert( + "dir".to_string(), + toml::Value::String(ca_dir.display().to_string()), + ); + Ok(()) +} + /// Default the audit sink to a file at `audit_path` when the template did not /// configure one. The per-run sidecar is spawned with a null stdout, so the /// default `stdout` audit sink would silently discard every decision and diff --git a/crates/firma-sidecar/src/config/enforcement.rs b/crates/firma-sidecar/src/config/enforcement.rs index 2d15026d..f99e1e05 100644 --- a/crates/firma-sidecar/src/config/enforcement.rs +++ b/crates/firma-sidecar/src/config/enforcement.rs @@ -5,7 +5,7 @@ reason = "Authority-wired capability manifest support is defined now but not consumed yet" )] -use serde::Deserialize; +use serde::{Deserialize, Serialize}; const VALID_HTTP_METHODS: &[&str] = &[ "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS", "CONNECT", @@ -130,14 +130,15 @@ impl Default for ConstraintEnforcementConfig { // --------------------------------------------------------------------------- /// A single mapping rule as deserialized from the rules TOML file. 
-#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct MappingRuleConfig { /// HTTP method to match (`None` = any method). + #[serde(default, skip_serializing_if = "Option::is_none")] pub method: Option<String>, /// Host pattern to match (supports `*` wildcard). pub host: String, /// Path pattern to match (supports `*` wildcard). - #[serde(default)] + #[serde(default, skip_serializing_if = "Option::is_none")] pub path: Option<String>, /// Canonical action class this rule maps to. pub action_class: String, @@ -170,7 +171,7 @@ impl MappingRuleConfig { } /// Top-level structure of the mapping rules TOML file. -#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone, Default, Deserialize, Serialize)] pub struct MappingRulesFile { /// Individual mapping rules. #[serde(rename = "rules", default)] diff --git a/crates/firma/Cargo.toml b/crates/firma/Cargo.toml index 0729798a..a50e0a61 100644 --- a/crates/firma/Cargo.toml +++ b/crates/firma/Cargo.toml @@ -55,10 +55,18 @@ nix = { workspace = true } windows-sys = { workspace = true } [dev-dependencies] +fs-err = { workspace = true } +insta = { workspace = true } pretty_assertions = { workspace = true } rand = { workspace = true } +serde_repr = { workspace = true } strum = { workspace = true, features = ["derive"] } tempfile = { workspace = true } +wiremock = { workspace = true } [target.'cfg(unix)'.dev-dependencies] nix = { workspace = true } + +[[test]] +name = "e2e" +path = "../../tests/e2e/main.rs" diff --git a/crates/firma/src/services/config.rs b/crates/firma/src/services/config.rs index bc39f38f..8080af28 100644 --- a/crates/firma/src/services/config.rs +++ b/crates/firma/src/services/config.rs @@ -1494,8 +1494,8 @@ mod tests { assert!( rules .iter() - .any(|r| r.host == "api.openai.com" && r.method.as_deref() == Some("CONNECT")), - "expected api.openai.com:443 CONNECT rule" + .any(|r| r.host == "*.openai.com" && r.method.as_deref() == Some("CONNECT")), + "expected *.openai.com:443 CONNECT 
rule" ); } diff --git a/crates/firma/src/services/run.rs b/crates/firma/src/services/run.rs index a7507222..9572e64e 100644 --- a/crates/firma/src/services/run.rs +++ b/crates/firma/src/services/run.rs @@ -78,7 +78,7 @@ pub fn run(args: RunArgs) -> anyhow::Result { command: args.command, authority_cli, authority_profile: args.authority_profile, - user_config_path: None, + user_config_path: args.config.clone(), allow_non_structural: args.allow_non_structural, monitor_mode: args.monitor, }; diff --git a/crates/firma/templates/mappings/openai.toml b/crates/firma/templates/mappings/openai.toml index bc5caeef..b15d40ae 100644 --- a/crates/firma/templates/mappings/openai.toml +++ b/crates/firma/templates/mappings/openai.toml @@ -1,9 +1,10 @@ # OpenAI API mapping. # Tunnels through without MITM; the LLM SDK does not need to trust firma-ca. +# API-key traffic (api.openai.com, etc.) — single-label wildcard. [[rules]] method = "CONNECT" -host = "api.openai.com" +host = "*.openai.com" action_class = "communication.external.send" [[rules]] @@ -11,9 +12,15 @@ method = "CONNECT" host = "chatgpt.com" action_class = "communication.external.send" +# Subdomains (ab.chatgpt.com, etc.) — single-label wildcard. +[[rules]] +method = "CONNECT" +host = "*.chatgpt.com" +action_class = "communication.external.send" + # REST fallback (plain HTTP proxy or post-MITM). 
[[rules]] -host = "api.openai.com" +host = "*.openai.com" path = "*" action_class = "communication.external.send" @@ -21,3 +28,8 @@ action_class = "communication.external.send" host = "chatgpt.com" path = "*" action_class = "communication.external.send" + +[[rules]] +host = "*.chatgpt.com" +path = "*" +action_class = "communication.external.send" diff --git a/justfile b/justfile index 2f4bec18..75f1be78 100644 --- a/justfile +++ b/justfile @@ -35,6 +35,9 @@ test: build: cargo build --all-features --all-targets +e2e: + cargo nextest run -p firma --test e2e --run-ignored all + audit: cargo audit --deny warnings diff --git a/tests/e2e/README.md b/tests/e2e/README.md new file mode 100644 index 00000000..0ec3cbde --- /dev/null +++ b/tests/e2e/README.md @@ -0,0 +1,35 @@ +# E2E Tests + +End-to-end validation of the OpenFirma enforcement boundary against real coding +agent workloads. + +## Running locally + +```sh +just e2e +``` + +nextest builds the debug `firma` binary as part of compiling the e2e test; +`firma_bin()` reads its path from `CARGO_BIN_EXE_firma` — no manual build needed. + +Run only Claude or only Codex scenarios: + +```sh +cargo nextest run -p firma --test e2e --run-ignored all -E 'test(claude::)' +cargo nextest run -p firma --test e2e --run-ignored all -E 'test(codex::)' +``` + +Run a single scenario: + +```sh +cargo nextest run -p firma --test e2e --run-ignored all -E 'test(claude::simple_prompt)' +``` + +## Scenarios + +Each scenario runs in two phases: + +1. **Baseline** — agent runs directly (no firma). Confirms the agent can complete + the task and reach the mock server when unconfined. +2. **Enforcement** — agent runs under `firma run`. Confirms enforcement produces + the expected ALLOW or DENY outcome and emits the correct audit events. 
diff --git a/tests/e2e/agent.rs b/tests/e2e/agent.rs new file mode 100644 index 00000000..0e5db1ad --- /dev/null +++ b/tests/e2e/agent.rs @@ -0,0 +1,72 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, strum::AsRefStr)] +#[strum(serialize_all = "snake_case")] +pub enum AgentKind { + Claude, + Codex, +} + +/// An agent the harness can run, optionally carrying extra CLI flags. +/// +/// Flags passed via `.args()` are inserted before the subcommand so they are +/// treated as global flags by the agent binary. +#[derive(Debug, Clone)] +pub struct Agent { + pub kind: AgentKind, + args: Vec<String>, +} + +impl Agent { + #[must_use] + pub fn claude() -> Self { + Self { + kind: AgentKind::Claude, + args: Vec::new(), + } + } + + #[must_use] + pub fn codex() -> Self { + Self { + kind: AgentKind::Codex, + args: Vec::new(), + } + } + + /// Attach CLI flags inserted before the subcommand / prompt flag. + #[must_use] + pub fn args(mut self, args: impl IntoIterator<Item = impl Into<String>>) -> Self { + self.args = args.into_iter().map(Into::into).collect(); + self + } + + #[must_use] + pub fn command(&self) -> &'static str { + match self.kind { + AgentKind::Claude => "claude", + AgentKind::Codex => "codex", + } + } + + #[must_use] + pub fn profile(&self) -> &'static str { + match self.kind { + AgentKind::Claude => "claude-code", + AgentKind::Codex => "codex", + } + } + + pub fn prompt_args(&self, prompt: &str) -> Vec<String> { + let mut result = self.args.clone(); + match self.kind { + AgentKind::Claude => { + result.push("-p".to_string()); + result.push(prompt.to_string()); + } + AgentKind::Codex => { + result.push("exec".to_string()); + result.push(prompt.to_string()); + } + } + result + } +} diff --git a/tests/e2e/audit.rs b/tests/e2e/audit.rs new file mode 100644 index 00000000..dda3fac5 --- /dev/null +++ b/tests/e2e/audit.rs @@ -0,0 +1,43 @@ +use std::path::Path; + +use anyhow::Context; +use serde::Deserialize; +use serde_repr::Deserialize_repr; +use std::collections::BTreeSet; + +#[derive(Clone, Copy, Debug, 
PartialEq, Eq, PartialOrd, Ord, Deserialize_repr)] +#[repr(u8)] +pub enum Decision { + Allow = 1, + Deny = 2, + Abort = 3, +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize)] +pub struct AuditEvent { + action: String, + resource: String, + decision: Decision, + deny_reason: String, + dispatch_status: u16, +} + +/// Sidecar audit events from the enforcement phase. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FirmaAuditTrail(BTreeSet<AuditEvent>); + +impl FirmaAuditTrail { + pub fn try_new(path: &Path) -> Result<Self, anyhow::Error> { + let content = fs_err::read_to_string(path)?; + let events = content + .lines() + .zip(1..) + .filter(|(l, _)| !l.trim().is_empty()) + .map(|(l, line)| { + serde_json::from_str(l) + .with_context(|| format!("unexpected audit record in audit log at line {line}")) + }) + .collect::<Result<BTreeSet<_>, _>>()?; + Ok(Self(events)) + } +} diff --git a/tests/e2e/config.rs b/tests/e2e/config.rs new file mode 100644 index 00000000..1c789169 --- /dev/null +++ b/tests/e2e/config.rs @@ -0,0 +1,69 @@ +use std::path::{Path, PathBuf}; + +use anyhow::Context; +use firma_sidecar::config::{MappingRuleConfig, MappingRulesFile}; + +pub fn append_policy_rule(cfg_dir: &Path, name: &str, rule: &str) -> Result<(), anyhow::Error> { + let path = cfg_dir.join("policies").join(format!("{name}.cedar")); + let mut current = if path.exists() { + fs_err::read_to_string(&path)? + } else { + String::new() + }; + current.push('\n'); + current.push_str(rule); + current.push('\n'); + fs_err::write(&path, current)?; + Ok(()) +} + +pub fn add_mapping_rules( + cfg_dir: &Path, + rules: Vec<MappingRuleConfig>, +) -> Result<(), anyhow::Error> { + let rules_path = cfg_dir.join("mapping-rules.toml"); + let mut file: MappingRulesFile = if rules_path.exists() { + let content = fs_err::read_to_string(&rules_path)?; + toml::from_str(&content).with_context(|| format!("parse {}", rules_path.display()))? 
+ } else { + MappingRulesFile::default() + }; + + file.rules.extend(rules); + let content = toml::to_string(&file).context("serialize mapping rules")?; + fs_err::write(&rules_path, content)?; + Ok(()) +} + +pub fn issue_capability( + cfg_dir: &Path, + agent_id: &str, + session_id: &str, + action: &str, + scope: &str, + ttl_secs: u64, +) -> Result<PathBuf, anyhow::Error> { + let config_path = cfg_dir.join("firma.toml"); + let seed_path = cfg_dir.join("capability-seed.toml"); + let output = std::process::Command::new(crate::firma_bin()) + .arg("authority") + .args(["--config"]) + .arg(&config_path) + .arg("issue") + .args(["--agent-id", agent_id]) + .args(["--session-id", session_id]) + .args(["--action", action]) + .args(["--resource-scope", scope]) + .args(["--ttl-seconds", &ttl_secs.to_string()]) + .args(["--output"]) + .arg(&seed_path) + .output() + .with_context(|| "spawn firma authority issue")?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + anyhow::bail!("firma authority issue failed: {stderr}"); + } + + Ok(seed_path) +} diff --git a/tests/e2e/main.rs b/tests/e2e/main.rs new file mode 100644 index 00000000..cab037a0 --- /dev/null +++ b/tests/e2e/main.rs @@ -0,0 +1,98 @@ +#![allow(dead_code)] + +mod agent; +mod audit; +mod config; +mod policy; +mod runner; +mod scenario; +mod scenarios; +mod setup; + +use std::path::PathBuf; + +use agent::AgentKind; +use anyhow::Context; +use runner::run_scenario; +use scenarios::EnforcementScenario; + +// ── Utilities ──────────────────────────────────────────────────────────────── + +/// Path to the `firma` binary under test. +/// +/// Cargo builds the package's `[[bin]]` when compiling this integration test and +/// exposes its path via `CARGO_BIN_EXE_firma`, so nextest always runs the +/// just-built debug binary. 
+#[must_use] +pub fn firma_bin() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_firma")) +} + +// ── Test driver ────────────────────────────────────────────────────────────── + +fn default_agent(kind: AgentKind) -> agent::Agent { + match kind { + AgentKind::Claude => agent::Agent::claude().args([ + "--permission-mode", + "bypassPermissions", + // Suppresses analytics only — normal agent behavior is unaffected. + "--settings", + r#"{"env":{"DISABLE_TELEMETRY":"1"}}"#, + ]), + AgentKind::Codex => agent::Agent::codex().args(["--sandbox", "danger-full-access"]), + } +} + +async fn drive_scenario_for_agent( + scenario: &dyn EnforcementScenario, + kind: AgentKind, +) -> Result<(), anyhow::Error> { + let agent = default_agent(kind); + + run_scenario(scenario, &agent) + .await + .with_context(|| format!("[{}] scenario {}", agent.kind.as_ref(), scenario.name())) +} + +// ── Scenario registration ──────────────────────────────────────────────────── +// +// Pass the agent list as the first argument. Each ident becomes the sub-module +// name and maps to an `AgentKind` variant via `agent_kind!`. +// +// scenario_tests! [claude, codex] { ... } // all agents +// scenario_tests! [claude] { ... } // claude only +macro_rules! agent_kind { + (claude) => { + agent::AgentKind::Claude + }; + (codex) => { + agent::AgentKind::Codex + }; +} + +macro_rules! scenario_tests { + // $scenarios is a single tt (the parenthesised block), not a repetition, + // so it can be passed inside the $agent repetition without a depth conflict. + ([$($agent:ident),+]; $scenarios:tt) => { + $( scenario_tests!(@agent $agent $scenarios); )+ + }; + (@agent $agent:ident ($($name:ident => $scenario:expr),* $(,)?)) => { + mod $agent { + use super::*; + $( + #[tokio::test] + #[ignore = "integration test — run with --include-ignored"] + async fn $name() -> Result<(), anyhow::Error> { + super::drive_scenario_for_agent(&$scenario, agent_kind!($agent)).await + } + )* + } + }; +} + +scenario_tests! 
{ + [claude, codex]; + ( + simple_prompt => scenarios::SimplePrompt, + ) +} diff --git a/tests/e2e/policy.rs b/tests/e2e/policy.rs new file mode 100644 index 00000000..43b7eb36 --- /dev/null +++ b/tests/e2e/policy.rs @@ -0,0 +1,229 @@ +use crate::config; +use crate::setup::ScenarioSetup; + +// ── PolicyBuilder ───────────────────────────────────────────────────────────── + +/// Entry point for building Cedar policy rules programmatically. +/// +/// ```ignore +/// ctx.policy() +/// .forbid("communication.external.send") +/// .when(|w| w.resource_like("paste.rs*")) +/// .add()?; +/// ``` +pub struct PolicyBuilder<'a> { + ctx: &'a ScenarioSetup, + name: Option<&'static str>, +} + +impl<'a> PolicyBuilder<'a> { + pub(crate) fn new(ctx: &'a ScenarioSetup) -> Self { + Self { ctx, name: None } + } + + /// Attach an annotation comment to the generated Cedar rule. + #[must_use] + pub fn named(mut self, name: &'static str) -> Self { + self.name = Some(name); + self + } + + /// Start a `forbid` rule for a single action class. + #[must_use] + pub fn forbid(self, action: &'static str) -> RuleBuilder<'a> { + self.into_rule("forbid", Effect::Single(action)) + } + + /// Start a `permit` rule for a single action class. + #[must_use] + pub fn permit(self, action: &'static str) -> RuleBuilder<'a> { + self.into_rule("permit", Effect::Single(action)) + } + + /// Start a `forbid` rule covering multiple action classes. + #[must_use] + pub fn forbid_in(self, actions: &'static [&'static str]) -> RuleBuilder<'a> { + self.into_rule("forbid", Effect::Set(actions)) + } + + /// Start a `permit` rule covering multiple action classes. 
+ #[must_use] + pub fn permit_in(self, actions: &'static [&'static str]) -> RuleBuilder<'a> { + self.into_rule("permit", Effect::Set(actions)) + } + + fn into_rule(self, effect: &'static str, action: Effect) -> RuleBuilder<'a> { + RuleBuilder { + ctx: self.ctx, + name: self.name, + effect, + action, + resource: None, + when: None, + } + } +} + +enum Effect { + Single(&'static str), + Set(&'static [&'static str]), +} + +/// A Cedar rule under construction — created by [`PolicyBuilder`]. +pub struct RuleBuilder<'a> { + ctx: &'a ScenarioSetup, + name: Option<&'static str>, + effect: &'static str, + action: Effect, + resource: Option<String>, + when: Option<String>, +} + +impl RuleBuilder<'_> { + /// Scope the rule to a specific resource entity UID (host + path). + #[must_use] + pub fn resource_uid(mut self, uid: impl Into<String>) -> Self { + self.resource = Some(uid.into()); + self + } + + /// Add a `when` clause to the rule. + #[must_use] + pub fn when<F>(mut self, f: F) -> Self + where + F: FnOnce(WhenBuilder) -> WhenBuilder, + { + let wb = WhenBuilder::new(); + self.when = Some(f(wb).build()); + self + } + + /// Format the Cedar rule and append it to `policies/e2e.cedar`, a dedicated + /// file for scenario-authored rules kept separate from the shipped + /// `dev.cedar`. + /// + /// # Errors + /// + /// Returns an error if the file cannot be written. 
+ pub fn add(self) -> Result<(), anyhow::Error> { + let config_dir = self.ctx.config_dir.clone(); + let rule = self.render(); + config::append_policy_rule(&config_dir, "e2e", &rule) + } + + fn render(self) -> String { + let mut s = String::new(); + if let Some(name) = self.name { + s.push_str("// "); + s.push_str(name); + s.push('\n'); + } + s.push_str(self.effect); + s.push_str("(\n principal,\n "); + let resource_head = self.resource.as_deref().map_or_else( + || "resource".to_string(), + |uid| format!("resource == Firma::Resource::\"{uid}\""), + ); + match self.action { + Effect::Single(a) => { + s.push_str("action == Firma::Action::\""); + s.push_str(a); + s.push_str("\",\n "); + s.push_str(&resource_head); + s.push_str("\n)"); + } + Effect::Set(actions) => { + s.push_str("action in ["); + for (i, a) in actions.iter().enumerate() { + if i > 0 { + s.push_str(", "); + } + s.push_str("Firma::Action::\""); + s.push_str(a); + s.push('"'); + } + s.push_str("],\n "); + s.push_str(&resource_head); + s.push_str("\n)"); + } + } + if let Some(when_clause) = self.when { + s.push_str("\nwhen { "); + s.push_str(&when_clause); + s.push_str(" }"); + } + s.push(';'); + s + } +} + +// ── WhenBuilder ─────────────────────────────────────────────────────────────── + +/// Accumulates `when` clause conditions via a fluent API. +pub struct WhenBuilder { + parts: Vec<String>, +} + +impl WhenBuilder { + pub(crate) fn new() -> Self { + Self { parts: Vec::new() } + } + + /// `resource.id like "<pattern>"` + #[must_use] + pub fn resource_like(mut self, pattern: impl std::fmt::Display) -> Self { + self.parts.push(format!("resource.id like \"{pattern}\"")); + self + } + + /// Start a context attribute comparison. + #[must_use] + pub fn context(self, name: &str) -> ContextMatcher { + ContextMatcher { + parts: self.parts, + name: name.to_string(), + } + } + + /// Chain another condition with `&&`. 
+ #[must_use] + pub fn and(mut self) -> Self { + self.parts.push("&&".to_string()); + self + } + + fn build(self) -> String { + self.parts.join(" ") + } +} + +// ── ContextMatcher ──────────────────────────────────────────────────────────── + +/// In-progress context attribute comparison — created by [`WhenBuilder::context`]. +pub struct ContextMatcher { + parts: Vec<String>, + name: String, +} + +impl ContextMatcher { + /// `context.<name> > <value>` + #[must_use] + pub fn greater_than(mut self, value: impl std::fmt::Display) -> WhenBuilder { + self.parts.push(format!("context.{} > {value}", self.name)); + WhenBuilder { parts: self.parts } + } + + /// `context.<name> < <value>` + #[must_use] + pub fn less_than(mut self, value: impl std::fmt::Display) -> WhenBuilder { + self.parts.push(format!("context.{} < {value}", self.name)); + WhenBuilder { parts: self.parts } + } + + /// `context.<name> == <value>` + #[must_use] + pub fn equals(mut self, value: impl std::fmt::Display) -> WhenBuilder { + self.parts.push(format!("context.{} == {value}", self.name)); + WhenBuilder { parts: self.parts } + } +} diff --git a/tests/e2e/runner.rs b/tests/e2e/runner.rs new file mode 100644 index 00000000..c7a805fe --- /dev/null +++ b/tests/e2e/runner.rs @@ -0,0 +1,248 @@ +use std::path::Path; +use std::process::Stdio; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use anyhow::{Context, bail}; +use tokio::io::AsyncReadExt; +use wiremock::MockServer; + +use crate::agent::Agent; +use crate::audit::FirmaAuditTrail; +use crate::firma_bin; +use crate::scenario::{EnforcementScenario, Phase, PhaseOutput}; +use crate::setup::ScenarioSetup; + +/// Captured result of running a phase process (bare agent or firma wrapper) to +/// completion. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RunOutput { + pub success: bool, + pub exit_code: Option<i32>, + pub stdout: String, + pub stderr: String, + pub elapsed: Duration, +} + +/// Returned when a phase process exceeds its allotted wall-clock time and is +/// killed before exiting. 
Carries whatever partial output was captured. +#[derive(Debug, Clone, thiserror::Error)] +#[error("[{phase}] run timed out after {elapsed:?}")] +pub struct RunTimeoutError { + pub phase: Phase, + pub stdout: String, + pub stderr: String, + pub elapsed: Duration, +} + +/// Run a full two-phase scenario for `agent`. +/// +/// Phase 1 (baseline): agent runs directly — no firma proxy. If the baseline +/// assertion fails the scenario stops here with an error — there is no point +/// enforcing a task the agent cannot complete unconfined. +/// Phase 2 (enforcement): agent runs through `firma run`. +pub async fn run_scenario( + scenario: &dyn EnforcementScenario, + agent: &Agent, +) -> Result<(), anyhow::Error> { + let mock_server = Arc::new(MockServer::start().await); + + let cfg_tmp = tempfile::tempdir()?; + let state_tmp = tempfile::tempdir()?; + let workspace_tmp = tempfile::tempdir()?; + let protected_tmp = tempfile::tempdir()?; + + let cfg_dir = cfg_tmp.path().to_path_buf(); + let state_dir = state_tmp.path().to_path_buf(); + let workspace = workspace_tmp.path().to_path_buf(); + let protected_dir = protected_tmp.path().to_path_buf(); + + let mut ctx = ScenarioSetup { + workspace_dir: workspace, + protected_dir, + capability_seed: None, + capability_session_id: None, + mock_server: Arc::clone(&mock_server), + mocks: Vec::new(), + config_dir: cfg_dir.clone(), + state_dir: state_dir.clone(), + agent: agent.clone(), + }; + + scenario.setup(&mut ctx)?; + let agent_args = agent.prompt_args(&scenario.prompt(&ctx)); + + scenario.before_assert(&ctx)?; + + // Phase 1: baseline — run agent directly, no firma proxy. 
+ let baseline_agent_output = run_agent_direct( + agent.command(), + &agent_args, + &ctx.workspace_dir, + scenario.timeout(), + ) + .await?; + + let baseline_phase = PhaseOutput { + agent: baseline_agent_output, + http_requests: mock_server.received_requests().await.unwrap_or_default(), + }; + + scenario.assert_baseline(&baseline_phase).with_context(|| { + format!( + "baseline FAILED\nstdout: {}\nstderr: {}", + baseline_phase.agent.stdout.trim(), + baseline_phase.agent.stderr.trim(), + ) + })?; + + // Clear baseline captures; mount enforcement mocks built during setup. + mock_server.reset().await; + for m in ctx.mocks.drain(..) { + m.mount(&mock_server).await; + } + + scenario.before_assert(&ctx)?; + + // Phase 2: enforcement. + let enforcement_agent_output = + run_enforcement(&firma_bin(), &ctx, &agent_args, scenario.timeout()).await?; + + let enforcement_phase = PhaseOutput { + agent: enforcement_agent_output, + http_requests: mock_server.received_requests().await.unwrap_or_default(), + }; + + let audit_path = state_dir.join("audit.jsonl"); + let firma_audit = FirmaAuditTrail::try_new(&audit_path)?; + + scenario + .assert_enforcement(&ctx, &enforcement_phase, &firma_audit) + .with_context(|| { + format!( + "enforcement FAILED\nstdout: {}\nstderr: {}", + enforcement_phase.agent.stdout.trim(), + enforcement_phase.agent.stderr.trim(), + ) + })?; + + Ok(()) +} + +/// Spawn `cmd` and wait up to `timeout`. On timeout: kill the process and +/// collect whatever partial stdout/stderr was written. 
+async fn run_with_timeout(
+    phase: Phase,
+    mut cmd: tokio::process::Command,
+    timeout: Duration,
+) -> Result<RunOutput, anyhow::Error> {
+    // NOTE(review): the generic arguments of the `Result` return types in this
+    // file were missing from the captured text; they are reconstructed from the
+    // `Ok(RunOutput { .. })` / `bail!` bodies — confirm against the repository.
+    let start = Instant::now();
+    let mut child = cmd
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        // Make sure the process is not left running if this function returns
+        // early (e.g. `wait` fails) or its future is dropped.
+        .kill_on_drop(true)
+        .spawn()
+        .with_context(|| format!("spawn {phase}"))?;
+
+    let stdout_pipe = child
+        .stdout
+        .take()
+        .ok_or_else(|| anyhow::anyhow!("stdout not piped"))?;
+    let stderr_pipe = child
+        .stderr
+        .take()
+        .ok_or_else(|| anyhow::anyhow!("stderr not piped"))?;
+
+    // Drain both pipes on their own tasks, concurrently with waiting for the
+    // process to exit.
+    let stdout = tokio::spawn(read_lossy(stdout_pipe));
+    let stderr = tokio::spawn(read_lossy(stderr_pipe));
+
+    // `None` means the deadline fired before the process exited.
+    let exit_status = tokio::select! {
+        status = child.wait() => Some(status?),
+        () = tokio::time::sleep(timeout) => {
+            eprintln!("[{phase}] timed out after {timeout:?} - killing");
+            let _ = child.kill().await;
+            let _ = child.wait().await;
+            None
+        },
+    };
+
+    let elapsed = start.elapsed();
+    // NOTE(review): the readers finish at EOF; if a killed process left
+    // descendants holding the pipes open, these joins would presumably wait for
+    // them as well — confirm whether the agents spawn such children.
+    let stdout = stdout.await?;
+    let stderr = stderr.await?;
+
+    let Some(exit_status) = exit_status else {
+        return Err(RunTimeoutError {
+            phase,
+            stdout,
+            stderr,
+            elapsed,
+        }
+        .into());
+    };
+
+    Ok(RunOutput {
+        success: exit_status.success(),
+        exit_code: exit_status.code(),
+        stdout,
+        stderr,
+        elapsed,
+    })
+}
+
+/// Read `pipe` until EOF and decode the bytes as UTF-8, replacing invalid
+/// sequences. A read error is ignored: whatever was read before it is returned.
+async fn read_lossy<R>(mut pipe: R) -> String
+where
+    R: tokio::io::AsyncRead + Unpin,
+{
+    let mut buf = Vec::new();
+    let _ = pipe.read_to_end(&mut buf).await;
+    String::from_utf8_lossy(&buf).into_owned()
+}
+
+/// Baseline run: launch `agent_cmd` with `agent_args` inside `workspace`,
+/// bounded by `timeout`.
+///
+/// Fails up front when the command cannot be found on `PATH`.
+async fn run_agent_direct(
+    agent_cmd: &str,
+    agent_args: &[String],
+    workspace: &Path,
+    timeout: Duration,
+) -> Result<RunOutput, anyhow::Error> {
+    if !agent_available(agent_cmd) {
+        bail!("[baseline] agent '{agent_cmd}' not found on PATH");
+    }
+
+    let mut cmd = tokio::process::Command::new(agent_cmd);
+    cmd.args(agent_args).current_dir(workspace);
+    run_with_timeout(Phase::Baseline, cmd, timeout).await
+}
+
+/// Enforcement run: launch the agent through `firma run`, using the
+/// `firma.toml` in the scenario's config directory, bounded by `timeout`.
+///
+/// A capability file and a session id are forwarded only when the scenario
+/// set them on `ctx`.
+async fn run_enforcement(
+    firma_bin: &Path,
+    ctx: &ScenarioSetup,
+    agent_args: &[String],
+    timeout: Duration,
+) -> Result<RunOutput, anyhow::Error> {
+    let config_path = ctx.config_dir().join("firma.toml");
+    let mut cmd = tokio::process::Command::new(firma_bin);
+    cmd.args(["run", "--profile", ctx.agent.profile(), "--config"])
+        .arg(&config_path);
+    // macOS VzBackend runs in compatibility mode (sandbox-exec + HTTP_PROXY),
+    // which is non-structural; Linux uses bwrap and confines structurally.
+    if cfg!(target_os = "macos") {
+        cmd.arg("--allow-non-structural");
+    }
+    if let Some(cap) = &ctx.capability_seed {
+        cmd.arg("--capability-file").arg(cap);
+    }
+    if let Some(session_id) = &ctx.capability_session_id {
+        cmd.env("FIRMA_RUN_SESSION_ID", session_id);
+    }
+    // Everything after `--` is the agent's own command line.
+    cmd.arg("--")
+        .arg(ctx.agent.command())
+        .args(agent_args)
+        .current_dir(&ctx.workspace_dir);
+    run_with_timeout(Phase::Enforcement, cmd, timeout).await
+}
+
+/// Returns `true` when `which <name>` can be run and exits successfully.
+fn agent_available(name: &str) -> bool {
+    std::process::Command::new("which")
+        .arg(name)
+        .output()
+        .is_ok_and(|o| o.status.success())
+}
diff --git a/tests/e2e/scenario.rs b/tests/e2e/scenario.rs
new file mode 100644
index 00000000..05b1d487
--- /dev/null
+++ b/tests/e2e/scenario.rs
@@ -0,0 +1,52 @@
+use std::time::Duration;
+
+use crate::audit::FirmaAuditTrail;
+use crate::runner::RunOutput;
+use crate::setup::ScenarioSetup;
+
+/// Combined output from one scenario phase: agent result + mock HTTP captures.
+pub struct PhaseOutput {
+    pub agent: RunOutput,
+    pub http_requests: Vec<wiremock::Request>, // NOTE(review): element type reconstructed — confirm
+}
+
+pub trait EnforcementScenario: Send + Sync {
+    fn name(&self) -> &'static str;
+
+    /// Maximum wall-clock time allowed for each agent run (baseline and enforcement alike).
+    fn timeout(&self) -> Duration {
+        Duration::from_mins(5)
+    }
+
+    /// Configure the scenario: register HTTP mock routes, add mapping rules,
+    /// append Cedar policy rules, configure sandbox mounts, etc.
+    fn setup(&self, _ctx: &mut ScenarioSetup) -> Result<(), anyhow::Error> {
+        Ok(())
+    }
+
+    /// Called before each phase (baseline and enforcement).
+    fn before_assert(&self, _ctx: &ScenarioSetup) -> Result<(), anyhow::Error> {
+        Ok(())
+    }
+
+    /// Natural-language prompt sent to the agent.
+    fn prompt(&self, ctx: &ScenarioSetup) -> String;
+
+    fn assert_baseline(&self, output: &PhaseOutput) -> Result<(), anyhow::Error>;
+
+    fn assert_enforcement(
+        &self,
+        ctx: &ScenarioSetup,
+        output: &PhaseOutput,
+        audit: &FirmaAuditTrail,
+    ) -> Result<(), anyhow::Error>;
+}
+
+/// Which run of a scenario produced an output: the unenforced baseline or the
+/// firma-enforced run.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, strum::Display)]
+#[strum(serialize_all = "snake_case")]
+pub enum Phase {
+    Baseline,
+    Enforcement,
+}
diff --git a/tests/e2e/scenarios/mod.rs b/tests/e2e/scenarios/mod.rs
new file mode 100644
index 00000000..7abd844f
--- /dev/null
+++ b/tests/e2e/scenarios/mod.rs
@@ -0,0 +1,5 @@
+mod simple_prompt;
+
+pub use simple_prompt::SimplePrompt;
+
+pub use crate::scenario::EnforcementScenario;
diff --git a/tests/e2e/scenarios/simple_prompt.rs b/tests/e2e/scenarios/simple_prompt.rs
new file mode 100644
index 00000000..d93cddcf
--- /dev/null
+++ b/tests/e2e/scenarios/simple_prompt.rs
@@ -0,0 +1,41 @@
+use crate::audit::FirmaAuditTrail;
+use crate::scenario::{EnforcementScenario, PhaseOutput};
+use crate::setup::ScenarioSetup;
+
+pub struct SimplePrompt;
+
+impl EnforcementScenario for SimplePrompt {
+    fn name(&self) -> &'static str {
+        "simple_prompt"
+    }
+
+    fn setup(&self, ctx: &mut ScenarioSetup) -> Result<(), anyhow::Error> {
+        ctx.git_init_workspace()?;
+        ctx.firma_config().run()?;
+        Ok(())
+    }
+
+    fn prompt(&self, _ctx: &ScenarioSetup) -> String {
+        "Hi, what's up?".to_string()
+    }
+
+    fn assert_baseline(&self, output: &PhaseOutput) -> Result<(), anyhow::Error> {
+        if !output.agent.success {
+            anyhow::bail!("baseline agent failed: {}", output.agent.stderr);
+        }
+        Ok(())
+    }
+
+    fn assert_enforcement(
+        &self,
+        ctx: &ScenarioSetup,
+        output: &PhaseOutput,
+        audit: &FirmaAuditTrail,
+    ) -> Result<(), anyhow::Error> {
+        if !output.agent.success {
+            anyhow::bail!("enforcement agent failed: {}", output.agent.stderr);
+        }
+        insta::assert_debug_snapshot!(ctx.agent.kind.as_ref(), &audit);
+        Ok(())
+    }
+}
diff --git a/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__claude.snap b/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__claude.snap
new file mode 100644
index 00000000..03deaa39
--- /dev/null
+++ b/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__claude.snap
@@ -0,0 +1,15 @@
+---
+source: crates/firma/../../tests/e2e/scenarios/simple_prompt.rs
+expression: "&audit"
+---
+FirmaAuditTrail(
+    {
+        AuditEvent {
+            action: "communication.external.send",
+            resource: "api.anthropic.com/",
+            decision: Allow,
+            deny_reason: "",
+            dispatch_status: 200,
+        },
+    },
+)
diff --git a/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__codex.snap b/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__codex.snap
new file mode 100644
index 00000000..f1b5b155
--- /dev/null
+++ b/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__codex.snap
@@ -0,0 +1,43 @@
+---
+source: crates/firma/../../tests/e2e/scenarios/simple_prompt.rs
+expression: "&audit"
+---
+FirmaAuditTrail(
+    {
+        AuditEvent {
+            action: "communication.external.send",
+            resource: "ab.chatgpt.com/",
+            decision: Allow,
+            deny_reason: "",
+            dispatch_status: 200,
+        },
+        AuditEvent {
+            action: "communication.external.send",
+            resource: "api.openai.com/",
+            decision: Allow,
+            deny_reason: "",
+            dispatch_status: 200,
+        },
+        AuditEvent {
+            action: "communication.external.send",
+            resource: "chatgpt.com/",
+            decision: Allow,
+            deny_reason: "",
+            dispatch_status: 200,
+        },
+        AuditEvent {
+            action: "network.connect",
+            resource: "github.com/",
+            decision: Deny,
+            deny_reason: "token invalid: no capability token covers action 'code.write' on resource 'github.com/'",
+            dispatch_status: 0,
+        },
+        AuditEvent {
+            action: "raw.http.GET",
+            resource: "api.github.com/repos/openai/plugins",
+            decision: Deny,
+            deny_reason: "token invalid: no capability token covers action 'code.read' on resource 'api.github.com/repos/openai/plugins'",
+            dispatch_status: 0,
+        },
+    },
+)
diff --git a/tests/e2e/setup.rs b/tests/e2e/setup.rs
new file mode 100644
index 00000000..d26739a4
--- /dev/null
+++ b/tests/e2e/setup.rs
@@ -0,0 +1,224 @@
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+
+use anyhow::Context;
+use firma_sidecar::config::MappingRuleConfig;
+use wiremock::{Mock, MockServer};
+
+use crate::agent::{Agent, AgentKind};
+use crate::policy::PolicyBuilder;
+use crate::{config, firma_bin};
+
+// ── ScenarioSetup ─────────────────────────────────────────────────────────────
+
+/// Mutable per-scenario state handed to the scenario hooks and read back by
+/// the runner when it launches the enforcement phase.
+pub struct ScenarioSetup {
+    pub workspace_dir: PathBuf,
+    pub protected_dir: PathBuf,
+    // NOTE(review): the generic arguments of the next four fields were missing
+    // from the captured text and are reconstructed from how the fields are
+    // used (`Some(seed_path)`, `Some(session_id.to_string())`, `Arc::clone`,
+    // `Mock::mount`) — confirm against the repository.
+    /// Set by `issue_capability`; forwarded to `firma run` as `--capability-file`.
+    pub capability_seed: Option<PathBuf>,
+    /// Set by `issue_capability`; exported to `firma run` as `FIRMA_RUN_SESSION_ID`.
+    pub capability_session_id: Option<String>,
+
+    /// Shared mock server. Scenarios push built `Mock` objects into `mocks`;
+    /// the runner mounts them between the baseline and enforcement phases.
+    pub mock_server: Arc<MockServer>,
+    pub mocks: Vec<Mock>,
+
+    pub(crate) config_dir: PathBuf,
+    pub(crate) state_dir: PathBuf,
+    pub(crate) agent: Agent,
+}
+
+impl ScenarioSetup {
+    /// Register a mapping rule that classifies `method` requests to `path` on
+    /// `host_port` as `action_class`, together with a companion `CONNECT` rule
+    /// for the same host.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from `config::add_mapping_rules`.
+    pub fn add_mapping_rule(
+        &self,
+        host_port: &str,
+        method: &str,
+        path: &str,
+        action_class: &str,
+    ) -> Result<(), anyhow::Error> {
+        config::add_mapping_rules(
+            &self.config_dir,
+            vec![
+                MappingRuleConfig {
+                    method: Some(method.to_string()),
+                    host: host_port.to_string(),
+                    path: Some(path.to_string()),
+                    action_class: action_class.to_string(),
+                },
+                // Companion CONNECT rule so the TLS tunnel itself is classified.
+                MappingRuleConfig {
+                    method: Some("CONNECT".to_string()),
+                    host: host_port.to_string(),
+                    path: Some(String::new()),
+                    action_class: action_class.to_string(),
+                },
+            ],
+        )
+    }
+
+    /// Directory the firma configuration for this scenario lives in.
+    #[must_use]
+    pub fn config_dir(&self) -> &Path {
+        &self.config_dir
+    }
+
+    /// Start a `PolicyBuilder` bound to this setup.
+    pub fn policy(&self) -> PolicyBuilder<'_> {
+        PolicyBuilder::new(self)
+    }
+
+    /// Issue a capability through `config::issue_capability` and remember both
+    /// the returned seed and `session_id`, so the enforcement run can pass
+    /// them on to `firma run`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from `config::issue_capability`; the stored
+    /// capability fields are left untouched in that case.
+    pub fn issue_capability(
+        &mut self,
+        agent_id: &str,
+        session_id: &str,
+        action: &str,
+        scope: &str,
+        ttl_secs: u64,
+    ) -> Result<(), anyhow::Error> {
+        let seed_path = config::issue_capability(
+            &self.config_dir,
+            agent_id,
+            session_id,
+            action,
+            scope,
+            ttl_secs,
+        )?;
+        self.capability_seed = Some(seed_path);
+        self.capability_session_id = Some(session_id.to_string());
+        Ok(())
+    }
+
+    /// Initialize a git repository in `workspace_dir`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `git` cannot be spawned or `git init` exits
+    /// unsuccessfully; in the latter case the message carries git's stderr.
+    pub fn git_init_workspace(&self) -> Result<(), anyhow::Error> {
+        let out = std::process::Command::new("git")
+            .args(["init"])
+            .current_dir(&self.workspace_dir)
+            .output()
+            .with_context(|| "spawn git init")?;
+        anyhow::ensure!(
+            out.status.success(),
+            "git init failed: {}",
+            String::from_utf8_lossy(&out.stderr)
+        );
+        Ok(())
+    }
+
+    /// Start building a `firma config init` invocation.
+    #[must_use]
+    pub fn firma_config(&self) -> FirmaConfigBuilder<'_> {
+        FirmaConfigBuilder::new(self)
+    }
+}
+
+// ── FirmaConfigBuilder ────────────────────────────────────────────────────────
+
+/// Collects the options for one `firma config` invocation; nothing is
+/// executed until `run` is called.
+pub struct FirmaConfigBuilder<'a> {
+    ctx: &'a ScenarioSetup,
+    mode: &'static str,
+    posture: &'static str,
+    mappings: Vec<&'static str>,
+    workspace: Option<&'a Path>,
+    authority_listen: &'static str,
+}
+
+impl<'a> FirmaConfigBuilder<'a> {
+    /// Builder with the defaults for `ctx`: mode `agent-local`, posture `dev`,
+    /// the scenario workspace as workspace mount, authority listening on
+    /// `127.0.0.1:0`, and the `openai` + `github` mappings for a Codex agent
+    /// or the `anthropic` mapping for any other agent.
+    pub(crate) fn new(ctx: &'a ScenarioSetup) -> Self {
+        let mappings = match ctx.agent.kind {
+            AgentKind::Codex => vec!["openai", "github"],
+            _ => vec!["anthropic"],
+        };
+        Self {
+            ctx,
+            mode: "agent-local",
+            posture: "dev",
+            mappings,
+            workspace: Some(&ctx.workspace_dir),
+            authority_listen: "127.0.0.1:0",
+        }
+    }
+
+    /// Use `posture` as the Cedar posture instead of the default `"dev"`.
+    #[must_use]
+    pub fn posture(self, posture: &'static str) -> Self {
+        Self { posture, ..self }
+    }
+
+    /// Mount `path` as the workspace instead of `ctx.workspace_dir`.
+    #[must_use]
+    pub fn workspace(self, path: &'a Path) -> Self {
+        Self {
+            workspace: Some(path),
+            ..self
+        }
+    }
+
+    /// Pass no workspace mount at all.
+    #[must_use]
+    pub fn no_workspace(self) -> Self {
+        Self {
+            workspace: None,
+            ..self
+        }
+    }
+
+    /// Use exactly `mappings`, discarding the agent-specific defaults.
+    #[must_use]
+    pub fn mappings(self, mappings: Vec<&'static str>) -> Self {
+        Self { mappings, ..self }
+    }
+
+    /// Pass no mappings at all.
+    #[must_use]
+    pub fn no_mappings(self) -> Self {
+        Self {
+            mappings: Vec::new(),
+            ..self
+        }
+    }
+
+    /// Use `addr` as the authority listen address instead of `"127.0.0.1:0"`.
+    #[must_use]
+    pub fn authority_listen(self, addr: &'static str) -> Self {
+        Self {
+            authority_listen: addr,
+            ..self
+        }
+    }
+
+    /// Run the `firma config` command assembled by this builder.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the process cannot be spawned or exits
+    /// unsuccessfully; in the latter case the message carries its stderr.
+    pub fn run(self) -> Result<(), anyhow::Error> {
+        let Self {
+            ctx,
+            mode,
+            posture,
+            mappings,
+            workspace,
+            authority_listen,
+        } = self;
+
+        // Fixed part of the command line, in the order the flags are passed.
+        let mut firma = std::process::Command::new(firma_bin());
+        firma
+            .args(["config", "--yes"])
+            .args(["--mode", mode])
+            .args(["--profile", ctx.agent.profile()])
+            .args(["--posture", posture])
+            .arg("-o")
+            .arg(&ctx.config_dir)
+            .arg("--state-dir")
+            .arg(&ctx.state_dir)
+            .args(["--authority-listen", authority_listen]);
+
+        // Optional part: one `--mapping` per selected mapping, then the
+        // workspace mount when one is configured.
+        for mapping in mappings {
+            firma.arg("--mapping").arg(mapping);
+        }
+        if let Some(ws) = workspace {
+            firma.arg("--workspace").arg(ws);
+        }
+
+        let output = firma.output().with_context(|| "spawn firma config")?;
+        if output.status.success() {
+            return Ok(());
+        }
+
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        Err(anyhow::anyhow!("firma config failed: {stderr}"))
+    }
+}