diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
new file mode 100644
index 00000000..96648823
--- /dev/null
+++ b/.github/workflows/e2e-tests.yml
@@ -0,0 +1,100 @@
+name: E2E Tests
+
+on:
+  push:
+    tags:
+      - "v*.*.*"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: e2e-tests-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  e2e:
+    name: e2e (${{ matrix.os }}, ${{ matrix.agent.name }})
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 30
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        agent:
+          - name: claude
+            package: "@anthropic-ai/claude-code"
+          - name: codex
+            package: "@openai/codex"
+
+    steps:
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
+        with:
+          persist-credentials: false
+
+      - uses: actions-rust-lang/setup-rust-toolchain@46268bd060767258de96ed93c1251119784f2ab6 # v1.16.1
+        with:
+          rustflags: ""
+          cache: false
+
+      - name: Install cargo-binstall
+        uses: cargo-bins/cargo-binstall@30b5ca8b54e1dcffd9548bc87ede1531310fdc67 # v1.20.0
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Load tool versions
+        shell: bash
+        run: grep -E '^[A-Z0-9_]+=' tool-versions.env >> "$GITHUB_ENV"
+      - name: Install cargo-nextest
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: command -v cargo-nextest || cargo binstall -y --force --locked "cargo-nextest@${CARGO_NEXTEST_VERSION:?not set by tool-versions.env}"
+        shell: bash
+
+      - name: Install protoc
+        uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3.0.0
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Install bubblewrap (Linux)
+        if: runner.os == 'Linux'
+        run: sudo apt-get update && sudo apt-get install -y bubblewrap  # refresh the package index first; the runner image's copy can be stale
+
+      # Ubuntu 24.04 ships kernel.apparmor_restrict_unprivileged_userns=1, which
+      # transitions bwrap to a profile that strips CAP_NET_ADMIN inside its user
+      # namespace, so it cannot bring up loopback (RTM_NEWADDR). Install the
+      # targeted AppArmor profile that lets bwrap keep its caps in the userns.
+      - name: Allow bwrap user namespaces via AppArmor profile (Linux)
+        if: runner.os == 'Linux'
+        run: |
+          sudo tee /etc/apparmor.d/bwrap >/dev/null <<'EOF'
+          abi <abi/4.0>,
+          include <tunables/global>
+
+          profile bwrap /usr/bin/bwrap flags=(unconfined) {
+              userns,
+              include if exists <local/bwrap>
+          }
+          EOF
+          sudo apparmor_parser -r /etc/apparmor.d/bwrap
+
+      - name: Install ${{ matrix.agent.name }}
+        run: |
+          npm install -g '${{ matrix.agent.package }}'
+          ${{ matrix.agent.name }} --version
+
+      - name: Authenticate codex
+        if: matrix.agent.name == 'codex'
+        run: printenv OPENAI_API_KEY | codex login --with-api-key
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+
+      # nextest builds the firma binary as part of the e2e test; firma_bin()
+      # reads its path from CARGO_BIN_EXE_firma.
+      - name: Run e2e tests
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        run: cargo nextest run -p firma --test e2e --run-ignored all -E 'test(/${{ matrix.agent.name }}::/)'
diff --git a/Cargo.lock b/Cargo.lock
index 640d7d9d..e31a98ca 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1562,6 +1562,8 @@ dependencies = [
  "firma-run",
  "firma-sidecar",
  "firma-stack",
+ "fs-err",
+ "insta",
  "miette",
  "nix 0.31.3",
  "owo-colors",
@@ -1572,6 +1574,7 @@ dependencies = [
  "rcgen",
  "serde",
  "serde_json",
+ "serde_repr",
  "serde_yaml",
  "sha2 0.11.0",
  "strum 0.28.0",
@@ -1587,6 +1590,7 @@ dependencies = [
  "tracing-subscriber",
  "uuid",
  "windows-sys 0.59.0",
+ "wiremock",
  "x509-parser",
 ]
 
@@ -1877,6 +1881,15 @@ dependencies = [
  "percent-encoding",
 ]
 
+[[package]]
+name = "fs-err"
+version = "3.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73fde052dbfc920003cfd2c8e2c6e6d4cc7c1091538c3a24226cec0665ab08c0"
+dependencies = [
+ "autocfg",
+]
+
 [[package]]
 name = "fs_extra"
 version = "1.3.0"
@@ -2558,6 +2571,9 @@ checksum = "86f0f8fee8c926415c58d6ae43a08523a26faccb2323f5e6b644fe7dd4ef6b82"
 dependencies = [
  "console 0.16.3",
  "once_cell",
+ "pest",
+ "pest_derive",
+ "serde",
  "similar",
  "tempfile",
 ]
@@ -4194,9 +4210,9 @@ dependencies = [
 
 [[package]]
 name = "quinn-proto"
-version = "0.11.14"
+version = "0.11.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
+checksum = "4fcb935c5bec503c2f0e306bdd3e58bb9029dcb14fa8d9ac76e3a5256ac0763e"
 dependencies = [
  "aws-lc-rs",
  "bytes",
@@ -4948,6 +4964,17 @@ dependencies = [
  "zmij",
 ]
 
+[[package]]
+name = "serde_repr"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "serde_spanned"
 version = "1.1.1"
diff --git a/Cargo.toml b/Cargo.toml
index 91704454..edee6a37 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -53,12 +53,14 @@ firma-protobuf = "0.1.1"
 firma-run = { path = "crates/firma-run" }
 firma-sidecar = { path = "crates/firma-sidecar" }
 firma-stack = { path = "crates/firma-stack" }
+fs-err = "3.3"
 governor = "0.10"
 hex = "0.4"
 http-body = "1"
 http-body-util = "0.1"
 hyper = { version = "1", default-features = false }
 hyper-util = { version = "0.1", default-features = false }
+insta = { version = "1", features = ["json", "redactions"] }
 lru = "0.17"
 miette = { version = "7", features = ["fancy-no-backtrace"] }
 nix = { version = "0.31", features = ["fs", "process", "signal", "socket", "user"] }
@@ -81,6 +83,7 @@ rustls = "0.23"
 rustls-pemfile = "2"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
+serde_repr = "0.1"
 serde_yaml = "0.9"
 serial_test = "3"
 sha2 = "0.11"
@@ -105,5 +108,6 @@ uuid = { version = "1", features = ["v4", "v7", "serde"] }
 wait-timeout = "0.2"
 webpki-roots = "1"
 windows-sys = { version = "0.59", features = ["Win32_Foundation", "Win32_Security", "Win32_System_Console", "Win32_System_JobObjects", "Win32_System_Threading"] }
+wiremock = "0.6"
 x509-parser = "0.16"
 xxhash-rust = { version = "0.8", features = ["xxh3"] }
diff --git a/crates/firma-authority/src/config.rs b/crates/firma-authority/src/config.rs
index 38c5d7d1..6b3b6c4c 100644
--- a/crates/firma-authority/src/config.rs
+++ b/crates/firma-authority/src/config.rs
@@ -1,4 +1,4 @@
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
 
 /// Sentinel: unset `policy_dir`.
@@ -12,7 +12,7 @@ pub(crate) const DEFAULT_KEY_FILE: &str = "firma-authority.key";
 ///
 /// Environment variables take precedence over TOML values and use the
 /// `FIRMA_AUTHORITY_` prefix (e.g., `FIRMA_AUTHORITY_LISTEN_ADDR`).
-#[derive(Debug, Clone, Deserialize)]
+#[derive(Debug, Clone, Deserialize, Serialize)]
 #[serde(default)]
 pub struct AuthorityConfig {
     /// gRPC listen address (default: `[::1]:50051`).
@@ -51,7 +51,7 @@ pub struct AuthorityConfig {
 /// TLS configuration for the Authority gRPC server.
 ///
 /// Both values are required together to enable TLS.
-#[derive(Debug, Clone, Default, Deserialize)]
+#[derive(Debug, Clone, Default, Deserialize, Serialize)]
 pub struct AuthorityTlsConfig {
     /// Path to the TLS certificate file (PEM). Must be set together with
     /// `tls_key_path`.
diff --git a/crates/firma-run/src/sidecar/config.rs b/crates/firma-run/src/sidecar/config.rs
index c7ca458e..ab9220cf 100644
--- a/crates/firma-run/src/sidecar/config.rs
+++ b/crates/firma-run/src/sidecar/config.rs
@@ -215,6 +215,11 @@ pub fn synthesize(req: SynthesizeRequest<'_>) -> Result<TemplateSource, RunError
         rebase_template_resource_paths(&mut value, dir)?;
     }
     override_interceptor(&mut value, req.socket_path, req.listen_addr)?;
+    // Pin ca.dir to the marker dir so the MITM CA cert lands where
+    // sidecar_trust_env_overrides expects it (<marker_dir>/firma-ca/).
+    // The default "./firma-ca/" is CWD-relative and would diverge when
+    // firma run's CWD differs from the marker dir.
+    override_ca_dir(&mut value, req.out_path)?;
     if let Some(url) = req.authority_url {
         override_authority_url(&mut value, url)?;
     }
@@ -528,6 +533,27 @@ fn override_sidecar_mode(value: &mut toml::Value, mode: &str) -> Result<(), RunE
     Ok(())
 }
 
+/// Set `ca.dir` in the sidecar table to `<marker_dir>/firma-ca`, where the
+/// marker dir is the parent directory of `out_path`. The `ca` table is created
+/// when absent; errors if `out_path` has no parent or `ca` is not a table.
+fn override_ca_dir(value: &mut toml::Value, out_path: &Path) -> Result<(), RunError> {
+    let Some(marker_dir) = out_path.parent() else {
+        return Err(RunError::Internal(format!(
+            "cannot resolve marker dir from synthesized config path {}",
+            out_path.display()
+        )));
+    };
+    let ca_dir = marker_dir.join("firma-ca").display().to_string();
+    let ca_entry = sidecar_table_mut(value)?
+        .entry("ca".to_string())
+        .or_insert_with(|| toml::Value::Table(toml::value::Table::new()));
+    let Some(ca_table) = ca_entry.as_table_mut() else {
+        return Err(RunError::Internal("[sidecar.ca] is not a table".into()));
+    };
+    ca_table.insert("dir".to_string(), toml::Value::String(ca_dir));
+    Ok(())
+}
+
 /// Default the audit sink to a file at `audit_path` when the template did not
 /// configure one. The per-run sidecar is spawned with a null stdout, so the
 /// default `stdout` audit sink would silently discard every decision and
diff --git a/crates/firma-sidecar/src/config/enforcement.rs b/crates/firma-sidecar/src/config/enforcement.rs
index 2d15026d..f99e1e05 100644
--- a/crates/firma-sidecar/src/config/enforcement.rs
+++ b/crates/firma-sidecar/src/config/enforcement.rs
@@ -5,7 +5,7 @@
     reason = "Authority-wired capability manifest support is defined now but not consumed yet"
 )]
 
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
 
 const VALID_HTTP_METHODS: &[&str] = &[
     "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS", "CONNECT",
@@ -130,14 +130,15 @@ impl Default for ConstraintEnforcementConfig {
 // ---------------------------------------------------------------------------
 
 /// A single mapping rule as deserialized from the rules TOML file.
-#[derive(Debug, Clone, Deserialize)]
+#[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct MappingRuleConfig {
     /// HTTP method to match (`None` = any method).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
     pub method: Option<String>,
     /// Host pattern to match (supports `*` wildcard).
     pub host: String,
     /// Path pattern to match (supports `*` wildcard).
-    #[serde(default)]
+    #[serde(default, skip_serializing_if = "Option::is_none")]
     pub path: Option<String>,
     /// Canonical action class this rule maps to.
     pub action_class: String,
@@ -170,7 +171,7 @@ impl MappingRuleConfig {
 }
 
 /// Top-level structure of the mapping rules TOML file.
-#[derive(Debug, Clone, Deserialize)]
+#[derive(Debug, Clone, Default, Deserialize, Serialize)]
 pub struct MappingRulesFile {
     /// Individual mapping rules.
     #[serde(rename = "rules", default)]
diff --git a/crates/firma/Cargo.toml b/crates/firma/Cargo.toml
index 0729798a..a50e0a61 100644
--- a/crates/firma/Cargo.toml
+++ b/crates/firma/Cargo.toml
@@ -55,10 +55,18 @@ nix = { workspace = true }
 windows-sys = { workspace = true }
 
 [dev-dependencies]
+fs-err = { workspace = true }
+insta = { workspace = true }
 pretty_assertions = { workspace = true }
 rand = { workspace = true }
+serde_repr = { workspace = true }
 strum = { workspace = true, features = ["derive"] }
 tempfile = { workspace = true }
+wiremock = { workspace = true }
 
 [target.'cfg(unix)'.dev-dependencies]
 nix = { workspace = true }
+
+[[test]]
+name = "e2e"
+path = "../../tests/e2e/main.rs"
diff --git a/crates/firma/src/services/config.rs b/crates/firma/src/services/config.rs
index bc39f38f..8080af28 100644
--- a/crates/firma/src/services/config.rs
+++ b/crates/firma/src/services/config.rs
@@ -1494,8 +1494,8 @@ mod tests {
         assert!(
             rules
                 .iter()
-                .any(|r| r.host == "api.openai.com" && r.method.as_deref() == Some("CONNECT")),
-            "expected api.openai.com:443 CONNECT rule"
+                .any(|r| r.host == "*.openai.com" && r.method.as_deref() == Some("CONNECT")),
+            "expected *.openai.com:443 CONNECT rule"
         );
     }
 
diff --git a/crates/firma/src/services/run.rs b/crates/firma/src/services/run.rs
index a7507222..9572e64e 100644
--- a/crates/firma/src/services/run.rs
+++ b/crates/firma/src/services/run.rs
@@ -78,7 +78,7 @@ pub fn run(args: RunArgs) -> anyhow::Result<ExitCode> {
         command: args.command,
         authority_cli,
         authority_profile: args.authority_profile,
-        user_config_path: None,
+        user_config_path: args.config.clone(),
         allow_non_structural: args.allow_non_structural,
         monitor_mode: args.monitor,
     };
diff --git a/crates/firma/templates/mappings/openai.toml b/crates/firma/templates/mappings/openai.toml
index bc5caeef..b15d40ae 100644
--- a/crates/firma/templates/mappings/openai.toml
+++ b/crates/firma/templates/mappings/openai.toml
@@ -1,9 +1,10 @@
 # OpenAI API mapping.
 # Tunnels through without MITM; the LLM SDK does not need to trust firma-ca.
 
+# API-key traffic (api.openai.com, etc.) — single-label wildcard.
 [[rules]]
 method = "CONNECT"
-host = "api.openai.com"
+host = "*.openai.com"
 action_class = "communication.external.send"
 
 [[rules]]
@@ -11,9 +12,15 @@ method = "CONNECT"
 host = "chatgpt.com"
 action_class = "communication.external.send"
 
+# Subdomains (ab.chatgpt.com, etc.) — single-label wildcard.
+[[rules]]
+method = "CONNECT"
+host = "*.chatgpt.com"
+action_class = "communication.external.send"
+
 # REST fallback (plain HTTP proxy or post-MITM).
 [[rules]]
-host = "api.openai.com"
+host = "*.openai.com"
 path = "*"
 action_class = "communication.external.send"
 
@@ -21,3 +28,8 @@ action_class = "communication.external.send"
 host = "chatgpt.com"
 path = "*"
 action_class = "communication.external.send"
+
+[[rules]]
+host = "*.chatgpt.com"
+path = "*"
+action_class = "communication.external.send"
diff --git a/justfile b/justfile
index 2f4bec18..75f1be78 100644
--- a/justfile
+++ b/justfile
@@ -35,6 +35,9 @@ test:
 build:
   cargo build --all-features --all-targets
 
+e2e:
+  cargo nextest run -p firma --test e2e --run-ignored all
+
 audit:
   cargo audit --deny warnings
 
diff --git a/tests/e2e/README.md b/tests/e2e/README.md
new file mode 100644
index 00000000..0ec3cbde
--- /dev/null
+++ b/tests/e2e/README.md
@@ -0,0 +1,35 @@
+# E2E Tests
+
+End-to-end validation of the OpenFirma enforcement boundary against real coding
+agent workloads.
+
+## Running locally
+
+```sh
+just e2e
+```
+
+nextest builds the debug `firma` binary as part of compiling the e2e test;
+`firma_bin()` reads its path from `CARGO_BIN_EXE_firma` — no manual build needed.
+
+Run only Claude or only Codex scenarios:
+
+```sh
+cargo nextest run -p firma --test e2e --run-ignored all -E 'test(claude::)'
+cargo nextest run -p firma --test e2e --run-ignored all -E 'test(codex::)'
+```
+
+Run a single scenario:
+
+```sh
+cargo nextest run -p firma --test e2e --run-ignored all -E 'test(claude::simple_prompt)'
+```
+
+## Scenarios
+
+Each scenario runs in two phases:
+
+1. **Baseline** — agent runs directly (no firma). Confirms the agent can complete
+   the task and reach the mock server when unconfined.
+2. **Enforcement** — agent runs under `firma run`. Confirms enforcement produces
+   the expected ALLOW or DENY outcome and emits the correct audit events.
diff --git a/tests/e2e/agent.rs b/tests/e2e/agent.rs
new file mode 100644
index 00000000..0e5db1ad
--- /dev/null
+++ b/tests/e2e/agent.rs
@@ -0,0 +1,72 @@
+#[derive(Debug, Clone, Copy, PartialEq, Eq, strum::AsRefStr)]
+#[strum(serialize_all = "snake_case")]
+pub enum AgentKind {
+    Claude,
+    Codex,
+}
+
+/// An agent the harness can run, optionally carrying extra CLI flags.
+///
+/// Flags passed via `.args()` are inserted before the subcommand so they are
+/// treated as global flags by the agent binary.
+#[derive(Debug, Clone)]
+pub struct Agent {
+    pub kind: AgentKind,
+    args: Vec<String>,
+}
+
+impl Agent {
+    #[must_use]
+    pub fn claude() -> Self {
+        Self {
+            kind: AgentKind::Claude,
+            args: Vec::new(),
+        }
+    }
+
+    #[must_use]
+    pub fn codex() -> Self {
+        Self {
+            kind: AgentKind::Codex,
+            args: Vec::new(),
+        }
+    }
+
+    /// Set the CLI flags inserted before the subcommand / prompt flag (replaces earlier ones).
+    #[must_use]
+    pub fn args(mut self, args: impl IntoIterator<Item = impl Into<String>>) -> Self {
+        self.args = args.into_iter().map(Into::into).collect();
+        self
+    }
+
+    #[must_use]
+    pub fn command(&self) -> &'static str {
+        match self.kind {
+            AgentKind::Claude => "claude",
+            AgentKind::Codex => "codex",
+        }
+    }
+
+    #[must_use]
+    pub fn profile(&self) -> &'static str {
+        match self.kind {
+            AgentKind::Claude => "claude-code",
+            AgentKind::Codex => "codex",
+        }
+    }
+
+    /// Build the argument vector that passes `prompt` to the agent.
+    ///
+    /// The flags stored on this agent come first, followed by the prompt entry
+    /// point (`-p` for Claude, the `exec` subcommand for Codex) and `prompt`.
+    #[must_use]
+    pub fn prompt_args(&self, prompt: &str) -> Vec<String> {
+        let entry_point = match self.kind {
+            AgentKind::Claude => "-p",
+            AgentKind::Codex => "exec",
+        };
+        let mut result = self.args.clone();
+        result.extend([entry_point.to_string(), prompt.to_string()]);
+        result
+    }
+}
diff --git a/tests/e2e/audit.rs b/tests/e2e/audit.rs
new file mode 100644
index 00000000..dda3fac5
--- /dev/null
+++ b/tests/e2e/audit.rs
@@ -0,0 +1,43 @@
+use std::path::Path;
+
+use anyhow::Context;
+use serde::Deserialize;
+use serde_repr::Deserialize_repr;
+use std::collections::BTreeSet;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Deserialize_repr)]
+#[repr(u8)]
+pub enum Decision {
+    Allow = 1,
+    Deny = 2,
+    Abort = 3,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize)]
+pub struct AuditEvent {
+    action: String,
+    resource: String,
+    decision: Decision,
+    deny_reason: String,
+    dispatch_status: u16,
+}
+
+/// Sidecar audit events from the enforcement phase.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct FirmaAuditTrail(BTreeSet<AuditEvent>);
+
+impl FirmaAuditTrail {
+    /// Parse the JSON-lines audit log at `path`, skipping blank lines.
+    /// Fails on the first line that does not deserialize as an audit event.
+    pub fn try_new(path: &Path) -> Result<Self, anyhow::Error> {
+        let content = fs_err::read_to_string(path)?;
+        let mut events = BTreeSet::new();
+        let numbered = content.lines().zip(1..);
+        for (raw, line) in numbered.filter(|(raw, _)| !raw.trim().is_empty()) {
+            let parsed = serde_json::from_str(raw)
+                .with_context(|| format!("unexpected audit record in audit log at line {line}"));
+            events.insert(parsed?);
+        }
+        Ok(Self(events))
+    }
+}
diff --git a/tests/e2e/config.rs b/tests/e2e/config.rs
new file mode 100644
index 00000000..1c789169
--- /dev/null
+++ b/tests/e2e/config.rs
@@ -0,0 +1,69 @@
+use std::path::{Path, PathBuf};
+
+use anyhow::Context;
+use firma_sidecar::config::{MappingRuleConfig, MappingRulesFile};
+
+/// Append `rule` to `<cfg_dir>/policies/<name>.cedar`, keeping existing content.
+/// The rule is written with a newline before and after it. The file is created
+/// when missing; this function does not create the `policies` directory.
+pub fn append_policy_rule(cfg_dir: &Path, name: &str, rule: &str) -> Result<(), anyhow::Error> {
+    let path = cfg_dir.join("policies").join(format!("{name}.cedar"));
+    let previous = if path.exists() {
+        fs_err::read_to_string(&path)?
+    } else {
+        String::new()
+    };
+    fs_err::write(&path, format!("{previous}\n{rule}\n"))?;
+    Ok(())
+}
+
+pub fn add_mapping_rules(
+    cfg_dir: &Path,
+    rules: Vec<MappingRuleConfig>,
+) -> Result<(), anyhow::Error> {
+    let rules_path = cfg_dir.join("mapping-rules.toml");
+    let mut file: MappingRulesFile = if rules_path.exists() {
+        let content = fs_err::read_to_string(&rules_path)?;
+        toml::from_str(&content).with_context(|| format!("parse {}", rules_path.display()))?
+    } else {
+        MappingRulesFile::default()
+    };
+
+    file.rules.extend(rules);
+    let content = toml::to_string(&file).context("serialize mapping rules")?;
+    fs_err::write(&rules_path, content)?;
+    Ok(())
+}
+
+pub fn issue_capability(
+    cfg_dir: &Path,
+    agent_id: &str,
+    session_id: &str,
+    action: &str,
+    scope: &str,
+    ttl_secs: u64,
+) -> Result<PathBuf, anyhow::Error> {
+    let config_path = cfg_dir.join("firma.toml");
+    let seed_path = cfg_dir.join("capability-seed.toml");
+    let output = std::process::Command::new(crate::firma_bin())
+        .arg("authority")
+        .args(["--config"])
+        .arg(&config_path)
+        .arg("issue")
+        .args(["--agent-id", agent_id])
+        .args(["--session-id", session_id])
+        .args(["--action", action])
+        .args(["--resource-scope", scope])
+        .args(["--ttl-seconds", &ttl_secs.to_string()])
+        .args(["--output"])
+        .arg(&seed_path)
+        .output()
+        .with_context(|| "spawn firma authority issue")?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("firma authority issue failed: {stderr}");
+    }
+
+    Ok(seed_path)
+}
diff --git a/tests/e2e/main.rs b/tests/e2e/main.rs
new file mode 100644
index 00000000..cab037a0
--- /dev/null
+++ b/tests/e2e/main.rs
@@ -0,0 +1,98 @@
+#![allow(dead_code)]
+
+mod agent;
+mod audit;
+mod config;
+mod policy;
+mod runner;
+mod scenario;
+mod scenarios;
+mod setup;
+
+use std::path::PathBuf;
+
+use agent::AgentKind;
+use anyhow::Context;
+use runner::run_scenario;
+use scenarios::EnforcementScenario;
+
+// ── Utilities ────────────────────────────────────────────────────────────────
+
+/// Path to the `firma` binary under test.
+///
+/// Cargo builds the package's `[[bin]]` when compiling this integration test and
+/// exposes its path via `CARGO_BIN_EXE_firma`, so nextest always runs the
+/// just-built debug binary.
+#[must_use]
+pub fn firma_bin() -> PathBuf {
+    PathBuf::from(env!("CARGO_BIN_EXE_firma"))
+}
+
+// ── Test driver ──────────────────────────────────────────────────────────────
+
+fn default_agent(kind: AgentKind) -> agent::Agent {
+    match kind {
+        AgentKind::Claude => agent::Agent::claude().args([
+            "--permission-mode",
+            "bypassPermissions",
+            // Suppresses analytics only — normal agent behavior is unaffected.
+            "--settings",
+            r#"{"env":{"DISABLE_TELEMETRY":"1"}}"#,
+        ]),
+        AgentKind::Codex => agent::Agent::codex().args(["--sandbox", "danger-full-access"]),
+    }
+}
+
+async fn drive_scenario_for_agent(
+    scenario: &dyn EnforcementScenario,
+    kind: AgentKind,
+) -> Result<(), anyhow::Error> {
+    let agent = default_agent(kind);
+
+    run_scenario(scenario, &agent)
+        .await
+        .with_context(|| format!("[{}] scenario {}", agent.kind.as_ref(), scenario.name()))
+}
+
+// ── Scenario registration ────────────────────────────────────────────────────
+//
+// Pass the agent list as the first argument. Each ident becomes the sub-module
+// name and maps to an `AgentKind` variant via `agent_kind!`.
+//
+//   scenario_tests! { [claude, codex]; ( ... ) }   // all agents
+//   scenario_tests! { [claude]; ( ... ) }          // claude only
+macro_rules! agent_kind {
+    (claude) => {
+        agent::AgentKind::Claude
+    };
+    (codex) => {
+        agent::AgentKind::Codex
+    };
+}
+
+macro_rules! scenario_tests {
+    // $scenarios is a single tt (the parenthesised block), not a repetition,
+    // so it can be passed inside the $agent repetition without a depth conflict.
+    ([$($agent:ident),+]; $scenarios:tt) => {
+        $( scenario_tests!(@agent $agent $scenarios); )+
+    };
+    (@agent $agent:ident ($($name:ident => $scenario:expr),* $(,)?)) => {
+        mod $agent {
+            use super::*;
+            $(
+                #[tokio::test]
+                #[ignore = "integration test — run with `cargo nextest run --run-ignored all`"]
+                async fn $name() -> Result<(), anyhow::Error> {
+                    super::drive_scenario_for_agent(&$scenario, agent_kind!($agent)).await
+                }
+            )*
+        }
+    };
+}
+
+scenario_tests! {
+    [claude, codex];
+    (
+        simple_prompt => scenarios::SimplePrompt,
+    )
+}
diff --git a/tests/e2e/policy.rs b/tests/e2e/policy.rs
new file mode 100644
index 00000000..43b7eb36
--- /dev/null
+++ b/tests/e2e/policy.rs
@@ -0,0 +1,229 @@
+use crate::config;
+use crate::setup::ScenarioSetup;
+
+// ── PolicyBuilder ─────────────────────────────────────────────────────────────
+
+/// Entry point for building Cedar policy rules programmatically.
+///
+/// ```ignore
+/// ctx.policy()
+///     .forbid("communication.external.send")
+///     .when(|w| w.resource_like("paste.rs*"))
+///     .add()?;
+/// ```
+pub struct PolicyBuilder<'a> {
+    ctx: &'a ScenarioSetup,
+    name: Option<&'static str>,
+}
+
+impl<'a> PolicyBuilder<'a> {
+    pub(crate) fn new(ctx: &'a ScenarioSetup) -> Self {
+        Self { ctx, name: None }
+    }
+
+    /// Attach an annotation comment to the generated Cedar rule.
+    #[must_use]
+    pub fn named(mut self, name: &'static str) -> Self {
+        self.name = Some(name);
+        self
+    }
+
+    /// Start a `forbid` rule for a single action class.
+    #[must_use]
+    pub fn forbid(self, action: &'static str) -> RuleBuilder<'a> {
+        self.into_rule("forbid", Effect::Single(action))
+    }
+
+    /// Start a `permit` rule for a single action class.
+    #[must_use]
+    pub fn permit(self, action: &'static str) -> RuleBuilder<'a> {
+        self.into_rule("permit", Effect::Single(action))
+    }
+
+    /// Start a `forbid` rule covering multiple action classes.
+    #[must_use]
+    pub fn forbid_in(self, actions: &'static [&'static str]) -> RuleBuilder<'a> {
+        self.into_rule("forbid", Effect::Set(actions))
+    }
+
+    /// Start a `permit` rule covering multiple action classes.
+    #[must_use]
+    pub fn permit_in(self, actions: &'static [&'static str]) -> RuleBuilder<'a> {
+        self.into_rule("permit", Effect::Set(actions))
+    }
+
+    fn into_rule(self, effect: &'static str, action: Effect) -> RuleBuilder<'a> {
+        RuleBuilder {
+            ctx: self.ctx,
+            name: self.name,
+            effect,
+            action,
+            resource: None,
+            when: None,
+        }
+    }
+}
+
+enum Effect {
+    Single(&'static str),
+    Set(&'static [&'static str]),
+}
+
+/// A Cedar rule under construction — created by [`PolicyBuilder`].
+pub struct RuleBuilder<'a> {
+    ctx: &'a ScenarioSetup,
+    name: Option<&'static str>,
+    effect: &'static str,
+    action: Effect,
+    resource: Option<String>,
+    when: Option<String>,
+}
+
+impl RuleBuilder<'_> {
+    /// Scope the rule to a specific resource entity UID (host + path).
+    #[must_use]
+    pub fn resource_uid(mut self, uid: impl Into<String>) -> Self {
+        self.resource = Some(uid.into());
+        self
+    }
+
+    /// Add a `when` clause to the rule.
+    #[must_use]
+    pub fn when<F>(mut self, f: F) -> Self
+    where
+        F: FnOnce(WhenBuilder) -> WhenBuilder,
+    {
+        let wb = WhenBuilder::new();
+        self.when = Some(f(wb).build());
+        self
+    }
+
+    /// Render the rule as Cedar text and append it to `policies/e2e.cedar`
+    /// under the scenario's config directory. Scenario-authored rules live in
+    /// that dedicated file, separate from the shipped `dev.cedar`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the policy file cannot be read or written.
+    pub fn add(self) -> Result<(), anyhow::Error> {
+        let ctx = self.ctx; // copy the reference out before `render` consumes `self`
+        let rule = self.render();
+        config::append_policy_rule(&ctx.config_dir, "e2e", &rule)
+    }
+
+    /// Render this rule as Cedar source text, e.g.:
+    ///
+    /// ```text
+    /// // block-paste
+    /// forbid(
+    ///     principal,
+    ///     action == Firma::Action::"communication.external.send",
+    ///     resource
+    /// )
+    /// when { resource.id like "paste.rs*" };
+    /// ```
+    /// Values are interpolated verbatim; nothing is escaped.
+    fn render(self) -> String {
+        // Optional `// <name>` annotation line.
+        let annotation = match self.name {
+            Some(name) => format!("// {name}\n"),
+            None => String::new(),
+        };
+        // Action constraint: `==` for one action, `in [...]` for several.
+        let action_head = match self.action {
+            Effect::Single(a) => format!("action == Firma::Action::\"{a}\""),
+            Effect::Set(actions) => {
+                let uids: Vec<String> = actions
+                    .iter()
+                    .map(|a| format!("Firma::Action::\"{a}\""))
+                    .collect();
+                format!("action in [{}]", uids.join(", "))
+            }
+        };
+        // Resource constraint: unconstrained unless a UID was set.
+        let resource_head = match self.resource {
+            Some(uid) => format!("resource == Firma::Resource::\"{uid}\""),
+            None => "resource".to_string(),
+        };
+        // Optional trailing `when { … }` clause.
+        let when_clause = match self.when {
+            Some(cond) => format!("\nwhen {{ {cond} }}"),
+            None => String::new(),
+        };
+        let effect = self.effect;
+        format!(
+            "{annotation}{effect}(\n    principal,\n    {action_head},\n    {resource_head}\n){when_clause};"
+        )
+    }
+}
+
+// ── WhenBuilder ───────────────────────────────────────────────────────────────
+
+/// Accumulates `when` clause conditions via a fluent API.
+pub struct WhenBuilder {
+    parts: Vec<String>,
+}
+
+impl WhenBuilder {
+    pub(crate) fn new() -> Self {
+        Self { parts: Vec::new() }
+    }
+
+    /// `resource.id like "<pattern>"`
+    #[must_use]
+    pub fn resource_like(mut self, pattern: impl std::fmt::Display) -> Self {
+        self.parts.push(format!("resource.id like \"{pattern}\""));
+        self
+    }
+
+    /// Start a context attribute comparison.
+    #[must_use]
+    pub fn context(self, name: &str) -> ContextMatcher {
+        ContextMatcher {
+            parts: self.parts,
+            name: name.to_string(),
+        }
+    }
+
+    /// Chain another condition with `&&`.
+    #[must_use]
+    pub fn and(mut self) -> Self {
+        self.parts.push("&&".to_string());
+        self
+    }
+
+    fn build(self) -> String {
+        self.parts.join(" ")
+    }
+}
+
+// ── ContextMatcher ────────────────────────────────────────────────────────────
+
+/// In-progress context attribute comparison — created by [`WhenBuilder::context`].
+pub struct ContextMatcher {
+    parts: Vec<String>,
+    name: String,
+}
+
+impl ContextMatcher {
+    /// `context.<name> > <value>`
+    #[must_use]
+    pub fn greater_than(mut self, value: impl std::fmt::Display) -> WhenBuilder {
+        self.parts.push(format!("context.{} > {value}", self.name));
+        WhenBuilder { parts: self.parts }
+    }
+
+    /// `context.<name> < <value>`
+    #[must_use]
+    pub fn less_than(mut self, value: impl std::fmt::Display) -> WhenBuilder {
+        self.parts.push(format!("context.{} < {value}", self.name));
+        WhenBuilder { parts: self.parts }
+    }
+
+    /// `context.<name> == <value>`
+    #[must_use]
+    pub fn equals(mut self, value: impl std::fmt::Display) -> WhenBuilder {
+        self.parts.push(format!("context.{} == {value}", self.name));
+        WhenBuilder { parts: self.parts }
+    }
+}
diff --git a/tests/e2e/runner.rs b/tests/e2e/runner.rs
new file mode 100644
index 00000000..c7a805fe
--- /dev/null
+++ b/tests/e2e/runner.rs
@@ -0,0 +1,248 @@
+use std::path::Path;
+use std::process::Stdio;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use anyhow::{Context, bail};
+use tokio::io::AsyncReadExt;
+use wiremock::MockServer;
+
+use crate::agent::Agent;
+use crate::audit::FirmaAuditTrail;
+use crate::firma_bin;
+use crate::scenario::{EnforcementScenario, Phase, PhaseOutput};
+use crate::setup::ScenarioSetup;
+
+/// Captured result of running a phase process (bare agent or firma wrapper) to
+/// completion.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct RunOutput {
+    pub success: bool, // `ExitStatus::success()` of the process
+    pub exit_code: Option<i32>, // `ExitStatus::code()` of the process
+    pub stdout: String, // captured stdout, decoded as lossy UTF-8
+    pub stderr: String, // captured stderr, decoded as lossy UTF-8
+    pub elapsed: Duration, // measured from just before spawn until the process ended
+}
+
+/// Returned when a phase process exceeds its allotted wall-clock time and is
+/// killed before exiting. Carries whatever partial output was captured.
+#[derive(Debug, Clone, thiserror::Error)]
+#[error("[{phase}] run timed out after {elapsed:?}")]
+pub struct RunTimeoutError {
+    pub phase: Phase, // phase whose process was killed
+    pub stdout: String, // stdout captured up to the kill, as lossy UTF-8
+    pub stderr: String, // stderr captured up to the kill, as lossy UTF-8
+    pub elapsed: Duration, // measured from just before spawn until after the kill
+}
+
+/// Drive `scenario` through both phases using `agent`.
+///
+/// Baseline comes first: the agent is launched on its own, with no firma proxy
+/// in front of it. A failed baseline assertion ends the scenario with an error,
+/// because enforcing a task the unconfined agent cannot finish proves nothing.
+/// Enforcement follows: the same arguments are replayed through `firma run`.
+pub async fn run_scenario(
+    scenario: &dyn EnforcementScenario,
+    agent: &Agent,
+) -> Result<(), anyhow::Error> {
+    let mock_server = Arc::new(MockServer::start().await);
+
+    // Scratch directories. Each guard deletes its directory once dropped, so
+    // all four stay bound until the scenario is over.
+    let config_guard = tempfile::tempdir()?;
+    let state_guard = tempfile::tempdir()?;
+    let workspace_guard = tempfile::tempdir()?;
+    let protected_guard = tempfile::tempdir()?;
+    let state_dir = state_guard.path().to_path_buf();
+
+    let mut ctx = ScenarioSetup {
+        workspace_dir: workspace_guard.path().to_path_buf(),
+        protected_dir: protected_guard.path().to_path_buf(),
+        capability_seed: None,
+        capability_session_id: None,
+        mock_server: Arc::clone(&mock_server),
+        mocks: Vec::new(),
+        config_dir: config_guard.path().to_path_buf(),
+        state_dir: state_dir.clone(),
+        agent: agent.clone(),
+    };
+
+    scenario.setup(&mut ctx)?;
+    let prompt = scenario.prompt(&ctx);
+    let agent_args = agent.prompt_args(&prompt);
+
+    scenario.before_assert(&ctx)?;
+
+    // Phase 1 (baseline): the agent alone, firma not involved.
+    let baseline_run = run_agent_direct(
+        agent.command(),
+        &agent_args,
+        &ctx.workspace_dir,
+        scenario.timeout(),
+    )
+    .await?;
+
+    let baseline_phase = PhaseOutput {
+        agent: baseline_run,
+        http_requests: mock_server.received_requests().await.unwrap_or_default(),
+    };
+
+    scenario.assert_baseline(&baseline_phase).with_context(|| {
+        format!(
+            "baseline FAILED\nstdout: {}\nstderr: {}",
+            baseline_phase.agent.stdout.trim(),
+            baseline_phase.agent.stderr.trim(),
+        )
+    })?;
+
+    // Forget what the baseline recorded, then mount the mocks queued by setup.
+    mock_server.reset().await;
+    for mock in std::mem::take(&mut ctx.mocks) {
+        mock.mount(&mock_server).await;
+    }
+
+    scenario.before_assert(&ctx)?;
+
+    // Phase 2 (enforcement): the same arguments, wrapped in `firma run`.
+    let firma = firma_bin();
+    let enforcement_run = run_enforcement(&firma, &ctx, &agent_args, scenario.timeout()).await?;
+
+    let enforcement_phase = PhaseOutput {
+        agent: enforcement_run,
+        http_requests: mock_server.received_requests().await.unwrap_or_default(),
+    };
+
+    // The audit trail is read from `audit.jsonl` in the state directory.
+    let audit_path = state_dir.join("audit.jsonl");
+    let firma_audit = FirmaAuditTrail::try_new(&audit_path)?;
+
+    scenario
+        .assert_enforcement(&ctx, &enforcement_phase, &firma_audit)
+        .with_context(|| {
+            format!(
+                "enforcement FAILED\nstdout: {}\nstderr: {}",
+                enforcement_phase.agent.stdout.trim(),
+                enforcement_phase.agent.stderr.trim(),
+            )
+        })?;
+
+    Ok(())
+}
+
+/// Spawn `cmd` and wait up to `timeout`. On timeout: kill the process and
+/// collect whatever partial stdout/stderr was written. Pipe readers stop
+/// `DRAIN_GRACE` after the deadline, so a descendant that inherited the pipes
+/// and keeps them open after the kill cannot hang this function.
+async fn run_with_timeout(
+    phase: Phase,
+    mut cmd: tokio::process::Command,
+    timeout: Duration,
+) -> Result<RunOutput, anyhow::Error> {
+    const DRAIN_GRACE: Duration = Duration::from_secs(5);
+    let start = Instant::now();
+    let mut child = cmd
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+        .with_context(|| format!("spawn {phase}"))?;
+    let read_limit = timeout.saturating_add(DRAIN_GRACE);
+    let stdout_pipe = child.stdout.take().context("stdout not piped")?;
+    let stderr_pipe = child.stderr.take().context("stderr not piped")?;
+    let stdout = tokio::spawn(read_pipe(stdout_pipe, read_limit));
+    let stderr = tokio::spawn(read_pipe(stderr_pipe, read_limit));
+
+    let exit_status = tokio::select! {
+        status = child.wait() => Some(status?),
+        () = tokio::time::sleep(timeout) => {
+            eprintln!("[{phase}] timed out after {timeout:?} - killing");
+            let _ = child.kill().await;
+            let _ = child.wait().await;
+            None
+        },
+    };
+
+    let elapsed = start.elapsed();
+    let stdout = stdout.await?;
+    let stderr = stderr.await?;
+
+    let Some(exit_status) = exit_status else {
+        return Err(RunTimeoutError {
+            phase,
+            stdout,
+            stderr,
+            elapsed,
+        }
+        .into());
+    };
+
+    Ok(RunOutput {
+        success: exit_status.success(),
+        exit_code: exit_status.code(),
+        stdout,
+        stderr,
+        elapsed,
+    })
+}
+
+/// Read `pipe` to EOF or error, at most `limit`; lossy UTF-8 of what was read.
+async fn read_pipe(mut pipe: impl tokio::io::AsyncRead + Unpin, limit: Duration) -> String {
+    let mut bytes = Vec::new();
+    let fill = async {
+        let mut chunk = [0_u8; 8192];
+        while let Ok(n @ 1..) = pipe.read(&mut chunk).await {
+            bytes.extend_from_slice(&chunk[..n]);
+        }
+    };
+    let _ = tokio::time::timeout(limit, fill).await;
+    String::from_utf8_lossy(&bytes).to_string()
+}
+
+/// Baseline phase: run the bare agent from `workspace`, without firma.
+async fn run_agent_direct(
+    agent_cmd: &str,
+    agent_args: &[String],
+    workspace: &Path,
+    timeout: Duration,
+) -> Result<RunOutput, anyhow::Error> {
+    if agent_available(agent_cmd) {
+        let mut launch = tokio::process::Command::new(agent_cmd);
+        launch.current_dir(workspace).args(agent_args);
+        return run_with_timeout(Phase::Baseline, launch, timeout).await;
+    }
+    bail!("[baseline] agent '{agent_cmd}' not found on PATH")
+}
+
+/// Enforcement phase: run the agent through `firma run` from the workspace
+/// directory, passing `firma.toml` from `ctx.config_dir()` as `--config`.
+async fn run_enforcement(
+    firma_bin: &Path,
+    ctx: &ScenarioSetup,
+    agent_args: &[String],
+    timeout: Duration,
+) -> Result<RunOutput, anyhow::Error> {
+    let mut firma = tokio::process::Command::new(firma_bin);
+    firma.current_dir(&ctx.workspace_dir);
+    firma.args(["run", "--profile", ctx.agent.profile(), "--config"]);
+    firma.arg(ctx.config_dir().join("firma.toml"));
+    // The macOS VzBackend works in compatibility mode (sandbox-exec plus
+    // HTTP_PROXY), which is non-structural; Linux confines structurally via bwrap.
+    #[cfg(target_os = "macos")]
+    firma.arg("--allow-non-structural");
+    if let Some(seed) = ctx.capability_seed.as_ref() {
+        firma.arg("--capability-file").arg(seed);
+    }
+    if let Some(session) = ctx.capability_session_id.as_ref() {
+        firma.env("FIRMA_RUN_SESSION_ID", session);
+    }
+    // The agent command and its arguments follow a literal `--`.
+    firma.arg("--").arg(ctx.agent.command()).args(agent_args);
+
+    run_with_timeout(Phase::Enforcement, firma, timeout).await
+}
+
+/// True when `which <name>` runs and exits successfully.
+fn agent_available(name: &str) -> bool {
+    let lookup = std::process::Command::new("which").arg(name).output();
+    // A `which` that cannot be spawned counts as "not available".
+    matches!(lookup, Ok(found) if found.status.success())
+}
diff --git a/tests/e2e/scenario.rs b/tests/e2e/scenario.rs
new file mode 100644
index 00000000..05b1d487
--- /dev/null
+++ b/tests/e2e/scenario.rs
@@ -0,0 +1,52 @@
+use std::time::Duration;
+
+use crate::audit::FirmaAuditTrail;
+use crate::runner::RunOutput;
+use crate::setup::ScenarioSetup;
+
+/// Combined output from one scenario phase: agent result + mock HTTP captures.
+pub struct PhaseOutput {
+    pub agent: RunOutput, // exit status, captured stdout/stderr and duration
+    pub http_requests: Vec<wiremock::Request>, // requests the mock server received
+}
+
+pub trait EnforcementScenario: Send + Sync {
+    fn name(&self) -> &'static str;
+
+    /// Maximum wall-clock time allowed for each phase (baseline and enforcement).
+    fn timeout(&self) -> Duration {
+        Duration::from_mins(5)
+    }
+
+    /// Configure the scenario: register HTTP mock routes, add mapping rules,
+    /// append Cedar policy rules, configure sandbox mounts, etc.
+    fn setup(&self, _ctx: &mut ScenarioSetup) -> Result<(), anyhow::Error> {
+        Ok(())
+    }
+
+    /// Hook run right before each phase's process is launched (both phases).
+    fn before_assert(&self, _ctx: &ScenarioSetup) -> Result<(), anyhow::Error> {
+        Ok(())
+    }
+
+    /// Natural-language prompt sent to the agent.
+    fn prompt(&self, ctx: &ScenarioSetup) -> String;
+    /// Check the baseline run; an `Err` ends the scenario before enforcement.
+    fn assert_baseline(&self, output: &PhaseOutput) -> Result<(), anyhow::Error>;
+    /// Check the enforced run against its output and the firma audit trail.
+    fn assert_enforcement(
+        &self,
+        ctx: &ScenarioSetup,
+        output: &PhaseOutput,
+        audit: &FirmaAuditTrail,
+    ) -> Result<(), anyhow::Error>;
+}
+
+/// Which run of a scenario produced an output: the unenforced baseline or the
+/// firma-enforced run. `Display` renders the variant name in snake_case.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, strum::Display)]
+#[strum(serialize_all = "snake_case")]
+pub enum Phase {
+    Baseline, // the agent run directly, without firma
+    Enforcement, // the agent run through `firma run`
+}
diff --git a/tests/e2e/scenarios/mod.rs b/tests/e2e/scenarios/mod.rs
new file mode 100644
index 00000000..7abd844f
--- /dev/null
+++ b/tests/e2e/scenarios/mod.rs
@@ -0,0 +1,5 @@
+mod simple_prompt;
+
+pub use simple_prompt::SimplePrompt;
+
+pub use crate::scenario::EnforcementScenario;
diff --git a/tests/e2e/scenarios/simple_prompt.rs b/tests/e2e/scenarios/simple_prompt.rs
new file mode 100644
index 00000000..d93cddcf
--- /dev/null
+++ b/tests/e2e/scenarios/simple_prompt.rs
@@ -0,0 +1,41 @@
+use crate::audit::FirmaAuditTrail;
+use crate::scenario::{EnforcementScenario, PhaseOutput};
+use crate::setup::ScenarioSetup;
+
+/// Minimal scenario: a greeting prompt with the default firma configuration.
+pub struct SimplePrompt;
+
+impl EnforcementScenario for SimplePrompt {
+    fn name(&self) -> &'static str {
+        "simple_prompt"
+    }
+
+    /// Turn the workspace into a git repository, then run `firma config`.
+    fn setup(&self, ctx: &mut ScenarioSetup) -> Result<(), anyhow::Error> {
+        ctx.git_init_workspace()?;
+        ctx.firma_config().run()
+    }
+
+    fn prompt(&self, _ctx: &ScenarioSetup) -> String {
+        String::from("Hi, what's up?")
+    }
+
+    /// The unconfined agent only has to exit successfully.
+    fn assert_baseline(&self, output: &PhaseOutput) -> Result<(), anyhow::Error> {
+        anyhow::ensure!(output.agent.success, "baseline agent failed: {}", output.agent.stderr);
+        Ok(())
+    }
+
+    /// The confined agent must succeed and match the per-agent audit snapshot.
+    fn assert_enforcement(
+        &self,
+        ctx: &ScenarioSetup,
+        output: &PhaseOutput,
+        audit: &FirmaAuditTrail,
+    ) -> Result<(), anyhow::Error> {
+        anyhow::ensure!(output.agent.success, "enforcement agent failed: {}", output.agent.stderr);
+        // The snapshot is named after the agent kind, so each agent has its own file.
+        insta::assert_debug_snapshot!(ctx.agent.kind.as_ref(), &audit);
+        Ok(())
+    }
+}
diff --git a/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__claude.snap b/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__claude.snap
new file mode 100644
index 00000000..03deaa39
--- /dev/null
+++ b/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__claude.snap
@@ -0,0 +1,15 @@
+---
+source: crates/firma/../../tests/e2e/scenarios/simple_prompt.rs
+expression: "&audit"
+---
+FirmaAuditTrail(
+    {
+        AuditEvent {
+            action: "communication.external.send",
+            resource: "api.anthropic.com/",
+            decision: Allow,
+            deny_reason: "",
+            dispatch_status: 200,
+        },
+    },
+)
diff --git a/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__codex.snap b/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__codex.snap
new file mode 100644
index 00000000..f1b5b155
--- /dev/null
+++ b/tests/e2e/scenarios/snapshots/e2e__scenarios__simple_prompt__codex.snap
@@ -0,0 +1,43 @@
+---
+source: crates/firma/../../tests/e2e/scenarios/simple_prompt.rs
+expression: "&audit"
+---
+FirmaAuditTrail(
+    {
+        AuditEvent {
+            action: "communication.external.send",
+            resource: "ab.chatgpt.com/",
+            decision: Allow,
+            deny_reason: "",
+            dispatch_status: 200,
+        },
+        AuditEvent {
+            action: "communication.external.send",
+            resource: "api.openai.com/",
+            decision: Allow,
+            deny_reason: "",
+            dispatch_status: 200,
+        },
+        AuditEvent {
+            action: "communication.external.send",
+            resource: "chatgpt.com/",
+            decision: Allow,
+            deny_reason: "",
+            dispatch_status: 200,
+        },
+        AuditEvent {
+            action: "network.connect",
+            resource: "github.com/",
+            decision: Deny,
+            deny_reason: "token invalid: no capability token covers action 'code.write' on resource 'github.com/'",
+            dispatch_status: 0,
+        },
+        AuditEvent {
+            action: "raw.http.GET",
+            resource: "api.github.com/repos/openai/plugins",
+            decision: Deny,
+            deny_reason: "token invalid: no capability token covers action 'code.read' on resource 'api.github.com/repos/openai/plugins'",
+            dispatch_status: 0,
+        },
+    },
+)
diff --git a/tests/e2e/setup.rs b/tests/e2e/setup.rs
new file mode 100644
index 00000000..d26739a4
--- /dev/null
+++ b/tests/e2e/setup.rs
@@ -0,0 +1,224 @@
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+
+use anyhow::Context;
+use firma_sidecar::config::MappingRuleConfig;
+use wiremock::{Mock, MockServer};
+
+use crate::agent::{Agent, AgentKind};
+use crate::policy::PolicyBuilder;
+use crate::{config, firma_bin};
+
+// ── ScenarioSetup ─────────────────────────────────────────────────────────────
+
+pub struct ScenarioSetup {
+    pub workspace_dir: PathBuf, // working directory of the agent in both phases
+    pub protected_dir: PathBuf, // NOTE(review): not read anywhere in view; confirm intended use
+    pub capability_seed: Option<PathBuf>, // passed to `firma run --capability-file` when set
+    pub capability_session_id: Option<String>, // exported as `FIRMA_RUN_SESSION_ID` when set
+
+    /// Shared mock server. Scenarios push built `Mock` objects into `mocks`;
+    /// the runner mounts them between the baseline and enforcement phases.
+    pub mock_server: Arc<MockServer>,
+    pub mocks: Vec<Mock>,
+
+    pub(crate) config_dir: PathBuf, // passed to `firma config -o`; `firma.toml` is read from it
+    pub(crate) state_dir: PathBuf, // passed as `--state-dir`; the runner reads `audit.jsonl` here
+    pub(crate) agent: Agent, // agent under test; also supplies the firma profile
+}
+
+impl ScenarioSetup {
+    /// Append a mapping rule that classifies `method` requests for `path` on
+    /// `host_port` as `action_class`.
+    ///
+    /// A companion `CONNECT` rule with an empty path and the same action class
+    /// is added too, so the TLS tunnel itself is classified.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any failure from [`config::add_mapping_rules`].
+    pub fn add_mapping_rule(
+        &self,
+        host_port: &str,
+        method: &str,
+        path: &str,
+        action_class: &str,
+    ) -> Result<(), anyhow::Error> {
+        let rule = |method: &str, path: &str| MappingRuleConfig {
+            method: Some(method.to_owned()),
+            host: host_port.to_owned(),
+            path: Some(path.to_owned()),
+            action_class: action_class.to_owned(),
+        };
+        let rules = vec![rule(method, path), rule("CONNECT", "")];
+        config::add_mapping_rules(&self.config_dir, rules)
+    }
+
+    /// Directory passed to `firma config` as `-o`.
+    #[must_use]
+    pub fn config_dir(&self) -> &Path {
+        self.config_dir.as_path()
+    }
+
+    /// Start a [`PolicyBuilder`] bound to this setup.
+    pub fn policy(&self) -> PolicyBuilder<'_> {
+        PolicyBuilder::new(self)
+    }
+
+    /// Issue a capability through [`config::issue_capability`], then remember
+    /// the returned seed path and `session_id` for the enforcement run.
+    pub fn issue_capability(
+        &mut self,
+        agent_id: &str,
+        session_id: &str,
+        action: &str,
+        scope: &str,
+        ttl_secs: u64,
+    ) -> Result<(), anyhow::Error> {
+        let issued = config::issue_capability(
+            &self.config_dir,
+            agent_id,
+            session_id,
+            action,
+            scope,
+            ttl_secs,
+        );
+        // Nothing is recorded unless the capability was actually issued.
+        self.capability_seed = Some(issued?);
+        self.capability_session_id = Some(session_id.to_owned());
+        Ok(())
+    }
+
+    /// Run `git init` inside `workspace_dir`.
+    ///
+    /// # Errors
+    ///
+    /// Fails when `git` cannot be spawned or `git init` exits unsuccessfully.
+    pub fn git_init_workspace(&self) -> Result<(), anyhow::Error> {
+        let mut git = std::process::Command::new("git");
+        git.arg("init").current_dir(&self.workspace_dir);
+        let out = git.output().context("spawn git init")?;
+        if !out.status.success() {
+            anyhow::bail!("git init failed: {}", String::from_utf8_lossy(&out.stderr));
+        }
+        Ok(())
+    }
+
+    /// Begin a `firma config` invocation preloaded with this setup's defaults.
+    #[must_use]
+    pub fn firma_config(&self) -> FirmaConfigBuilder<'_> {
+        FirmaConfigBuilder::new(self)
+    }
+}
+
+// ── FirmaConfigBuilder ────────────────────────────────────────────────────────
+
+pub struct FirmaConfigBuilder<'a> {
+    ctx: &'a ScenarioSetup, // supplies the agent profile and the config/state directories
+    mode: &'static str, // value of `--mode`
+    posture: &'static str, // value of `--posture`
+    mappings: Vec<&'static str>, // one `--mapping` flag per entry
+    workspace: Option<&'a Path>, // value of `--workspace`; flag omitted when `None`
+    authority_listen: &'static str, // value of `--authority-listen`
+}
+
+impl<'a> FirmaConfigBuilder<'a> {
+    /// Builder defaults: `agent-local` mode, `dev` posture, the scenario
+    /// workspace as mount, authority on `127.0.0.1:0`, and the `openai` +
+    /// `github` mappings for Codex or `anthropic` for any other agent kind.
+    pub(crate) fn new(ctx: &'a ScenarioSetup) -> Self {
+        let mappings = match ctx.agent.kind {
+            AgentKind::Codex => vec!["openai", "github"],
+            _ => vec!["anthropic"],
+        };
+        Self {
+            ctx,
+            mode: "agent-local",
+            posture: "dev",
+            mappings,
+            workspace: Some(&ctx.workspace_dir),
+            authority_listen: "127.0.0.1:0",
+        }
+    }
+
+    /// Use `posture` for `--posture` instead of the default `"dev"`.
+    #[must_use]
+    pub fn posture(self, posture: &'static str) -> Self {
+        Self { posture, ..self }
+    }
+
+    /// Mount `path` as the workspace instead of `ctx.workspace_dir`.
+    #[must_use]
+    pub fn workspace(self, path: &'a Path) -> Self {
+        let workspace = Some(path);
+        Self { workspace, ..self }
+    }
+
+    /// Drop the workspace mount, so no `--workspace` flag is passed.
+    #[must_use]
+    pub fn no_workspace(self) -> Self {
+        let workspace = None;
+        Self { workspace, ..self }
+    }
+
+    /// Replace the whole mapping selection with `mappings`.
+    #[must_use]
+    pub fn mappings(self, mappings: Vec<&'static str>) -> Self {
+        Self { mappings, ..self }
+    }
+
+    /// Select no mappings at all, so no `--mapping` flag is passed.
+    #[must_use]
+    pub fn no_mappings(self) -> Self {
+        let mappings = Vec::new();
+        Self { mappings, ..self }
+    }
+
+    /// Use `addr` for `--authority-listen` instead of `"127.0.0.1:0"`.
+    #[must_use]
+    pub fn authority_listen(self, addr: &'static str) -> Self {
+        Self {
+            authority_listen: addr,
+            ..self
+        }
+    }
+
+    /// Run `firma config --yes` with the collected options.
+    ///
+    /// The scenario's config directory is passed as `-o` and its state
+    /// directory as `--state-dir`. Every selected mapping adds one `--mapping`
+    /// flag; `--workspace` is only passed while a mount is set.
+    ///
+    /// NOTE(review): earlier docs called this `firma config init`, yet no
+    /// `init` argument is passed here — confirm the intended subcommand.
+    ///
+    /// # Errors
+    ///
+    /// Fails when `firma` cannot be spawned or exits unsuccessfully (stderr is reported).
+    pub fn run(self) -> Result<(), anyhow::Error> {
+        let setup = self.ctx;
+        let mut firma = std::process::Command::new(firma_bin());
+        firma.args(["config", "--yes"]);
+        firma.args(["--mode", self.mode]);
+        firma.args(["--profile", setup.agent.profile()]);
+        firma.args(["--posture", self.posture]);
+        // Directories taken from the scenario setup.
+        firma.arg("-o").arg(&setup.config_dir);
+        firma.arg("--state-dir").arg(&setup.state_dir);
+        firma.args(["--authority-listen", self.authority_listen]);
+        // Optional flags: one `--mapping` per entry, `--workspace` only if set.
+        for mapping in &self.mappings {
+            firma.args(["--mapping", mapping]);
+        }
+        if let Some(mount) = self.workspace {
+            firma.arg("--workspace").arg(mount);
+        }
+
+        let finished = firma.output().context("spawn firma config")?;
+        if finished.status.success() {
+            return Ok(());
+        }
+        let stderr = String::from_utf8_lossy(&finished.stderr);
+        anyhow::bail!("firma config failed: {stderr}")
+    }
+}