Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
67 commits
Select commit Hold shift + click to select a range
227869c
feat(tests): e2e integration harness + normal_llm_call scenario
luca-iachini Jun 19, 2026
c7a30d2
refactor(tests): rename integration_tests → e2e
luca-iachini Jun 19, 2026
5decd46
feat(tests): add remaining e2e enforcement scenarios
luca-iachini Jun 19, 2026
9ec6259
fix(run): wrap authority config in [authority] section before spawn
luca-iachini Jun 19, 2026
17b877d
fix(run): strip TLS + ephemeral port in resolve_persisted_paths
luca-iachini Jun 19, 2026
5c0e80d
fix(run): pin ca.dir to marker dir in synthesized sidecar config
luca-iachini Jun 19, 2026
d7709df
split harness module
luca-iachini Jun 19, 2026
ba8badd
fix clippy
luca-iachini Jun 19, 2026
d399954
test(e2e): keep only normal_llm_call on this branch
luca-iachini Jun 19, 2026
8fcaed3
ci: fix test binary name integration_tests → e2e
luca-iachini Jun 19, 2026
708ceab
test(e2e): pass --allow-non-structural when bwrap unavailable
luca-iachini Jun 19, 2026
53da9ee
test(e2e): assert baseline passed before checking enforcement
luca-iachini Jun 19, 2026
54a921d
test(e2e): capture partial output on agent timeout
luca-iachini Jun 19, 2026
4d66062
test(e2e): use fs_err for audit log reads, explain non-JSON lines
luca-iachini Jun 19, 2026
6664bb9
test(e2e): use insta snapshot for normal_llm_call allow event
luca-iachini Jun 19, 2026
d0a533a
docs(e2e): drop protoc from prerequisites (already in CLAUDE.md)
luca-iachini Jun 19, 2026
d6ad672
test(e2e): nextest setup script + make e2e entry point
luca-iachini Jun 19, 2026
733be59
test(e2e): explain why mock server is hand-rolled vs wiremock
luca-iachini Jun 19, 2026
4fa738a
fix clippy
luca-iachini Jun 19, 2026
abf5ca9
test(e2e): remove stale comment in audit log parser
luca-iachini Jun 19, 2026
62cfc5f
Revert "test(e2e): remove stale comment in audit log parser"
luca-iachini Jun 19, 2026
1a2a9d2
test(e2e): error on non-audit lines in audit log
luca-iachini Jun 19, 2026
7c0ddab
refactor audit parsing
luca-iachini Jun 19, 2026
8e6eeca
feat(e2e): simple_prompt scenario — greeting to LLM provider
luca-iachini Jun 19, 2026
757e171
better insta
luca-iachini Jun 19, 2026
511f119
refactor(e2e): replace hand-rolled mock server with wiremock
luca-iachini Jun 19, 2026
9c39b0d
refactor(e2e): expose wiremock directly in ScenarioSetup
luca-iachini Jun 19, 2026
557371d
use wiremock types
luca-iachini Jun 19, 2026
00accb2
fix(mappings): classify *.chatgpt.com subdomains as communication.ext…
luca-iachini Jun 19, 2026
72dc539
fix(e2e): always build debug + point firma_bin() at it
luca-iachini Jun 19, 2026
e202b12
fix audit trail snapshot assert
luca-iachini Jun 19, 2026
4d073b9
Merge origin/main into fir-368-e2e-tests
luca-iachini Jun 19, 2026
34192a4
refactor(e2e): inline audit path toml edit
luca-iachini Jun 19, 2026
8bea09a
refactor
luca-iachini Jun 19, 2026
4ab871b
refresh snap
luca-iachini Jun 19, 2026
f1f8ddb
fix: drop stale firma-protobuf gitlink after merge
luca-iachini Jun 19, 2026
d3d5c58
fix fmt
luca-iachini Jun 19, 2026
9d9d599
fix test assertion
luca-iachini Jun 19, 2026
01071e7
rename to e2e tests
luca-iachini Jun 19, 2026
6e5f99a
fix(mappings): add *.openai.com CONNECT + REST rules
luca-iachini Jun 19, 2026
171f801
refactor audit trail
luca-iachini Jun 20, 2026
a8fd390
refactor runner
luca-iachini Jun 20, 2026
428a145
use nextest in the workflow
luca-iachini Jun 20, 2026
781d1cf
simplify bin discovery
luca-iachini Jun 20, 2026
9670be8
remove doctor
luca-iachini Jun 20, 2026
94739ca
simplify readme
luca-iachini Jun 20, 2026
12a2194
simplify config writing
luca-iachini Jun 20, 2026
d4cf277
refactor runner
luca-iachini Jun 20, 2026
c314b2c
do not replace dev.cedar
luca-iachini Jun 20, 2026
30a797e
add --allow-non-structural for macOs
luca-iachini Jun 20, 2026
684f704
Merge remote-tracking branch 'origin/main' into fir-368-e2e-tests
luca-iachini Jun 20, 2026
8dba1ec
remove old snap
luca-iachini Jun 20, 2026
13d6ccd
remove unused helpers
luca-iachini Jun 20, 2026
f0b749b
fix line number
luca-iachini Jun 20, 2026
b3d6b3e
remove dead code
luca-iachini Jun 20, 2026
f5632a6
remove leftover
luca-iachini Jun 21, 2026
9bf6c22
update action tag
luca-iachini Jun 22, 2026
62112be
Merge remote-tracking branch 'origin/main' into fir-368-e2e-tests
luca-iachini Jun 22, 2026
ba3b254
add codex authentication step
luca-iachini Jun 22, 2026
10e3d18
suppress datadog calls
luca-iachini Jun 22, 2026
9d3b55b
simply changes
luca-iachini Jun 22, 2026
e47fedc
fix clippy
luca-iachini Jun 22, 2026
a2a2cdb
add apparmor bwrap profile
luca-iachini Jun 22, 2026
ada8352
wip test
luca-iachini Jun 22, 2026
7983396
update insta for api key scenario
luca-iachini Jun 22, 2026
113581d
fix advisory
luca-iachini Jun 22, 2026
51c5ed8
remove workflow trigger
luca-iachini Jun 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions .github/workflows/e2e-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
name: E2E Tests

on:
push:
tags:
- "v*.*.*"
workflow_dispatch:

permissions:
contents: read

concurrency:
group: e2e-tests-${{ github.ref }}
cancel-in-progress: true

env:
CARGO_TERM_COLOR: always

jobs:
e2e:
name: e2e (${{ matrix.os }}, ${{ matrix.agent.name }})
runs-on: ${{ matrix.os }}
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
Comment thread
luca-iachini marked this conversation as resolved.
agent:
- name: claude
package: "@anthropic-ai/claude-code"
- name: codex
package: "@openai/codex"

steps:
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
with:
persist-credentials: false

- uses: actions-rust-lang/setup-rust-toolchain@46268bd060767258de96ed93c1251119784f2ab6 # v1.16.1
with:
rustflags: ""
cache: false

- name: Install cargo-binstall
uses: cargo-bins/cargo-binstall@30b5ca8b54e1dcffd9548bc87ede1531310fdc67 # v1.20.0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Load tool versions
shell: bash
run: grep -E '^[A-Z0-9_]+=' tool-versions.env >> "$GITHUB_ENV"
- name: Install cargo-nextest
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: command -v cargo-nextest || cargo binstall -y --force --locked cargo-nextest@$CARGO_NEXTEST_VERSION
shell: bash

- name: Install protoc
uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3.0.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}

- name: Install bubblewrap (Linux)
if: runner.os == 'Linux'
run: sudo apt-get install -y bubblewrap

# Ubuntu 24.04 ships kernel.apparmor_restrict_unprivileged_userns=1, which
# transitions bwrap to a profile that strips CAP_NET_ADMIN inside its user
# namespace, so it cannot bring up loopback (RTM_NEWADDR). Install the
# targeted AppArmor profile that lets bwrap keep its caps in the userns.
- name: Allow bwrap user namespaces via AppArmor profile (Linux)
if: runner.os == 'Linux'
run: |
sudo tee /etc/apparmor.d/bwrap >/dev/null <<'EOF'
abi <abi/4.0>,
include <tunables/global>

profile bwrap /usr/bin/bwrap flags=(unconfined) {
userns,
include if exists <local/bwrap>
}
EOF
sudo apparmor_parser -r /etc/apparmor.d/bwrap

- name: Install ${{ matrix.agent.name }}
run: |
npm install -g '${{ matrix.agent.package }}'
${{ matrix.agent.name }} --version

- name: Authenticate codex
if: matrix.agent.name == 'codex'
run: printenv OPENAI_API_KEY | codex login --with-api-key
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

# nextest builds the firma binary as part of the e2e test; firma_bin()
# reads its path from CARGO_BIN_EXE_firma.
- name: Run e2e tests
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
run: cargo nextest run -p firma --test e2e --run-ignored all -E 'test(/${{ matrix.agent.name }}::/)'
31 changes: 29 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,14 @@ firma-protobuf = "0.1.1"
firma-run = { path = "crates/firma-run" }
firma-sidecar = { path = "crates/firma-sidecar" }
firma-stack = { path = "crates/firma-stack" }
fs-err = "3.3"
governor = "0.10"
hex = "0.4"
http-body = "1"
http-body-util = "0.1"
hyper = { version = "1", default-features = false }
hyper-util = { version = "0.1", default-features = false }
insta = { version = "1", features = ["json", "redactions"] }
lru = "0.17"
miette = { version = "7", features = ["fancy-no-backtrace"] }
nix = { version = "0.31", features = ["fs", "process", "signal", "socket", "user"] }
Expand All @@ -81,6 +83,7 @@ rustls = "0.23"
rustls-pemfile = "2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_repr = "0.1"
serde_yaml = "0.9"
serial_test = "3"
sha2 = "0.11"
Expand All @@ -105,5 +108,6 @@ uuid = { version = "1", features = ["v4", "v7", "serde"] }
wait-timeout = "0.2"
webpki-roots = "1"
windows-sys = { version = "0.59", features = ["Win32_Foundation", "Win32_Security", "Win32_System_Console", "Win32_System_JobObjects", "Win32_System_Threading"] }
wiremock = "0.6"
x509-parser = "0.16"
xxhash-rust = { version = "0.8", features = ["xxh3"] }
6 changes: 3 additions & 3 deletions crates/firma-authority/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;

/// Sentinel: unset `policy_dir`.
Expand All @@ -12,7 +12,7 @@ pub(crate) const DEFAULT_KEY_FILE: &str = "firma-authority.key";
///
/// Environment variables take precedence over TOML values and use the
/// `FIRMA_AUTHORITY_` prefix (e.g., `FIRMA_AUTHORITY_LISTEN_ADDR`).
#[derive(Debug, Clone, Deserialize)]
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct AuthorityConfig {
/// gRPC listen address (default: `[::1]:50051`).
Expand Down Expand Up @@ -51,7 +51,7 @@ pub struct AuthorityConfig {
/// TLS configuration for the Authority gRPC server.
///
/// Both values are required together to enable TLS.
#[derive(Debug, Clone, Default, Deserialize)]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct AuthorityTlsConfig {
/// Path to the TLS certificate file (PEM). Must be set together with
/// `tls_key_path`.
Expand Down
26 changes: 26 additions & 0 deletions crates/firma-run/src/sidecar/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,11 @@ pub fn synthesize(req: SynthesizeRequest<'_>) -> Result<TemplateSource, RunError
rebase_template_resource_paths(&mut value, dir)?;
}
override_interceptor(&mut value, req.socket_path, req.listen_addr)?;
// Pin ca.dir to the marker dir so the MITM CA cert lands where
// sidecar_trust_env_overrides expects it (<marker_dir>/firma-ca/).
// The default "./firma-ca/" is CWD-relative and would diverge when
// firma run's CWD differs from the marker dir.
override_ca_dir(&mut value, req.out_path)?;
if let Some(url) = req.authority_url {
override_authority_url(&mut value, url)?;
}
Expand Down Expand Up @@ -528,6 +533,27 @@ fn override_sidecar_mode(value: &mut toml::Value, mode: &str) -> Result<(), RunE
Ok(())
}

/// Force the sidecar CA directory to `<parent of out_path>/firma-ca`.
///
/// The directory containing the synthesized config (`out_path`) is treated as
/// the marker dir. The `ca` entry of the table returned by
/// `sidecar_table_mut` is created as an empty table when absent, and its
/// `dir` key is then set (overwriting any previous value) to the marker dir
/// joined with `firma-ca`.
///
/// # Errors
///
/// Returns `RunError::Internal` when `out_path` has no parent component, or
/// when an existing `ca` entry is not a table. Any error produced by
/// `sidecar_table_mut` is propagated unchanged.
fn override_ca_dir(value: &mut toml::Value, out_path: &Path) -> Result<(), RunError> {
    // Resolve the target directory first so a bad `out_path` fails before the
    // TOML document is touched.
    let parent = match out_path.parent() {
        Some(parent) => parent,
        None => {
            return Err(RunError::Internal(format!(
                "cannot resolve marker dir from synthesized config path {}",
                out_path.display()
            )));
        }
    };
    // NOTE(review): `display()` is lossy for non-UTF-8 paths — confirm that is
    // acceptable for paths written into the sidecar config.
    let dir_value = toml::Value::String(parent.join("firma-ca").display().to_string());

    // Fetch (or lazily create) the `ca` entry, then insist that it is a table.
    let ca_entry = sidecar_table_mut(value)?
        .entry("ca".to_string())
        .or_insert_with(|| toml::Value::Table(toml::value::Table::new()));
    match ca_entry.as_table_mut() {
        Some(ca_table) => {
            ca_table.insert("dir".to_string(), dir_value);
            Ok(())
        }
        None => Err(RunError::Internal("[sidecar.ca] is not a table".into())),
    }
}

/// Default the audit sink to a file at `audit_path` when the template did not
/// configure one. The per-run sidecar is spawned with a null stdout, so the
/// default `stdout` audit sink would silently discard every decision and
Expand Down
9 changes: 5 additions & 4 deletions crates/firma-sidecar/src/config/enforcement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
reason = "Authority-wired capability manifest support is defined now but not consumed yet"
)]

use serde::Deserialize;
use serde::{Deserialize, Serialize};

const VALID_HTTP_METHODS: &[&str] = &[
"GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS", "CONNECT",
Expand Down Expand Up @@ -130,14 +130,15 @@ impl Default for ConstraintEnforcementConfig {
// ---------------------------------------------------------------------------

/// A single mapping rule as deserialized from the rules TOML file.
#[derive(Debug, Clone, Deserialize)]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct MappingRuleConfig {
/// HTTP method to match (`None` = any method).
#[serde(default, skip_serializing_if = "Option::is_none")]
pub method: Option<String>,
/// Host pattern to match (supports `*` wildcard).
pub host: String,
/// Path pattern to match (supports `*` wildcard).
#[serde(default)]
#[serde(default, skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
/// Canonical action class this rule maps to.
pub action_class: String,
Expand Down Expand Up @@ -170,7 +171,7 @@ impl MappingRuleConfig {
}

/// Top-level structure of the mapping rules TOML file.
#[derive(Debug, Clone, Deserialize)]
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct MappingRulesFile {
/// Individual mapping rules.
#[serde(rename = "rules", default)]
Expand Down
8 changes: 8 additions & 0 deletions crates/firma/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,18 @@ nix = { workspace = true }
windows-sys = { workspace = true }

[dev-dependencies]
fs-err = { workspace = true }
insta = { workspace = true }
pretty_assertions = { workspace = true }
rand = { workspace = true }
serde_repr = { workspace = true }
strum = { workspace = true, features = ["derive"] }
tempfile = { workspace = true }
wiremock = { workspace = true }

[target.'cfg(unix)'.dev-dependencies]
nix = { workspace = true }

[[test]]
name = "e2e"
path = "../../tests/e2e/main.rs"
4 changes: 2 additions & 2 deletions crates/firma/src/services/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1494,8 +1494,8 @@ mod tests {
assert!(
rules
.iter()
.any(|r| r.host == "api.openai.com" && r.method.as_deref() == Some("CONNECT")),
"expected api.openai.com:443 CONNECT rule"
.any(|r| r.host == "*.openai.com" && r.method.as_deref() == Some("CONNECT")),
"expected *.openai.com:443 CONNECT rule"
);
}

Expand Down
2 changes: 1 addition & 1 deletion crates/firma/src/services/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ pub fn run(args: RunArgs) -> anyhow::Result<ExitCode> {
command: args.command,
authority_cli,
authority_profile: args.authority_profile,
user_config_path: None,
user_config_path: args.config.clone(),
allow_non_structural: args.allow_non_structural,
monitor_mode: args.monitor,
};
Expand Down
16 changes: 14 additions & 2 deletions crates/firma/templates/mappings/openai.toml
Original file line number Diff line number Diff line change
@@ -1,23 +1,35 @@
# OpenAI API mapping.
# Tunnels through without MITM; the LLM SDK does not need to trust firma-ca.

# API-key traffic (api.openai.com, etc.) — single-label wildcard.
[[rules]]
method = "CONNECT"
host = "api.openai.com"
host = "*.openai.com"
action_class = "communication.external.send"

[[rules]]
method = "CONNECT"
host = "chatgpt.com"
action_class = "communication.external.send"

# Subdomains (ab.chatgpt.com, etc.) — single-label wildcard.
[[rules]]
method = "CONNECT"
host = "*.chatgpt.com"
action_class = "communication.external.send"

# REST fallback (plain HTTP proxy or post-MITM).
[[rules]]
host = "api.openai.com"
host = "*.openai.com"
path = "*"
action_class = "communication.external.send"

[[rules]]
host = "chatgpt.com"
path = "*"
action_class = "communication.external.send"

[[rules]]
host = "*.chatgpt.com"
path = "*"
action_class = "communication.external.send"
3 changes: 3 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ test:
build:
cargo build --all-features --all-targets

e2e:
cargo nextest run -p firma --test e2e --run-ignored all

audit:
cargo audit --deny warnings

Expand Down
Loading
Loading