diff --git a/.api.env.example b/.api.env.example index d04ec8d..c763469 100644 --- a/.api.env.example +++ b/.api.env.example @@ -24,3 +24,4 @@ WEBHOOKERY_OBJECT_STORAGE_USE_SSL=false WEBHOOKERY_BOOTSTRAP_TENANT_ID=ten_dev WEBHOOKERY_BOOTSTRAP_API_KEY_HASH=sha256:5350c25a055e9bcd7668ea2145021dfb2be8d6c774228143f1cafc8e5f2d47c6 WEBHOOKERY_BOOTSTRAP_API_KEY_PREFIX=dev-bootstrap-key-change-me +WEBHOOKERY_TRUSTED_PROXY_CIDRS= diff --git a/.audits/codebase_audit_v1/remediation_backlog.md b/.audits/codebase_audit_v1/remediation_backlog.md new file mode 100644 index 0000000..12c0d37 --- /dev/null +++ b/.audits/codebase_audit_v1/remediation_backlog.md @@ -0,0 +1,285 @@ +# Backlog + +Project: Webhookery + +Status legend: + +- [ ] not done +- [x] done + +## Epic E1 - Evidence Integrity And Contract Safety [x] + +Description: Close the highest-risk correctness gaps around raw evidence discoverability and API contract drift before structural refactors. + +### Ticket E1-T1 - Link Duplicate Raw Payload Evidence [x] + +Description: Ensure duplicate inbound raw payload rows remain discoverable through receipts, timelines, retention, and body-inclusive exports. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- add DB-backed tests proving duplicate raw payloads are exported or explicitly represented as receipt-linked evidence +- preserve canonical event dedupe behavior and do not mutate original event history +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E1-T2 - Add Router And OpenAPI Parity Checks [x] + +Description: Replace the current `/openapi.yaml` smoke check with a real contract check that compares registered routes/methods against `openapi.yaml`. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- make the check deterministic and non-mutating so it can run from `make openapi-check` +- include high-risk request/response examples for ingest, raw reads, replay, exports, auth, alerts, notification, SIEM, and producer token endpoints +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E1-T3 - Strengthen Evidence Export Tests [x] + +Description: Add DB-backed export tests for raw payload bodies, duplicate receipts, delivery payloads, normalized envelopes, provider API evidence, and audit-chain proof files. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- require `WEBHOOKERY_TEST_DATABASE_URL` for DB-backed export assertions +- assert body permission gates and `410 Gone` retained-body behavior +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +## Epic E2 - Authorization Enforcement Consistency [x] + +Description: Make resource-aware role bindings and access policies the enforcement path for sensitive workflows while preserving fixed-role compatibility. + +### Ticket E2-T1 - Introduce A Central Authorization Service [x] + +Description: Add a single application-layer authorization service that wraps baseline `authz.Can`, role bindings, access policies, scopes, tenant checks, and explain logging. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- deny by default when tenant, actor, action, or resource context is missing +- preserve all existing fixed role and scoped API-key behavior +- add unit tests for allow, deny, wildcard, resource id, environment, scope, and wrong-tenant decisions +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E2-T2 - Wire Authorization Service Into Control Methods [x] + +Description: Replace direct `authz.Can` calls in management workflows with the central authorization service. 
+ +Implementation rules: + +- implement the ticket in dependency order after `E2-T1` +- cover sources, endpoints, subscriptions, routes, schemas, events, deliveries, replay, audit, retention, ops, alerts, notifications, SIEM, identity, producer trust, and adapter registry paths +- keep body access elevated and audited +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E2-T3 - Add Resource Policy Regression Tests [x] + +Description: Add wrong-tenant, denied-policy, allowed-binding, and scope-limited tests for every sensitive resource family. 
+ +Implementation rules: + +- implement the ticket in dependency order after `E2-T2` +- include negative tests for raw payload reads, replay creation, audit export payload inclusion, endpoint production changes, notification mutation, SIEM mutation, and secret rotation +- assert explain output does not leak secrets, payload bodies, sessions, or provider tokens +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +## Epic E3 - Hexagonal Boundary Repair [x] + +Description: Reduce the blast radius of core changes by moving orchestration out of infrastructure adapters and splitting god files into cohesive modules. + +### Ticket E3-T1 - Split Store Ports By Use Case [x] + +Description: Replace the monolithic `ControlStore` shape with smaller interfaces for source, endpoint, route, event, delivery, replay, audit, identity, ops, signal, and reconciliation use cases. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- keep public behavior unchanged +- avoid moving SQL and business logic in the same patch unless required for compile safety +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E3-T2 - Move Delivery Fanout Orchestration Into App Services [x] + +Description: Move route/subscription matching, transformation selection, payload snapshot creation decisions, and replay fanout policy out of `postgres.Store`. + +Implementation rules: + +- implement the ticket after `E3-T1` +- leave SQL persistence focused on storing and claiming records +- preserve delivery payload hashes, route evidence, replay config modes, and live-over-replay priority +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E3-T3 - Move Reconciliation Orchestration Out Of Postgres Store [x] + +Description: Move provider scan, lookup, redelivery, and recovered-event orchestration into an application reconciliation service. 
+ +Implementation rules: + +- implement the ticket after `E3-T1` +- keep provider HTTP adapters outside persistence packages +- preserve provider API evidence hashes, recovered-event semantics, redelivery audit evidence, and cursor behavior +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E3-T4 - Split HTTP And CLI Entrypoint Files [x] + +Description: Split `server.go` and `cmd/whcp/main.go` into cohesive resource and command files without changing routes or flags. + +Implementation rules: + +- implement the ticket after the high-risk behavior fixes are complete +- keep route registration explicit and easy to compare with OpenAPI +- preserve CLI flags, exit behavior, file permissions, and redaction guarantees +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +## Epic E4 - Runtime Resilience And Startup Safety [x] + +Description: Improve operational isolation so one subsystem failure or backfill cannot degrade unrelated core product work. 
+ +### Ticket E4-T1 - Isolate Worker Phases [x] + +Description: Change worker execution so delivery, retention, metrics, alerts, notification, and SIEM phases report independent results instead of returning on the first subsystem error. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- preserve at-least-once semantics and existing retry state +- add tests proving one phase failure does not prevent independent phases from running +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E4-T2 - Make Audit Chain Backfill Explicit And Bounded [x] + +Description: Move audit-chain backfill out of automatic store construction into an explicit migration/admin/scheduler path with leases and bounded progress. 
+ +Implementation rules: + +- implement the ticket after adding characterization tests for current backfill behavior +- avoid startup-time unbounded scans in API and worker processes +- preserve idempotence and deterministic `occurred_at, id` ordering +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E4-T3 - Add Trusted Proxy Policy For Session Metadata [x] + +Description: Stop trusting `X-Forwarded-For` unconditionally and add explicit trusted-proxy configuration for OIDC session IP metadata. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- default to `RemoteAddr` unless a configured trusted proxy boundary applies +- do not accept proxy-supplied mTLS or auth identity headers +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +## Epic E5 - Persistence Test Quality [x] + +Description: Replace static persistence assertions with behavior-level confidence for tenant isolation, migrations, SQL constraints, and failure boundaries. 
+ +### Ticket E5-T1 - Convert Highest-Risk Static Store Tests To DB Tests [x] + +Description: Convert static tests for source/endpoint/route/subscription CRUD, alerts, notification, SIEM, retry policy, and audit chain into live Postgres tests. + +Implementation rules: + +- implement the ticket incrementally by resource family +- require `WEBHOOKERY_TEST_DATABASE_URL` and skip cleanly when absent +- include wrong-tenant negatives and transaction assertions +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E5-T2 - Add Migration Upgrade And Idempotence Tests [x] + +Description: Add integration tests that migrate a clean database, re-run migrations, and validate key constraints/indexes through SQL behavior. 
+ +Implementation rules: + +- implement the ticket after `E5-T1` has established the DB test helper pattern +- verify additive migration safety for key tables and indexes +- do not rely only on checksum or source-string checks +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +### Ticket E5-T3 - Add CI Artifact For DB-Backed RC Evidence [x] + +Description: Upload a concise integration evidence artifact from the GitHub integration workflow showing migrations, RC E2E, and skipped restore-drill status. + +Implementation rules: + +- implement the ticket after DB tests are reliable +- keep artifacts free of database URLs, secrets, raw payload bodies, and customer data +- preserve no-live-provider/cloud-credential CI behavior +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete diff --git a/.audits/codebase_audit_v2/remediation_backlog.md b/.audits/codebase_audit_v2/remediation_backlog.md new file mode 100644 index 0000000..d537433 --- /dev/null +++ b/.audits/codebase_audit_v2/remediation_backlog.md @@ -0,0 +1,218 @@ +# 
Backlog + +Project: Webhookery + +Status legend: + +- [ ] not done +- [x] done + +## Epic E1 - Ingress Trust Semantics [x] + +Description: Ensure public provider ingress never turns structural payload validity into trusted side-effecting work without cryptographic verification or an explicit unsafe policy. + +### Ticket E1-T1 - Separate CloudEvents Validity From Verification [x] + +Description: Change CloudEvents handling so a structurally valid unsigned CloudEvents payload is captured as evidence but is not marked `signature_verified=true` and cannot fan out as trusted work by default. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Cover `internal/provider/provider.go`, `internal/app/service.go`, and `internal/app/delivery_fanout.go`. +- Preserve durable capture for malformed or unsigned CloudEvents where current ack policy allows evidence capture. + +### Ticket E1-T2 - Add Explicit Unsafe Routing Policy Tests [x] + +Description: Add negative tests proving unsigned CloudEvents do not create deliveries, plus policy tests for any intentionally allowed unsafe/archive-only routing mode. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Include provider-level, ingest-service, and delivery-fanout coverage. +- Update OpenAPI/docs only if the public contract changes. + +## Epic E2 - SSRF-Safe Egress Dialing [x] + +Description: Bind SSRF validation to the actual outbound connection for customer-controlled endpoint, notification, and SIEM URLs. + +### Ticket E2-T1 - Implement Pinned-IP HTTP Transport [x] + +Description: Add an egress transport that resolves the hostname, validates every resolved IP against policy, dials an allowed IP, and preserves the original Host/SNI semantics. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Keep redirects disabled or revalidate every redirect target before following it. +- Include DNS rebinding, private CIDR, metadata IP, IPv4-mapped IPv6, and IDNA cases. 
+ +### Ticket E2-T2 - Use Shared Safe Egress In Delivery And Signal Clients [x] + +Description: Wire the pinned egress transport into `deliveryhttp` and `signalhttp`, including worker runtime construction. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Cover customer deliveries, notification channels, and SIEM sinks. +- Keep response truncation and signing behavior unchanged. + +## Epic E3 - Durable Audit Evidence [x] + +Description: Make audit evidence for sensitive control-plane actions required, transactional, or durably recoverable. + +### Ticket E3-T1 - Replace Best-Effort Audit Writes For Sensitive Actions [x] + +Description: Update state-changing and evidence-sensitive store methods so audit write failure is returned or captured through a durable audit outbox instead of ignored. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Prioritize API key revocation, delivery retry/cancel, audit export download, dead-letter release, quarantine approval/rejection, and replay state changes. +- Keep read-only audit behavior explicit if reads intentionally remain best-effort. + +### Ticket E3-T2 - Add Audit Failure Injection Tests [x] + +Description: Add tests that force audit persistence failure and assert sensitive actions do not silently succeed without audit evidence. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Prefer focused fake-store tests for app behavior and live Postgres tests for transaction behavior. +- Ensure audit-chain updates remain compatible with existing chain verification. 
+ +## Epic E4 - Concurrent Duplicate Capture [x] + +Description: Preserve raw duplicate evidence and provider receipts even when duplicate webhook deliveries arrive concurrently. + +### Ticket E4-T1 - Make Dedupe Capture Atomic [x] + +Description: Refactor inbound capture to avoid the select-then-insert race on `(tenant_id, dedupe_key)` while still linking duplicate raw payloads and receipts to the first event. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Consider `INSERT ... ON CONFLICT`, row-level locks, or an idempotency/dedupe record lock. +- Preserve one routing outbox item for the canonical event and evidence rows for every receipt. + +### Ticket E4-T2 - Add Live Postgres Concurrency Regression Test [x] + +Description: Add an integration test that sends concurrent duplicate captures and verifies one event, multiple raw payloads, multiple provider receipts, and no failed duplicate response. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Use `WEBHOOKERY_TEST_DATABASE_URL`. +- Keep the test deterministic and isolated by tenant/source identifiers. + +## Epic E5 - Maintainability And Persistence Test Depth [x] + +Description: Reduce future change risk around the largest modules and improve the live persistence safety net. + +### Ticket E5-T1 - Split PostgreSQL Store By Resource Family [x] + +Description: Move related PostgreSQL methods into smaller files by resource family while preserving public store interfaces and behavior. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Do this after the audit, SSRF, and dedupe fixes have tests. +- Avoid behavior changes in the file split. 
+ +### Ticket E5-T2 - Add A Documented Live-Postgres Quality Gate [x] + +Description: Make live PostgreSQL integration coverage easier to run consistently and document exactly when it is required. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Align docs, Makefile, and CI naming around `WEBHOOKERY_TEST_DATABASE_URL`. +- Keep non-live `make fast-check` usable for local iteration. diff --git a/.audits/doc_audit_v1/remediation_backlog.md b/.audits/doc_audit_v1/remediation_backlog.md new file mode 100644 index 0000000..9be667f --- /dev/null +++ b/.audits/doc_audit_v1/remediation_backlog.md @@ -0,0 +1,370 @@ +# Backlog + +Project: Webhookery documentation remediation v1 + +Status legend: + +- [ ] not done +- [x] done + +## Epic E1 - Stabilize Documentation Entry Points [x] + +Description: Make the documentation source-of-truth clear before rewriting deeper docs. This epic removes stale guidance and gives readers a reliable first path through the repository. + +### Ticket E1-T1 - Update Agent And Source-Of-Truth Guidance [x] + +Description: Rewrite `AGENTS.md` so it reflects the current implementation-bearing repository and no longer claims the repo is pre-implementation. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Audit evidence: `AGENTS.md:20-24` and `AGENTS.md:103-110` are stale. + +### Ticket E1-T2 - Rewrite README As The Primary Entry Point [x] + +Description: Reduce `README.md` to product framing, implementation status, local quickstart, shortest smoke path, security promise, and links to canonical docs. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Move the long command list out of `README.md:24-113` instead of deleting useful commands. + +### Ticket E1-T3 - Add A Canonical Documentation Map [x] + +Description: Add a lean docs map in README or `docs/index.md` that names each canonical document, its audience, purpose, and source-of-truth boundary. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Keep the map short. Do not duplicate command lists or route catalogs. + +### Ticket E1-T4 - Reclassify The Initial Design Document [x] + +Description: Clarify whether `.initial_design.md` is historical design input or a maintained architecture reference, then add the minimum context needed to prevent misuse. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Audit evidence: `.initial_design.md:7-9` reads as prompt critique rather than maintained architecture documentation. + +## Epic E2 - Split And Tighten Operator Documentation [x] + +Description: Turn the overloaded operations runbook into maintainable operator documentation without losing the security-sensitive operational details. 
+ +### Ticket E2-T1 - Extract Canonical Configuration Reference [x] + +Description: Create or designate one configuration reference for environment variables, defaults, safe production values, secrets, and process applicability. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Align `.env.example`, `.api.env.example`, `.test.env.example`, Helm values, and Kubernetes Secret examples. + +### Ticket E2-T2 - Restructure Operations Around Runbooks [x] + +Description: Rewrite `docs/operations.md` as a runbook-focused document for production doctor, RC checks, backup/restore, incident triage, audit verification, and recovery. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Preserve durable-capture, audit-chain, restore, raw-payload, and secret-redaction guidance. 
+ +### Ticket E2-T3 - Move Feature Behavior Reference Out Of The Runbook [x] + +Description: Move dense feature behavior sections for auth, delivery, reconciliation, transformations, retention, signal egress, identity, producer trust, and SSRF into clearer reference sections or separate docs. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Do not create many tiny docs. Group by reader task and maintenance boundary. + +### Ticket E2-T4 - Consolidate Non-Claims And Security Promise Language [x] + +Description: Establish one canonical non-claims/security-promise section and replace repeated prose elsewhere with short links or references. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Keep legal, security, support, commercial, and trademark docs focused on their own audience. 
+ +## Epic E3 - Improve API, CLI, SDK, And Collection Task Support [x] + +Description: Make the API-first product usable from contracts, CLI commands, SDKs, and request collections without forcing readers through the operations monolith. + +### Ticket E3-T1 - Add OpenAPI Navigation And Common Contract Detail [x] + +Description: Add OpenAPI tags, operation IDs, common error responses, and representative examples for high-value workflows without changing API behavior. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Preserve `openapi.yaml` as canonical and keep `sdk/openapi.yaml` aligned. + +### Ticket E3-T2 - Create CLI Reference From Current Command Groups [x] + +Description: Move the README command catalog into a CLI reference organized by command group, required scope, example, expected outcome, and elevated-risk action. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Prefer generated or command-verified content where practical so docs do not drift from `cmd/whcp`. + +### Ticket E3-T3 - Expand SDK README For All Committed SDKs [x] + +Description: Update `sdk/README.md` with Go, Python, and TypeScript setup, auth handling, basic event ingestion, audit-chain verification, and error-redaction notes. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Keep examples minimal and avoid showing real secrets. + +### Ticket E3-T4 - Document Request Collection Smoke Paths [x] + +Description: Add collection usage notes for Postman and Bruno, including local variables, placeholder signatures, expected responses, and what each smoke request proves. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Do not expand collections into full API coverage unless a real reader task requires it. + +## Epic E4 - Strengthen Deployment And Release Documentation [x] + +Description: Make the self-hosted RC posture clearer across Compose, Kubernetes, Helm, Terraform, release evidence, and restore workflows. + +### Ticket E4-T1 - Write Common Deployment Posture Guidance [x] + +Description: Add or designate one common deployment guide that explains external dependencies, TLS/ingress, secret custody, object storage, network policy, readiness, backup/restore, upgrade, and rollback expectations. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Keep profile-specific READMEs concise and link to the common guide. 
+ +### Ticket E4-T2 - Rewrite Kubernetes, Helm, And Terraform Profile READMEs [x] + +Description: Update deployment profile READMEs with prerequisites, validation commands, secrets boundary, migration job behavior, image pinning, and links to operations/config references. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Avoid duplicating the same production-hardening text in three profile docs. + +### Ticket E4-T3 - Normalize Release Evidence Documentation [x] + +Description: Make `docs/release-evidence-template.md` the clear canonical release evidence artifact and reduce duplicated release-gate prose elsewhere. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Keep root `RELEASE_EVIDENCE.md` as a short router if useful. 
+ +### Ticket E4-T4 - Add Migration And Schema Operations Overview [x] + +Description: Add a concise schema/migration overview for DB reviewers and operators, focused on migration ordering, rollback stance, evidence-authority tables, and restore compatibility risk. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- This should summarize migration practice, not duplicate every SQL table definition. + +## Epic E5 - Add Documentation Maintenance Discipline [x] + +Description: Reduce future drift by documenting ownership, freshness checks, provider-claim review, and validation expectations. + +### Ticket E5-T1 - Document Provider Claim Freshness Rules [x] + +Description: Add a rule for dated provider-specific claims that records owner, review cadence, official source links, and how stale claims should be updated. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Current dated claims include provider behavior checked on May 25, 2026. + +### Ticket E5-T2 - Add Documentation Review Checklist [x] + +Description: Add a short checklist for documentation changes covering audience, doc type, source of truth, examples, command validation, security claims, and non-claim consistency. + +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- Place this where contributors and agents will actually see it. + +### Ticket E5-T3 - Align Documentation Checks With The New Structure [x] + +Description: Update documentation-adjacent validation so `make docs-check` continues to verify canonical docs, derived OpenAPI/SDK artifacts, collections, deployment docs, and required metadata after the split. 
+ +Implementation rules: + +- implement the ticket in the smallest sensible step +- run `make finalize` after completing the ticket, or an equivalent quality toolkit if `make finalize` is unavailable +- ensure the quality check covers testing, formatting, linting, and other relevant validation for the repository +- create a git commit immediately after the ticket is complete +- use Conventional Commits style for the commit message +- update the ticket checkmark from `[ ]` to `[x]` only after the ticket is actually complete +- update the epic checkmark from `[ ]` to `[x]` only when all child tickets are complete + +Notes: + +- This ticket depends on the earlier structural tickets so checks point at final canonical paths. diff --git a/.dockerignore b/.dockerignore index b75c56a..f9734b2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -21,6 +21,10 @@ release-evidence release-evidence/ backups backups/ +live-proof-private +live-proof-private/ +launch-metrics-private +launch-metrics-private/ coverage.out *.prof *.pprof diff --git a/.env.example b/.env.example index 69353f0..ca96968 100644 --- a/.env.example +++ b/.env.example @@ -3,12 +3,20 @@ POSTGRES_PASSWORD=change-me POSTGRES_DB=webhookery WEBHOOKERY_DATABASE_URL=postgres://webhookery:change-me@postgres:5432/webhookery?sslmode=disable WEBHOOKERY_HTTP_ADDR=:8080 +WEBHOOKERY_TLS_CERT_FILE= +WEBHOOKERY_TLS_KEY_FILE= +WEBHOOKERY_PRODUCER_MTLS_CLIENT_CA_FILE= WEBHOOKERY_ENABLE_UI=false +WEBHOOKERY_LOG_LEVEL=info +WEBHOOKERY_ENVIRONMENT=development WEBHOOKERY_SECRET_BOX_MODE=local WEBHOOKERY_MASTER_KEY_BASE64=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= WEBHOOKERY_VAULT_ADDR= WEBHOOKERY_VAULT_TOKEN= WEBHOOKERY_VAULT_TRANSIT_KEY= +WEBHOOKERY_AWS_REGION= +WEBHOOKERY_AWS_KMS_KEY_ID= +WEBHOOKERY_AWS_KMS_ENDPOINT= WEBHOOKERY_RAW_STORAGE_MODE=postgres WEBHOOKERY_OBJECT_STORAGE_ENDPOINT=minio:9000 WEBHOOKERY_OBJECT_STORAGE_BUCKET=webhookery-raw @@ -18,3 +26,5 @@ WEBHOOKERY_OBJECT_STORAGE_REGION=us-east-1 
WEBHOOKERY_OBJECT_STORAGE_USE_SSL=false WEBHOOKERY_BOOTSTRAP_TENANT_ID=ten_dev WEBHOOKERY_BOOTSTRAP_API_KEY_HASH=sha256:5350c25a055e9bcd7668ea2145021dfb2be8d6c774228143f1cafc8e5f2d47c6 +WEBHOOKERY_BOOTSTRAP_API_KEY_PREFIX=dev-bootstrap-key-change-me +WEBHOOKERY_TRUSTED_PROXY_CIDRS= diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..7b3eca3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,8 @@ +# Public Issue Guidance + +Use the structured issue forms when possible. + +Do not include API keys, webhook secrets, bearer tokens, session cookies, +private keys, provider credentials, database URLs with passwords, raw provider +signatures, raw payload bodies, customer data, or exploit payloads. Use +placeholders, hashes, redacted logs, and synthetic fixtures. diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..b9574c8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,71 @@ +name: Bug report +description: Report a reproducible Webhookery bug without secrets or raw payloads. +title: "[bug]: " +labels: + - bug +body: + - type: markdown + attributes: + value: | + Do not include API keys, webhook secrets, bearer tokens, session cookies, private keys, provider credentials, database URLs with passwords, raw signatures, raw payload bodies, customer data, or exploit payloads. 
+ - type: input + id: version + attributes: + label: Webhookery version or commit + placeholder: v0.1.0-rc1 or c43dc1d + validations: + required: true + - type: dropdown + id: area + attributes: + label: Area + options: + - ingest + - provider verification + - delivery/replay + - incident/evidence export + - CLI + - API/OpenAPI + - deployment/operations + - documentation + - other + validations: + required: true + - type: textarea + id: steps + attributes: + label: Reproduction steps + description: Use synthetic data, placeholders, hashes, and redacted output. + placeholder: | + 1. Run ... + 2. Call ... + 3. Observe ... + validations: + required: true + - type: textarea + id: expected + attributes: + label: Expected behavior + validations: + required: true + - type: textarea + id: actual + attributes: + label: Actual behavior + description: Include redacted errors or IDs. Do not include raw payloads or secrets. + validations: + required: true + - type: textarea + id: validation + attributes: + label: Checks run + placeholder: | + make docs-check: passed + make rc-check: failed at ... + - type: checkboxes + id: no_secrets + attributes: + label: No-secrets confirmation + options: + - label: I confirm this issue contains no secrets, raw payload bodies, raw signatures, private customer data, or production database URLs. + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..3fbe0f3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Security vulnerability + url: https://github.com/aatuh/webhookery/security/policy + about: Report vulnerabilities through the private security policy path. + - name: Commercial evaluation or support + url: https://github.com/aatuh/webhookery/blob/master/COMMERCIAL.md + about: Review commercial evaluation, license exception, and support boundaries. 
diff --git a/.github/ISSUE_TEMPLATE/docs.yml b/.github/ISSUE_TEMPLATE/docs.yml new file mode 100644 index 0000000..a80e059 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/docs.yml @@ -0,0 +1,45 @@ +name: Documentation issue +description: Report a docs gap, unclear guide, or stale public claim. +title: "[docs]: " +labels: + - documentation +body: + - type: markdown + attributes: + value: | + Do not include secrets, raw provider signatures, raw payload bodies, customer data, private proof artifacts, or production database URLs. + - type: input + id: doc_path + attributes: + label: Document path + placeholder: docs/evaluator-quickstart.md + validations: + required: true + - type: dropdown + id: issue_type + attributes: + label: Issue type + options: + - unclear setup step + - stale provider claim + - missing command output + - missing non-claim + - broken link + - typo or wording + - other + validations: + required: true + - type: textarea + id: details + attributes: + label: Details + description: Explain what is wrong and what evidence or source supports the correction. + validations: + required: true + - type: checkboxes + id: boundaries + attributes: + label: Claim boundaries + options: + - label: This issue does not ask Webhookery to claim exactly-once delivery, provider-side completeness, compliance certification, live-provider acceptance, or legal evidentiary certification without evidence. + required: true diff --git a/.github/ISSUE_TEMPLATE/evaluator-feedback.yml b/.github/ISSUE_TEMPLATE/evaluator-feedback.yml new file mode 100644 index 0000000..ca79a96 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/evaluator-feedback.yml @@ -0,0 +1,121 @@ +name: Evaluator or pilot feedback +description: Report sanitized evaluator or pilot feedback without secrets. 
+title: "[feedback]: " +labels: + - evaluator-feedback +body: + - type: markdown + attributes: + value: | + No secrets: do not include API keys, webhook secrets, bearer tokens, session cookies, private keys, provider credentials, database URLs with passwords, raw signatures, raw payload bodies, customer data, or exploit payloads. Use placeholders and redacted logs only. + - type: input + id: version + attributes: + label: Webhookery version or commit + description: Use a tag, commit SHA, or branch name. + placeholder: v0.2.0-pilot or abc1234 + validations: + required: true + - type: dropdown + id: deployment_mode + attributes: + label: Deployment mode + options: + - local docker compose + - helm + - kubernetes manifests + - terraform helm profile + - other self-hosted + validations: + required: true + - type: dropdown + id: provider_tested + attributes: + label: Provider tested + options: + - Stripe + - GitHub + - Shopify + - Slack + - generic HMAC + - generic JWT + - CloudEvents + - internal producer + - not provider-specific + validations: + required: true + - type: input + id: quickstart_step + attributes: + label: Quickstart or guide step reached + placeholder: docs/evaluator-quickstart.md step 3 + validations: + required: true + - type: textarea + id: failure_point + attributes: + label: Failure point or friction + description: Describe what blocked or slowed evaluation. Do not include secrets or raw payloads. + placeholder: The event reached DLQ, but I could not find which command generated the incident report. + validations: + required: true + - type: textarea + id: expected_behavior + attributes: + label: Expected behavior + description: What did you expect Webhookery or the docs to do? + validations: + required: true + - type: textarea + id: actual_behavior + attributes: + label: Actual behavior + description: What happened instead? Use IDs and hashes instead of raw payload bodies. 
+ validations: + required: true + - type: textarea + id: sanitized_logs + attributes: + label: Sanitized logs or evidence references + description: Include redacted command output, status codes, hashes, or paths. Omit secrets, signatures, raw bodies, and customer data. + placeholder: | + make docs-check: passed + event_id: evt_redacted + raw_payload_sha256: sha256:redacted + - type: dropdown + id: commercial_relevance + attributes: + label: Commercial relevance + options: + - evaluating paid pilot + - evaluating commercial license exception + - evaluating production-readiness review + - open-source evaluation only + - unsure + validations: + required: true + - type: dropdown + id: roadmap_category + attributes: + label: Roadmap intake category + description: Choose the closest category from docs/roadmap-intake-policy.md. + options: + - docs gap + - bug + - evaluator friction + - missing provider compatibility + - production hardening + - paid custom integration + - commercial packaging + - general roadmap candidate + - enterprise/future + - out of scope + validations: + required: true + - type: checkboxes + id: no_secrets_confirmation + attributes: + label: No-secrets confirmation + options: + - label: I confirm this issue contains no secrets, raw payload bodies, raw signatures, private customer data, or production database URLs. + required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..8b3b53f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,54 @@ +name: Feature request +description: Propose product work through the roadmap intake policy. +title: "[feature]: " +labels: + - enhancement +body: + - type: markdown + attributes: + value: | + Keep requests evidence-first and sanitized. Do not include secrets, raw payloads, customer data, provider credentials, or private proof artifacts. 
+ - type: textarea + id: problem + attributes: + label: Problem + description: What webhook evidence, replay, audit, deployment, or operator problem does this solve? + validations: + required: true + - type: textarea + id: workflow + attributes: + label: Target workflow + description: Describe the current workaround and the desired operator/evaluator workflow. + validations: + required: true + - type: dropdown + id: category + attributes: + label: Roadmap category + description: Choose the closest category from docs/roadmap-intake-policy.md. + options: + - docs gap + - bug + - evaluator friction + - missing provider compatibility + - production hardening + - paid custom integration + - commercial packaging + - general roadmap candidate + - enterprise/future + - out of scope + validations: + required: true + - type: textarea + id: evidence + attributes: + label: Evidence or urgency + description: Use sanitized examples, repeated evaluator feedback, or operational impact. Do not include sensitive data. + - type: checkboxes + id: non_goals + attributes: + label: Non-goal fit + options: + - label: This request does not require exactly-once semantics, arbitrary code plugins, a generic workflow engine, or unsafe routing of unverified provider payloads. + required: true diff --git a/.github/ISSUE_TEMPLATE/production_support.yml b/.github/ISSUE_TEMPLATE/production_support.yml new file mode 100644 index 0000000..cb4bbdd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/production_support.yml @@ -0,0 +1,51 @@ +name: Production support question +description: Ask about controlled self-hosted production or pilot operation without exposing sensitive data. +title: "[support]: " +labels: + - support +body: + - type: markdown + attributes: + value: | + Public support issues must be sanitized. 
Do not include API keys, webhook secrets, bearer tokens, session cookies, private keys, provider credentials, database URLs with passwords, raw signatures, raw payload bodies, customer data, or private live-provider proof artifacts. + - type: input + id: version + attributes: + label: Webhookery version or commit + placeholder: v0.1.0-rc1 or c43dc1d + validations: + required: true + - type: dropdown + id: deployment + attributes: + label: Deployment profile + options: + - docker compose + - kubernetes manifests + - helm + - terraform helm profile + - custom self-hosted + validations: + required: true + - type: textarea + id: question + attributes: + label: Question + description: Include sanitized topology, checks run, and current blocker. + validations: + required: true + - type: textarea + id: checks + attributes: + label: Checks run + placeholder: | + go run ./cmd/whcp doctor pilot --no-network: ... + make rc-check: ... + make live-postgres-check: ... + - type: checkboxes + id: no_secrets + attributes: + label: No-secrets confirmation + options: + - label: I confirm this public support issue contains no secrets, raw payload bodies, raw signatures, private customer data, or production database URLs. 
+ required: true diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..1bc7d95 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,42 @@ +version: 2 +updates: + - package-ecosystem: gomod + directory: / + schedule: + interval: weekly + open-pull-requests-limit: 5 + labels: + - dependencies + - go + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + open-pull-requests-limit: 5 + labels: + - dependencies + - github-actions + - package-ecosystem: docker + directory: / + schedule: + interval: weekly + open-pull-requests-limit: 5 + labels: + - dependencies + - docker + - package-ecosystem: npm + directory: /sdk/typescript + schedule: + interval: weekly + open-pull-requests-limit: 5 + labels: + - dependencies + - typescript + - package-ecosystem: terraform + directory: /deploy/terraform/webhookery-helm + schedule: + interval: weekly + open-pull-requests-limit: 5 + labels: + - dependencies + - terraform diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..89a6382 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,22 @@ +## Summary + +Describe the change and the implemented behavior it affects. + +## Security Context + +- Context: `api | authz | session-auth | provider-webhook | cli | frontend | secrets-privacy | data | infra | library | none` +- Trust boundaries touched: +- Security invariants preserved: + +## Validation + +List the commands run and their results. Use project-owned targets such as +`make docs-check`, `make fast-check`, `make rc-check`, or `make finalize` when +they apply. + +## Sensitive Data Check + +- [ ] This PR does not include API keys, webhook secrets, bearer tokens, + private keys, raw provider signatures, raw payload bodies, customer data, + production database URLs, or private live-provider proof artifacts. +- [ ] Public docs and examples use placeholders or sanitized evidence only. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1236d3f..e03e609 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,5 +38,8 @@ jobs: - name: Fast repository checks run: make fast-check + - name: Coverage gate + run: make coverage-check + - name: Final quality gate run: make finalize diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..cd27b63 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,36 @@ +name: CodeQL + +on: + push: + branches: + - master + - main + pull_request: + schedule: + - cron: "23 4 * * 2" + workflow_dispatch: + +permissions: + contents: read + +jobs: + analyze: + name: CodeQL Analyze + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write + steps: + - uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: go + queries: security-and-quality + + - name: Autobuild + uses: github/codeql-action/autobuild@v4 + + - name: Analyze + uses: github/codeql-action/analyze@v4 diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index f9763c8..a1fc7e3 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -49,8 +49,37 @@ jobs: - name: Install SDK check tools run: npm install --global typescript - - name: Postgres integration tests - run: make postgres-integration-test + - name: Live Postgres quality gate + id: live_postgres_check + run: make live-postgres-check + + - name: Provider conformance evidence + id: provider_conformance + run: make provider-conformance-check + + - name: Performance smoke evidence + id: perf_smoke + run: make perf-smoke - name: DB-backed release-candidate checks + id: rc_check run: make rc-check + + - name: Capture integration evidence + if: always() + env: + LIVE_POSTGRES_CHECK_OUTCOME: ${{ steps.live_postgres_check.outcome }} + PROVIDER_CONFORMANCE_OUTCOME: ${{ 
steps.provider_conformance.outcome }} + PERF_SMOKE_OUTCOME: ${{ steps.perf_smoke.outcome }} + RC_CHECK_OUTCOME: ${{ steps.rc_check.outcome }} + RESTORE_DRILL_STATUS: skipped_not_configured + BRANCH_PROTECTION_STATUS: not_checked_by_workflow + EXTERNAL_REVIEW_STATUS: not_completed_or_not_attached + run: scripts/integration_evidence.sh + + - name: Upload integration evidence + if: always() + uses: actions/upload-artifact@v4 + with: + name: integration-evidence + path: integration-evidence/ diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f78ec41..b3ad816 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,7 +7,7 @@ on: workflow_dispatch: permissions: - contents: read + contents: write id-token: write packages: write @@ -55,9 +55,21 @@ jobs: - name: Release acceptance run: make release-acceptance + - name: Provider conformance evidence + run: make provider-conformance-check + + - name: Provider proof metadata + run: make provider-proof-check + + - name: Performance smoke evidence + run: make perf-smoke + - name: Release-candidate checks run: make rc-check + - name: Coverage gate + run: make coverage-check + - uses: docker/setup-buildx-action@v3 - name: Login to GitHub Container Registry @@ -110,13 +122,14 @@ jobs: upload-artifact: false - name: Trivy image scan - uses: aquasecurity/trivy-action@0.31.0 + uses: aquasecurity/trivy-action@v0.36.0 with: image-ref: ${{ env.IMAGE_NAME }}@${{ steps.build.outputs.digest }} format: table exit-code: "1" ignore-unfixed: true severity: HIGH,CRITICAL + version: v0.70.0 - name: Capture release evidence summary env: @@ -135,24 +148,53 @@ jobs: printf '\n' printf '%s\n' "## Checks" printf '%s\n' "- make release-acceptance" + printf '%s\n' "- make provider-conformance-check" + printf '%s\n' "- make provider-proof-check" + printf '%s\n' "- make perf-smoke with local Postgres service" printf '%s\n' "- make rc-check with local Postgres service" + printf '%s\n' "- make 
coverage-check" printf '%s\n' "- Docker build and push" printf '%s\n' "- Cosign keyless image signing" printf '%s\n' "- SBOM generation" printf '%s\n' "- Trivy HIGH/CRITICAL image scan" printf '\n' printf '%s\n' "## Checksums" - sha256sum openapi.yaml sdk/openapi.yaml source.spdx.json image.spdx.json + sha256sum openapi.yaml sdk/openapi.yaml coverage.out source.spdx.json image.spdx.json + printf '\n' + printf '%s\n' "## Maturity Evidence" + printf '%s\n' "- Performance smoke output: tmp/perf-smoke/perf-smoke.json and tmp/perf-smoke/perf-smoke.md" + printf '%s\n' "- Provider conformance output: make provider-conformance-check" + printf '%s\n' "- Provider proof metadata output: make provider-proof-check" + printf '%s\n' "- Failure drill output: make rc-check with local fake providers/receivers" + printf '%s\n' "- Branch protection status: record manually or attach repository ruleset evidence" + printf '%s\n' "- External review status: attach completed review package or accepted-risk record" printf '\n' printf '%s\n' "## Non-Claims" printf '%s\n' "This evidence does not claim exactly-once delivery, provider-side event completeness, compliance certification, external timestamping, live-provider acceptance, or legal evidentiary certification." } > release-evidence/release-evidence.md + - name: Prepare release assets + run: scripts/release_assets.sh "${GITHUB_REF_NAME}" release-assets "${GITHUB_SHA}" + + - name: Publish GitHub release assets + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + if ! 
gh release view "${GITHUB_REF_NAME}" >/dev/null 2>&1; then + gh release create "${GITHUB_REF_NAME}" \ + --title "Webhookery ${GITHUB_REF_NAME}" \ + --notes-file release-assets/release-notes.md \ + --prerelease + fi + gh release upload "${GITHUB_REF_NAME}" release-assets/* --clobber + - name: Upload release evidence uses: actions/upload-artifact@v4 with: name: release-evidence path: | release-evidence/ + release-assets/ + tmp/perf-smoke/ source.spdx.json image.spdx.json diff --git a/.github/workflows/scorecard-sarif.yml b/.github/workflows/scorecard-sarif.yml new file mode 100644 index 0000000..b2667c0 --- /dev/null +++ b/.github/workflows/scorecard-sarif.yml @@ -0,0 +1,34 @@ +name: OpenSSF Scorecard SARIF + +on: + schedule: + - cron: "41 3 * * 1" + workflow_dispatch: + +permissions: + contents: read + +jobs: + scorecard-sarif: + name: Scorecard SARIF + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + security-events: write + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Run OpenSSF Scorecard + uses: ossf/scorecard-action@v2.4.3 + with: + results_file: scorecard-results.sarif + results_format: sarif + publish_results: true + + - name: Upload SARIF + uses: github/codeql-action/upload-sarif@v4 + with: + sarif_file: scorecard-results.sarif diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000..f1027b6 --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,29 @@ +name: OpenSSF Scorecard + +on: + branch_protection_rule: + schedule: + - cron: "37 3 * * 1" + workflow_dispatch: + +permissions: + contents: read + +jobs: + scorecard: + name: Scorecard + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Run OpenSSF Scorecard + uses: ossf/scorecard-action@v2.4.3 + with: + results_file: scorecard-results.sarif + results_format: sarif + 
publish_results: true diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 2079c0c..c84542c 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -51,13 +51,14 @@ jobs: upload-artifact: false - name: Trivy image scan - uses: aquasecurity/trivy-action@0.31.0 + uses: aquasecurity/trivy-action@v0.36.0 with: image-ref: webhookery:security format: table exit-code: "1" ignore-unfixed: true severity: HIGH,CRITICAL + version: v0.70.0 - name: Upload security artifacts uses: actions/upload-artifact@v4 diff --git a/.github/workflows/site-pages.yml b/.github/workflows/site-pages.yml new file mode 100644 index 0000000..49bdad6 --- /dev/null +++ b/.github/workflows/site-pages.yml @@ -0,0 +1,45 @@ +name: Site Pages + +on: + push: + branches: + - master + paths: + - "site/**" + - ".github/workflows/site-pages.yml" + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: site-pages + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + pages: write + id-token: write + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - uses: actions/checkout@v4 + + - name: Validate static site + run: make static-site-check + + - name: Configure Pages + uses: actions/configure-pages@v5 + + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v3 + with: + path: site + + - name: Deploy Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/.gitignore b/.gitignore index dc73eb8..cc32b91 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,9 @@ dist/ tmp/ release-evidence/ backups/ +live-proof-private/ +launch-metrics-private/ +examples/webhook-evidence-demo/output/ __pycache__/ sdk/typescript/.build/ .terraform/ diff --git a/.initial_design.md b/.initial_design.md new file mode 100644 index 0000000..6cbddb6 --- /dev/null +++ b/.initial_design.md @@ -0,0 +1,4728 @@ +# Initial Design Archive + +Status: 
historical design input and architecture rationale. + +This file is retained to preserve Webhookery's original product framing, +security posture, non-goals, and architecture tradeoffs. It is not the +canonical source for implemented behavior. Use current code, `openapi.yaml`, +`migrations/`, deployment profiles, and maintained docs for what the repository +actually implements. + +The prose below includes original prompt-review language and provider-specific +claims from planning work. Do not update provider behavior from memory. Before +changing provider-specific semantics or dated claims for Stripe, GitHub, +Shopify, Slack, CloudEvents, or OWASP SSRF guidance, check the current official +source and record the freshness context in the maintained documentation. + +Preserve this core framing when editing: + +- self-hosted, provider-aware, audit-grade webhook infrastructure +- durable capture before success +- explicit loss boundaries +- at-least-once delivery +- replay and reconciliation where providers permit it +- SSRF-safe outbound delivery +- audit evidence and reproducibility as product primitives + +--- + +## Original Planning Notes + +### Used libraries + +- https://github.com/aatuh/api-toolkit +- https://github.com/aatuh/randutil + + +Your prompt is **very strong**. It already forces the model away from “just POST JSON” and toward the real problem: durable capture, verification, replay, audit, fair scheduling, and self-hosted operational trust. + +I would **not run it as one giant prompt** if your goal is a usable implementation roadmap. It is so broad that many models will produce a huge but shallow “architecture encyclopedia.” I would use it as a **master specification prompt**, then split the work into focused passes: product scope, architecture, API model, database schema, security/threat model, and MVP backlog. + +## Verified constraints + +Your factual anchors check out, and they are exactly the right ones to ground the design. 
+ +| Constraint | Why it matters | +|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **Stripe requires the raw request body for signature verification**, retries live webhook deliveries for up to **three days**, does **not guarantee ordering**, and warns that duplicate events can occur. | The platform must preserve exact raw bytes, deduplicate, avoid order-dependent processing, and never acknowledge before durable capture. ([Stripe Docs][1]) | +| **GitHub expects a 2xx response within 10 seconds** and does **not automatically redeliver failed deliveries**. | Fast durable ack is essential. For GitHub specifically, a failed ack is not enough; the product should include failed-delivery polling/redelivery tooling where credentials allow it. ([GitHub Docs][2]) | +| **Shopify retries failed webhook calls 8 times over 4 hours**, can remove failing Admin API webhook subscriptions, uses HMAC over the raw body, and recommends fast ack plus async processing. | Shopify support should treat subscription loss as a first-class incident condition, not merely a failed delivery. ([Shopify][3]) | +| **Slack signed requests use the raw body, timestamp, and HMAC SHA-256**, with a replay-window check. | The adapter layer needs provider-specific signature basestring rules, timestamp windows, and constant-time comparison. ([api.slack.com][4]) | +| **OpenAPI can describe webhooks** via the top-level `webhooks` field. | The product should generate customer-facing webhook docs and internal control-plane API docs from OpenAPI. 
([OpenAPI Initiative Publications][5]) | +| **CloudEvents is a common event-envelope specification**, but not a complete webhook reliability model. | It is useful as a compatibility shape, not as a replacement for provider-specific raw evidence, delivery attempts, retries, and audit logs. ([cloudevents.io][6]) | +| **OWASP explicitly names custom webhooks/callback URLs as an SSRF risk.** | Customer-configured outbound URLs are hostile input until proven otherwise. URL validation, DNS/IP controls, redirect policy, and egress restrictions belong in MVP, not enterprise-only. ([OWASP Cheat Sheet Series][7]) | + +## The biggest correction I would make + +The line: + +> “Never lose a webhook” + +is commercially powerful, but technically dangerous. + +A more honest promise is: + +> **“Never acknowledge a webhook before durable capture; make every loss boundary explicit; make recovery and replay first-class.”** + +You cannot guarantee that a webhook is never lost before it reaches your ingress, during provider-side outage, during DNS/TLS failure, after retention expiry, or if the self-hosted operator misconfigures storage. What you *can* promise is stronger and more credible: + +```text +If the platform returns success, the raw request and verification evidence have been durably recorded. +If it cannot durably record the request, it returns failure and records the local failure if possible. +Every event has an inspectable lifecycle, replay path, and loss boundary. +``` + +That is the trust angle. + +## What I would add to your prompt + +Your prompt is already comprehensive, but I would add these requirements. + +### 1. Force the model to define acknowledgment semantics + +Add: + +```text +Define the exact conditions under which the platform may return 2xx to an inbound provider. + +At minimum, specify: +- Whether raw body, headers, request metadata, source identity, and verification result must be durably persisted before 2xx. 
+- Whether unverified events are stored, rejected, or quarantined. +- What response is returned if verification fails. +- What response is returned if durable storage is unavailable. +- What response is returned if queueing is unavailable but storage succeeded. +- How behavior differs for providers with automatic retry, manual redelivery, or no redelivery. +- Why 2xx must never mean “business processing succeeded.” +``` + +This is central. A webhook gateway lives or dies by the meaning of `202 Accepted`. + +### 2. Add a “loss boundary” section + +Add: + +```text +Include a loss-boundary analysis. + +For each failure point, state whether the event is: +- not received by the platform, +- received but not durably captured, +- durably captured but not normalized, +- normalized but not routed, +- routed but not delivered, +- delivered but not acknowledged by the customer endpoint, +- replayable, +- reconstructable, +- permanently lost. + +Be explicit about what the platform can prove and what it cannot prove. +``` + +This will prevent magical thinking. + +### 3. Require ADR-style decisions + +Add: + +```text +For major design choices, include ADR-style records: +- Decision. +- Alternatives considered. +- Why rejected. +- Operational consequences. +- Security consequences. +- Self-hosted consequences. +``` + +Example ADRs should cover PostgreSQL-first storage, object storage for raw bodies, exactly-once rejection, plugin sandboxing, SSRF strategy, and queue fairness. + +### 4. Add provider reconciliation + +Your prompt talks about retries and replay, but not enough about **reconciliation**. + +Add: + +```text +Design provider reconciliation jobs where provider APIs allow it. + +Examples: +- Stripe event reconciliation by event ID or time window. +- GitHub failed delivery listing/redelivery where credentials and webhook type permit it. +- Shopify app-level monitoring for failing subscriptions and subscription removal risk. 
+ +Explain when reconciliation is possible, when it is impossible, and how the platform reports unrecoverable gaps. +``` + +This matters because “replay” only works for what you captured. Reconciliation is how you recover what you did not capture, when the provider supports it. + +### 5. Add evidence limitations + +Add: + +```text +Explain what audit evidence can prove and cannot prove. + +For example: +- It can prove the platform received specific bytes at a specific time according to its clock. +- It can prove a signature verified using a configured key version. +- It can prove which routing/transformation/delivery decisions were made. +- It cannot prove the provider actually intended the request if the provider secret was compromised. +- It cannot prove event non-existence at the provider. +- It cannot prove downstream business processing succeeded unless the downstream system provides evidence. +``` + +This makes the product more trustworthy, not less. + +### 6. Add competitive positioning + +There are already serious players in webhook infrastructure: Convoy describes itself as an open-source webhook gateway for ingesting, persisting, debugging, delivering, and managing events; Svix focuses heavily on webhook sending/deliverability; Hookdeck positions around reliable webhook infrastructure and replay/debugging. ([GitHub][8]) + +So your differentiation should not be “webhooks with retries.” It should be one of these: + +| Wedge | Why it is stronger | +|-------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------| +| **Self-hosted-first, evidence-grade webhook control plane** | Appeals to regulated, privacy-sensitive, integration-heavy teams. | +| **Inbound + outbound unified lifecycle** | Many tools are stronger on either receiving or sending; combining both with shared audit/replay is valuable. 
| +| **Provider-aware inbox** | Stripe/GitHub/Shopify/Slack behavior is encoded, not left as generic HTTP. | +| **Audit/reproducibility as product primitive** | This is more enterprise-defensible than simple retry/log UI. | +| **SSRF-hardened outbound delivery gateway** | Many teams underestimate this risk until security review. | + +### 7. Add an MVP constraint + +Your current prompt asks for almost everything. Add a hard constraint: + +```text +After designing the complete system, produce a brutally scoped MVP that can be built by a small senior backend team in 8-12 weeks. + +The MVP must include: +- Durable inbound capture. +- Raw body preservation. +- Stripe, GitHub, Shopify, Slack, and generic HMAC adapters. +- Signature verification. +- Deduplication. +- Basic routing. +- Outbound signed delivery. +- Retry with exponential backoff and jitter. +- Delivery attempts. +- Manual replay. +- Dead-letter queue. +- Event search. +- Basic audit log. +- SSRF-safe endpoint validation and delivery. +- PostgreSQL-first deployment. +- Docker Compose self-hosting. +- Minimal admin UI. + +The MVP must exclude: +- Arbitrary code plugins. +- Full workflow engine. +- Kafka requirement. +- Multi-region active-active. +- Complex transformation language. +- Marketplace. +- Exactly-once claims. +``` + +This turns the output into an implementation plan instead of a thesis. + +## The prompt should be split into passes + +I would use this sequence. + +### Pass 1: Strategy and scope + +```text +Using the master webhook infrastructure prompt, produce only: +1. Executive summary. +2. Product scope and non-goals. +3. Market positioning. +4. MVP wedge. +5. Critical risks. +6. Why exactly-once and “never lose” must be carefully worded. +7. Prioritized feature map. +``` + +### Pass 2: Architecture + +```text +Using the approved scope, design: +1. Inbound flow. +2. Outbound flow. +3. Relay flow. +4. Archive-only flow. +5. Storage architecture. +6. Queue/scheduler architecture. +7. 
Failure semantics. +8. Loss boundaries. +9. Self-hosted deployment topology. +10. Mermaid lifecycle diagrams. +``` + +### Pass 3: API and data model + +```text +Using the approved architecture, design: +1. REST resource model. +2. Endpoint groups. +3. Representative request/response examples. +4. Error model. +5. OpenAPI structure. +6. Database tables. +7. Indexes. +8. Retention policy. +9. Idempotency and dedupe constraints. +``` + +Do not ask for an example for every single endpoint in one pass. That often produces repetitive filler. Ask for representative examples plus a complete endpoint index. + +### Pass 4: Security and audit + +```text +Using the approved API and architecture, design: +1. Threat model. +2. SSRF-safe endpoint handling. +3. Provider signature verification. +4. Secret storage and rotation. +5. Tenant isolation. +6. Audit event model. +7. Tamper-evidence options. +8. Evidence export packages. +9. Security test plan. +10. Unsafe modes and how they are labeled. +``` + +### Pass 5: Build plan + +```text +Using all approved previous outputs, produce: +1. 8-12 week MVP backlog. +2. Engineering milestones. +3. Database migration sequence. +4. API implementation order. +5. Test strategy. +6. Acceptance criteria. +7. Operational runbooks. +8. Open questions before implementation. +``` + +## A tighter master prompt preamble + +I would prepend this to your prompt: + +```text +Important instruction: + +Do not produce a generic encyclopedia of webhook concepts. Produce a design that makes hard implementation decisions. + +For every major feature, state: +- Why it exists. +- What exact failure it handles. +- What data must be stored. +- What API/resource exposes it. +- What database/index supports it. +- What audit evidence it creates. +- What security risk it introduces. +- Whether it belongs in MVP, v1, v2, enterprise, or future. + +Be explicit about loss boundaries. Do not claim “never lose” as an absolute guarantee. 
The correct reliability promise is: never return success before durable capture, make every state observable, and make replay/reconciliation possible where the provider permits it. + +Prefer concrete defaults over vague configurability. When there are alternatives, choose one for MVP and explain why. +``` + +## The strongest product framing + +I would frame the product as: + +> **A self-hosted webhook evidence and delivery control plane for teams that cannot afford invisible integration failures.** + +That is better than “webhook relay” because it captures the premium angle: evidence, trust, replay, auditability, and self-hosted control. + +The primitive is not “event bus.” The primitive is: + +```text +raw request -> verified evidence -> canonical event -> routing decision -> delivery job -> delivery attempts -> replayable lifecycle +``` + +## Critical implementation decisions I would force early + +| Decision | Recommended default | +|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Inbound ack | Return `202` only after raw request, headers, source ID, verification result, and event record are durably persisted. | +| Storage | PostgreSQL-first MVP; object storage optional for large raw payloads. | +| Queue | PostgreSQL-backed durable jobs first; Redis/NATS/Kafka optional later. | +| Raw payload | Store exact bytes or exact object-storage blob hash. Never rely only on parsed JSON. | +| Verification | Provider adapter verifies raw body and headers before routing. Failed verification goes to rejected/quarantine depending on policy. | +| Delivery semantics | At-least-once only. No exactly-once marketing. | +| Dedupe | Store duplicates visibly; suppress duplicate processing by policy. | +| Replay | Replay creates new delivery jobs linked to original event; it does not mutate original history. 
| +| Ordering | Default unordered; optional per-subject or per-endpoint ordering with documented head-of-line blocking. | +| SSRF | Block private, loopback, link-local, metadata, multicast, and reserved ranges by default; re-resolve before connect; validate redirects; enforce egress policy. | +| Plugins | Declarative adapters in MVP; arbitrary code plugins later, sandboxed and signed. | +| Transformations | Minimal deterministic mapping/redaction in MVP; avoid full scripting engine early. | +| UI | API-first; UI is an operator/customer console over the same resources. | + +## One important architectural nuance + +Your prompt says both: + +> Store before processing. + +and: + +> Verify before trusting. + +That is correct, but the implementation should be precise: + +```text +Capture and store untrusted raw evidence before business processing. +Verify before parsing, normalizing, routing, transforming, or delivering as trusted event data. +``` + +So the platform may store an unverified request as evidence, but it must not treat the payload as a trusted event until the adapter verifies it. + +## Commercial self-hosted angle + +The self-hosted opportunity is real, but the bar is high because webhook infrastructure is trust-critical and already has capable competitors. The best commercial packaging is probably: + +| Edition | What belongs there | +|------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| +| Community | Core inbound/outbound, PostgreSQL, basic adapters, retries, replay, logs, basic UI, Docker Compose. | +| Paid self-hosted | Advanced adapters, HA workers, advanced retention, SSO, RBAC, audit exports, Helm, support, upgrade tooling. | +| Enterprise | Air-gapped builds, compliance evidence packs, tamper-evident audit log, custom adapter certification, multi-region guidance, LTS, private support SLAs. | + +Avoid crippleware. 
Durable capture, signature verification, dedupe, retries, replay, DLQ, and SSRF protection must be free/core. Those are not upsell features; they are the product’s moral floor. + +## Bottom line + +Use your prompt as the **master spec**, but split execution into multiple passes. The concept is strongest when positioned not as “yet another webhook tool,” but as: + +> **Self-hosted, provider-aware, audit-grade webhook infrastructure for reliable receiving, delivery, replay, and evidence.** + +The most important changes are: + +1. Replace absolute “never lose” with durable-ack/loss-boundary language. +2. Add explicit inbound acknowledgment semantics. +3. Add provider reconciliation, especially for systems like GitHub where failed deliveries are not automatically retried. +4. Force ADRs and MVP scoping. +5. Treat SSRF, raw body preservation, dedupe, audit logs, and replay as non-negotiable MVP requirements. + +[1]: https://docs.stripe.com/webhooks?utm_source=chatgpt.com "Receive Stripe events in your webhook endpoint" +[2]: https://docs.github.com/en/webhooks/using-webhooks/handling-webhook-deliveries?utm_source=chatgpt.com "Handling webhook deliveries" +[3]: https://shopify.dev/docs/apps/build/webhooks/verify-deliveries?utm_source=chatgpt.com "Verify webhook deliveries" +[4]: https://api.slack.com/authentication/verifying-requests-from-slack?utm_source=chatgpt.com "Verifying requests from Slack | Slack Developer Docs" +[5]: https://spec.openapis.org/oas/v3.2.0.html "OpenAPI Specification v3.2.0" +[6]: https://cloudevents.io/ "CloudEvents |" +[7]: https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html "Server Side Request Forgery Prevention - OWASP Cheat Sheet Series" +[8]: https://github.com/frain-dev/convoy?utm_source=chatgpt.com "frain-dev/convoy: The Cloud Native Webhooks Gateway" + + +# Reference design: self-hosted webhook control plane + +I’ll call the product **Webhook Control Plane** in this design. 
It is a REST-first platform for receiving, verifying, storing, routing, delivering, replaying, auditing, and debugging webhooks. + +Real-world webhook behavior justifies this design: Stripe requires raw request bodies for signature verification, retries live webhook deliveries for up to three days, warns that duplicate events can occur, and does not guarantee event ordering. ([Stripe Docs][1]) GitHub recommends HMAC-SHA256 verification with `X-Hub-Signature-256`, warns not to modify payloads before verification, and does not automatically redeliver failed webhook deliveries. ([GitHub Docs][2]) Shopify expects quick `200 OK` responses, treats non-2xx and redirects as errors, has a one-second connection timeout and five-second total request timeout, and retries failed webhooks eight times over four hours. ([Shopify][3]) Slack signs requests with HMAC-SHA256 over a timestamped raw body, recommends rejecting old timestamps, and retries failed Events API deliveries three times by default. ([Slack Developer Docs][4]) Customer-supplied callback URLs are a classic SSRF risk, and OWASP explicitly calls out webhook/callback URLs as an SSRF-enabling pattern. ([OWASP Cheat Sheet Series][5]) + +--- + +# 1. Executive summary + +## What the platform is + +Webhook Control Plane is a **durable webhook inbox, relay, and outbound delivery gateway**. + +It supports two symmetric but distinct modes: + +1. **Inbound inbox and relay**: third-party providers send webhooks into the platform. The platform verifies, stores, normalizes, deduplicates, routes, retries, replays, quarantines, and exposes them through REST APIs. + +2. **Outbound delivery gateway**: your product emits internal events into the platform. The platform turns those events into customer-facing webhooks with signatures, retries, endpoint policies, delivery logs, replay, and tenant controls. + +The core object is not “a message in a queue.” The core object is **evidence of an event and every decision made about it**. 
+ +## Problem solved + +Webhook infrastructure usually fails in invisible ways: + +* Provider says it sent the event. +* Your app says it never received it. +* Customer says your webhook failed. +* Logs expired. +* Signature verification broke because middleware parsed the body. +* A retry storm overwhelmed workers. +* A replay redelivered more than intended. +* A malicious endpoint URL targeted internal infrastructure. +* No one knows which route, version, adapter, or transformation produced the delivery. + +This product makes every webhook lifecycle visible, reproducible, and auditable. + +## Who buys or uses it + +Primary buyers and users: + +| Persona | Need | +|-----------------------------------|------------------------------------------------------------------------------------------| +| API product teams | Customer-facing webhook delivery with logs, retries, signatures, schemas, and replay. | +| Integration-heavy SaaS teams | Reliable intake from Stripe, GitHub, Shopify, Slack, partners, and internal systems. | +| Platform engineering | Shared webhook infrastructure instead of every team building its own fragile handler. | +| SRE / operations | Debuggable queues, retries, dead-lettering, replay, fairness, and SLOs. | +| Security / compliance | Signature verification, audit trails, tenant isolation, SSRF controls, secret rotation. | +| Support teams | Customer-visible event timelines and delivery evidence. | +| Self-hosted / regulated customers | Full control over data, retention, network boundaries, secrets, and deployment topology. | + +## What makes it self-hosted-first + +Self-hosted-first means: + +* A single-node deployment is useful. +* PostgreSQL-only mode is production-respectable for moderate workloads. +* Object storage, Redis, NATS, or Kafka are optional scaling layers, not mandatory SaaS dependencies. +* All core control-plane capabilities work without a hosted cloud service. +* Secrets stay local. 
+* Audit logs and raw payloads stay under customer control. +* Air-gapped installs are supported. +* The product has explicit backup, restore, migration, upgrade, and rollback procedures. +* Licensing does not cripple the essential reliability and security features. + +## Why webhooks are harder than “just POST JSON” + +Webhooks are hostile distributed systems disguised as HTTP callbacks: + +* Providers retry differently. +* Some providers do not automatically redeliver. +* Some providers require fast acknowledgements. +* Duplicates are normal. +* Ordering is often not guaranteed. +* Signature verification often depends on the exact raw body. +* Different providers encode event IDs, types, account IDs, and API versions differently. +* Customer endpoints fail, hang, redirect, return huge responses, rate-limit, or disappear. +* Customer-provided URLs create SSRF exposure. +* Operator replays can accidentally multiply side effects. +* Debugging requires historical evidence, not just current queue state. + +## Why promise durable evidence and replay instead of exactly-once delivery + +The product must **never promise exactly-once delivery**. Across provider retries, HTTP timeouts, network ambiguity, customer endpoint behavior, and replay operations, exactly-once delivery is not a truthful product claim. + +The honest promise is: + +> **At-least-once delivery with durable storage, idempotency, deduplication, replay, auditability, and explicit delivery semantics.** + +The platform can guarantee that once a webhook is durably accepted, it has a traceable lifecycle. It cannot guarantee that a remote customer endpoint did or did not commit side effects when a TCP connection failed after receiving the body. 
+ +## How inbound and outbound relate + +Inbound and outbound share the same core machinery: + +| Capability | Inbound | Outbound | +|---------------------|-----------------------------------------|-------------------------------------| +| Store raw evidence | Provider request | Product-emitted event | +| Verify authenticity | Provider signature | Producer API key / internal signing | +| Normalize | Provider adapter envelope | Product event envelope | +| Route | Source → internal/customer destinations | Event type → subscriptions | +| Deliver | Relay to internal/customer endpoints | Customer webhook delivery | +| Retry | Downstream relay attempts | Customer endpoint attempts | +| Replay | From raw or normalized event | From stored event | +| Audit | Verification, dedupe, routing, replay | Signing, delivery, retries, replay | + +The same platform should expose both as one consistent REST control plane. + +--- + +# 2. Product scope and non-goals + +## In scope + +| Area | Included | +|-------------------|--------------------------------------------------------------------------| +| Inbound receiving | Provider-specific and generic webhook ingress. | +| Raw preservation | Raw bytes, canonicalized headers, request metadata, payload hashes. | +| Verification | HMAC, JWT, provider adapters, timestamp windows, multi-secret rotation. | +| Normalization | Versioned provider adapters and canonical event envelope. | +| Deduplication | Provider-specific and custom dedupe keys, visible duplicates. | +| Durable storage | Events, raw payload metadata, deliveries, attempts, audit logs. | +| Routing | Sources, routes, subscriptions, filters, fanout. | +| Transformation | Versioned deterministic transformations, redaction, field mapping. | +| Outbound delivery | Signed HTTP webhooks with retries, rate limits, logs, replay. | +| Replay | Single, bulk, dry-run, failed-only, endpoint-specific, config-versioned. | +| Dead-lettering | Terminal failures with release and replay controls. 
| +| Quarantine | Suspicious, unverifiable, malformed, or policy-blocked events. | +| Schema registry | Event types, JSON Schema, compatibility, examples, changelogs. | +| Security | RBAC, tenant isolation, secrets, SSRF defense, audit logs. | +| Observability | Metrics, logs, traces, dashboards, alerts, SLOs. | +| Self-hosting | Single binary, Docker Compose, Kubernetes, air-gapped, backups. | +| API-first UI | Admin UI backed entirely by public/private REST APIs. | +| SDK/CLI | Producer, consumer, signature verification, local testing, replay tools. | + +## Out of scope + +| Area | Reason | +|----------------------------------|-------------------------------------------------------------------| +| General workflow automation | Becomes Zapier/Make/n8n; distracts from reliability and evidence. | +| Arbitrary long-running workflows | Becomes Temporal; changes product boundaries and state model. | +| General pub/sub event bus | Becomes Kafka/NATS-as-a-service; not webhook-specific enough. | +| Business process orchestration | Too broad and dangerous for MVP. | +| Full ETL platform | Transformations should be bounded, deterministic, and auditable. | +| Unbounded scripting by users | Security, determinism, resource, and supply-chain risks. | +| Exactly-once delivery | Not truthful across HTTP, third-party retries, and replay. | +| Global event ordering | Expensive, throughput-hostile, and usually false for providers. | +| Silent best-effort delivery | Violates the core promise. | +| Hidden managed dependency | Conflicts with self-hosted-first positioning. | + +## Tempting but dangerous for MVP + +| Tempting feature | Why dangerous | +|----------------------------------------|-----------------------------------------------------------------------| +| Visual workflow builder | Bloats scope and invites arbitrary side effects. | +| Arbitrary JS/Python transformations | Security and reproducibility risk unless sandboxed very carefully. 
| +| Kafka-first architecture | Raises ops burden for self-hosted users. | +| Multi-region active-active | Hard to make correct before the core lifecycle is stable. | +| Complex ordering guarantees | Causes head-of-line blocking and misleading expectations. | +| Full marketplace of adapters | Supply-chain risk before plugin security is mature. | +| “Exactly-once webhooks” marketing | False promise; creates legal and trust risk. | +| Provider API reconciliation automation | Useful later, but each provider has unique semantics and permissions. | + +## Why not start as Kafka, Zapier, Temporal, or workflow automation + +* **Kafka** solves durable log streaming, not provider signature verification, raw body preservation, customer-facing logs, replay receipts, SSRF-safe endpoint delivery, or webhook UX. +* **Zapier-style automation** optimizes ease of connecting apps, not auditable delivery semantics. +* **Temporal** solves workflow state machines, not webhook inbox/delivery evidence. +* **A generic event bus** does not explain why a Stripe request failed verification, which Shopify delivery was deduped, or why a customer endpoint was blocked by SSRF policy. + +The narrow wedge is better: **trusted webhook infrastructure**. + +--- + +# 3. Core design principles + +| Principle | Concrete design rule | +|--------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------| +| Store before processing | Accept only after raw request metadata and payload are durably written. | +| Verify before trusting | Treat unverified events as untrusted; never route to side-effecting destinations unless policy explicitly allows unsafe mode. | +| Respond fast to providers | Verify minimally, persist durably, enqueue, return 2xx; complex work is async. | +| Preserve raw payloads | Store exact raw bytes or immutable object reference before parsing. 
| +| Normalize without destroying original data | Canonical envelope points to raw payload and provider-specific fields. | +| At-least-once delivery | Remote deliveries may happen multiple times; consumers must use idempotency. | +| Idempotency everywhere | Ingestion, delivery, replay, API actions, secret rotation, bulk jobs. | +| Deterministic deduplication | Dedupe key is explicit, versioned, auditable, and reproducible. | +| Replay as first-class | Replay is an API resource with dry-run, rate limits, receipts, and audit logs. | +| Versioned routing rules | Route decisions store `route_version_id` and match explanation. | +| Versioned provider adapters | Adapter version is stored on every normalized event. | +| Versioned transformations | Delivery payload records transformation version and input hash. | +| Explicit retry policies | No hidden default; every delivery references a policy version. | +| Explicit ordering semantics | Default unordered; optional scoped ordering with documented trade-offs. | +| Tenant isolation | Every object is tenant-scoped; workers enforce fair scheduling. | +| Safe customer URLs | SSRF checks at configuration and delivery time. | +| No hidden decisions | Verification, dedupe, route matching, transform, retry, DLQ, quarantine all emit evidence. | +| Traceable lifecycle | Every event has a timeline from receipt to final delivery states. | + +--- + +# 4. Webhook modes and flows + +## A. 
Inbound provider webhook flow + +```mermaid +sequenceDiagram + participant P as Provider + participant I as Ingress API + participant DB as Durable Store + participant A as Adapter + participant Q as Fair Scheduler + participant W as Delivery Worker + participant D as Destination + + P->>I: POST /v1/ingest/{tenant_id}/{source_id} + I->>I: Capture raw body + headers + I->>A: Verify signature using raw bytes + I->>DB: Store raw payload, request metadata, verification evidence + I->>A: Normalize envelope + I->>DB: Store event + dedupe outcome + I-->>P: Fast 2xx after durable acceptance + DB->>Q: Enqueue route evaluation / deliveries + Q->>W: Fairly schedule by tenant/source/endpoint + W->>D: Deliver signed webhook / relay request + W->>DB: Store attempt, response, next retry or terminal state +``` + +Detailed steps: + +1. Receive HTTP request. +2. Enforce method, size, header, content-type, and tenant/source lookup limits. +3. Capture: + + * raw body bytes, + * raw header map, + * canonical header map, + * query string, + * remote IP / trusted proxy chain, + * TLS metadata if available, + * receive timestamp. +4. Run provider adapter verification: + + * signature header exists, + * timestamp within replay window if provider supports it, + * HMAC/JWT/signature check using exact raw bytes, + * constant-time comparison. +5. Store raw payload and verification evidence. +6. Normalize provider-specific request into canonical envelope. +7. Compute dedupe key. +8. Insert event record. +9. If duplicate: + + * store duplicate receipt, + * optionally suppress route/delivery, + * return configured acknowledgement. +10. Return fast 2xx after durable persistence. +11. Evaluate routes/subscriptions with versioned rules. +12. Create delivery jobs. +13. Deliver downstream with retries. +14. Dead-letter terminal failures. +15. Quarantine malformed, unsafe, suspicious, or unverifiable events if configured. +16. 
Expose timeline, logs, raw payload, normalized payload, dedupe result, route explanation, and replay API. + +## B. Outbound product event delivery flow + +```mermaid +sequenceDiagram + participant App as Product Backend + participant API as Event Ingestion API + participant DB as Durable Store + participant R as Route/Subscription Engine + participant W as Delivery Worker + participant C as Customer Endpoint + + App->>API: POST /v1/events + API->>API: Authenticate producer + validate schema + API->>DB: Store event + idempotency key + API-->>App: 202 Accepted / 201 Created + DB->>R: Find active subscriptions + R->>DB: Create delivery jobs + W->>W: Build payload + sign delivery + W->>C: POST signed event + C-->>W: HTTP response + W->>DB: Attempt log + retry/dead-letter state +``` + +Steps: + +1. Product emits event using API key, mTLS, or internal trusted producer identity. +2. Platform validates: + + * tenant, + * event type, + * schema version, + * idempotency key, + * payload size, + * producer permission. +3. Platform stores event. +4. Subscription engine finds matching endpoints. +5. Delivery job is created per endpoint/subscription. +6. Payload is transformed if configured. +7. Delivery is signed. +8. Worker sends HTTP request with timeout, no unsafe redirects, SSRF checks. +9. Response status, headers, and body are captured with truncation. +10. Retry, DLQ, endpoint disablement, and replay policies apply. + +## C. Relay mode + +Relay mode combines inbound and outbound: + +```text +External provider → inbound source → verified stored event → normalized envelope +→ route/subscription fanout → internal service or customer endpoint delivery +``` + +Relay destinations can receive: + +* original raw payload, +* provider-normalized envelope, +* transformed product-specific payload, +* CloudEvents-compatible payload, +* legacy compatibility payload. + +## D. 
Archive-only mode + +Archive-only mode: + +* verifies and stores webhooks, +* normalizes if possible, +* records dedupe outcomes, +* emits metrics and audit logs, +* creates no deliveries. + +Use cases: + +* compliance capture, +* migration shadowing, +* provider debugging, +* incident forensics, +* test environment validation. + +## E. Test/sandbox mode + +Test/sandbox supports: + +* synthetic provider events, +* provider signature test vectors, +* endpoint tester, +* dry-run route matching, +* transformation preview, +* replay into sandbox endpoints, +* secret verification tests, +* “would deliver to” explanations, +* local tunnel receiver. + +Test-mode events must be clearly marked: + +```json +{ + "test_mode": true, + "environment": "sandbox", + "synthetic": true +} +``` + +They must never silently mix with production events. + +--- + +# 5. Core resource model + +All primary resources have: + +```json +{ + "id": "resource_id", + "tenant_id": "ten_123", + "created_at": "2026-05-25T10:00:00Z", + "updated_at": "2026-05-25T10:00:00Z", + "created_by": "usr_123", + "state": "active", + "version": 3 +} +``` + +## Resource catalog + +| Resource | Purpose | Key fields | Relationships / lifecycle | Indexes | Retention | Security | +|-------------------------------|------------------------------------|--------------------------------------------------------------------------------|---------------------------------------------------------------|--------------------------------------------------------------------------------|-------------------------------------|----------------------------------------------------------| +| `tenants` | Isolation boundary | `id`, `name`, `plan`, `region`, `limits`, `settings` | owns all resources; `active/suspended/deleted` | `name`, `state` | soft-delete metadata | root isolation; never join without tenant predicate | +| `users` | Human actors | `id`, `tenant_id`, `email`, `name`, `mfa`, `state` | member of tenant/org; 
`invited/active/disabled` | `tenant_id,email` | retain audit actor refs | RBAC; PII minimization | +| `api_keys` | Machine auth | `id`, `prefix`, `hash`, `scopes`, `expires_at`, `last_used_at` | belongs tenant/user/service account; `active/revoked/expired` | `prefix`, `tenant_id,state` | keep revoked metadata | store hash only; show once | +| `sources` | Inbound webhook origin | `id`, `tenant_id`, `provider`, `adapter_id`, `mode`, `ingress_slug`, `state` | has secrets, adapter config, routes | `tenant_id,provider`, `ingress_slug` | keep disabled sources for evidence | ingress slug not secret unless explicitly secret-bearing | +| `provider_adapters` | Provider parsing/verification | `id`, `name`, `version`, `capabilities`, `schema` | referenced by sources/events | `name,version` | immutable versions | signed releases; approval workflow | +| `connector/adapters registry` | Discover adapters | `adapter_id`, `versions`, `status`, `docs`, `risk_level` | install/enable/disable | `name`, `status` | keep metadata | trust boundaries for plugins | +| `endpoints` | Outbound destination URL | `id`, `url`, `method`, `tls_policy`, `ssrf_policy_id`, `state`, `health_score` | used by subscriptions/deliveries | `tenant_id,state`, `host_hash` | preserve historical URL hash | SSRF validation; redact secrets in URL | +| `endpoint_secrets` | Signing secrets for deliveries | `id`, `endpoint_id`, `secret_hash/ref`, `algorithm`, `valid_from/to` | rotating; `active/grace/expired/revoked` | `endpoint_id,state` | metadata retained | KMS/envelope encryption; never log | +| `event_types` | Contract registry | `name`, `description`, `owner`, `version_policy` | has schemas/examples | `tenant_id,name` | retain deprecated | publish permissions | +| `event_schemas` | JSON Schema / CloudEvents schema | `id`, `event_type`, `version`, `schema`, `compatibility` | `draft/active/deprecated/retired` | `event_type,version` | immutable active versions | schema may expose PII examples; RBAC | +| `events` | 
Canonical event record | envelope fields, status, dedupe status | points to raw/normalized payload; has deliveries | `tenant_id,received_at`, `type`, `provider_event_id`, `dedupe_key`, `trace_id` | configurable; metadata often longer | tenant scoped; field-level redaction | +| `raw_payloads` | Exact request bytes or object ref | `sha256`, `size`, `content_type`, `storage_uri`, `encrypted` | one or more receipts/events | `sha256`, `event_id` | shortest if PII-heavy | encrypted; privileged access | +| `normalized_envelopes` | Canonical event JSON | `event_id`, `version`, `json`, `hash` | immutable per adapter version | `event_id`, `hash` | usually same as event | redaction-aware | +| `subscriptions` | Customer interest in events | `endpoint_id`, `event_types`, `filter_id`, `state` | creates deliveries | `tenant_id,state`, `endpoint_id` | retain history | customer can view own | +| `routes` | Inbound/relay routing rules | `source_id`, `priority`, `match`, `destination`, `version` | versioned; `draft/active/archived` | `source_id,priority` | immutable versions | change approval for prod | +| `filters` | Predicate fragments | `expression`, `language`, `version` | used by routes/subscriptions | `tenant_id,name,version` | immutable active | expression sandbox limits | +| `transformations` | Payload mapping/redaction | `language`, `spec`, `version`, `input_schema`, `output_schema` | used by routes/subscriptions | `tenant_id,name,version` | immutable active | sandbox; no network | +| `deliveries` | Delivery job per event/endpoint | `event_id`, `endpoint_id`, `state`, `next_attempt_at`, `attempt_count` | has attempts; terminal states | `state,next_attempt_at`, `tenant_id,endpoint_id`, `event_id` | longer than attempts | customer-visible subset | +| `delivery_attempts` | One HTTP try | request hash, response status, latency, failure class | belongs delivery | `delivery_id,attempt_no`, `tenant_id,created_at` | response bodies shorter | truncate/redact response | +| 
`retry_policies` | Retry behavior | `schedule`, `jitter`, `max_attempts`, `max_duration`, `rate_limit` | referenced by endpoints/routes | `tenant_id,name,version` | immutable versions | admin-controlled defaults | +| `replay_jobs` | Replay operation | `scope`, `mode`, `dry_run`, `state`, `rate_limit`, `actor` | creates replay deliveries | `tenant_id,state`, `created_at` | retain receipts | privileged; approvals | +| `dead_letter_entries` | Terminal failures | `delivery_id/event_id`, `reason`, `release_state` | can release/replay | `tenant_id,state,reason` | configurable | privileged release | +| `quarantine_entries` | Suspicious/unsafe events | reason, evidence, review decision | release, reject, delete | `tenant_id,state,reason` | depends compliance | security review role | +| `idempotency_keys` | API idempotency records | `key_hash`, `method`, `path`, `request_hash`, `response_ref` | active/expired/conflict | unique `tenant_id,key_hash,scope` | TTL + audit | hash key; no raw secrets | +| `deduplication_rules` | Event duplicate rules | `scope`, `expression`, `window`, `action`, `version` | source/adapter/event type | `tenant_id,source_id,type` | immutable versions | changes audited | +| `signing_keys` | Platform signing keys | `kid`, `algorithm`, `public_key`, `state` | signs deliveries/JWTs | `kid,state` | retain public keys | private in KMS/HSM | +| `verification_keys` | Inbound verify secrets/public keys | `source_id`, `kind`, `secret_ref`, `valid_from/to` | rotating | `source_id,state` | metadata retained | encrypted; dual-secret windows | +| `audit_events` | Durable audit evidence | actor, action, resource, before/after hash, IP | append-only | `tenant_id,created_at`, `actor`, `resource_id` | longest | tamper-evident option | +| `metrics` | Time-series rollups | counters, histograms, dimensions | from events/attempts/workers | `tenant_id,metric,time` | downsample | no raw payloads | +| `alerts` | Alert configs and firings | condition, threshold, channels, 
state | metrics-driven | `tenant_id,state` | firing history retained | alert channel secrets | +| `retention_policies` | Data lifecycle | resource, duration, deletion mode, legal hold | tenant/source/endpoint | `tenant_id,resource` | policy object | admin-only | +| `worker_nodes` | Runtime worker status | node_id, version, queues, heartbeat | ephemeral | `last_seen,state` | short | operator-only | +| `config_versions` | Immutable config snapshots | resource_id, version, hash, body | referenced by decisions | `resource_id,version` | long | audit-critical | + +--- + +# 6. REST API surface + +## API conventions + +Base paths: + +```text +/v1/control/... management APIs +/v1/events... product event ingestion and search +/v1/ingest... webhook ingestion +/v1/ops... health, metrics, workers +/.well-known/... OpenAPI, JWKS, webhook docs +``` + +Authentication: + +| Area | Auth | +|-----------------------------|-------------------------------------------------------------| +| Management APIs | Bearer API key or OIDC session with RBAC. | +| Product event ingestion | Producer API key, mTLS, or internal trusted identity. | +| Provider-specific ingress | Provider signature or configured source secret. | +| Generic ingress | HMAC/JWT/API key/header/secret URL depending source config. | +| Health liveness | unauthenticated. | +| Readiness, workers, metrics | operator auth unless explicitly public inside cluster. | + +Common headers: + +```http +Authorization: Bearer whcp_live_... +Idempotency-Key: ik_... +X-Request-Id: req_... +Content-Type: application/json +``` + +Pagination: + +```text +?limit=50&cursor=cur_abc&sort=-created_at +``` + +List response: + +```json +{ + "data": [], + "next_cursor": "cur_next", + "has_more": true +} +``` + +Errors use RFC 9457-style Problem Details with extensions. RFC 9457 defines a machine-readable problem detail format for HTTP APIs and obsoletes RFC 7807. 
([RFC Editor][6]) + +--- + +## Endpoint contract catalog + +The examples below use compact bodies. Every operation should be represented in the OpenAPI document with full JSON Schemas, examples, auth schemes, error responses, and webhook callback documentation. OpenAPI 3.1 supports describing webhooks as provider-initiated operations. ([OpenAPI Documentation][7]) + +### Source management + +#### `POST /v1/sources` + +Create inbound source. + +```http +POST /v1/sources +Authorization: Bearer ... +Idempotency-Key: ik_create_source_1 +Content-Type: application/json + +{ + "name": "Stripe production", + "provider": "stripe", + "adapter": "stripe@2026-01-01", + "mode": "inbox_relay", + "environment": "production", + "ack_policy": "after_persist", + "verification": { + "type": "stripe_signature", + "secret": "whsec_..." + } +} +``` + +Response `201`: + +```json +{ + "id": "src_123", + "tenant_id": "ten_123", + "provider": "stripe", + "state": "active", + "ingress_url": "https://hooks.example.com/v1/ingest/ten_123/src_123", + "adapter_version": "stripe@2026-01-01" +} +``` + +Status codes: `201`, `400`, `401`, `403`, `409`, `422`. +Errors: invalid adapter, invalid verification config, duplicate name. +Auth: tenant admin or integration admin. +Idempotency: required for safe retries. + +#### `GET /v1/sources` + +List sources. + +Request: + +```http +GET /v1/sources?provider=stripe&state=active&limit=50 +Authorization: Bearer ... +``` + +Response `200`: + +```json +{ + "data": [ + { + "id": "src_123", + "name": "Stripe production", + "provider": "stripe", + "state": "active" + } + ], + "next_cursor": null, + "has_more": false +} +``` + +Auth: source read. +Pagination: cursor. +Errors: `401`, `403`. 
+ +#### `GET /v1/sources/{source_id}` + +Response `200`: + +```json +{ + "id": "src_123", + "name": "Stripe production", + "provider": "stripe", + "adapter": "stripe@2026-01-01", + "state": "active", + "ingress_url": "https://hooks.example.com/v1/ingest/ten_123/src_123", + "verification_summary": { + "type": "stripe_signature", + "active_secret_count": 1 + } +} +``` + +Errors: `404`, `403`. + +#### `PATCH /v1/sources/{source_id}` + +Request: + +```json +{ + "name": "Stripe prod", + "state": "paused", + "ack_policy": "after_persist" +} +``` + +Response `200`: updated source. +Errors: invalid transition, active route dependency. +Auth: source admin. +Idempotency: recommended. + +#### `DELETE /v1/sources/{source_id}` + +Soft-delete or archive. + +Request: + +```json +{ + "mode": "archive", + "reason": "migration complete" +} +``` + +Response `202`: + +```json +{ + "id": "src_123", + "state": "archiving" +} +``` + +Errors: active deliveries exist unless force policy. +Auth: tenant admin. + +#### `POST /v1/sources/{source_id}/secrets:rotate` + +Request: + +```json +{ + "new_secret": "whsec_new...", + "grace_period_seconds": 86400, + "activate_at": "2026-05-25T12:00:00Z" +} +``` + +Response `200`: + +```json +{ + "source_id": "src_123", + "active_secret_count": 2, + "old_secret_expires_at": "2026-05-26T12:00:00Z" +} +``` + +Errors: unsupported adapter, weak secret, invalid overlap. +Auth: secrets admin. +Audit: mandatory. + +#### `POST /v1/sources/{source_id}/signature-tests` + +Request: + +```json +{ + "headers": { + "Stripe-Signature": "t=1710000000,v1=..." + }, + "raw_body_base64": "eyJpZCI6ImV2dF8xMjMifQ==" +} +``` + +Response `200`: + +```json +{ + "verified": true, + "matched_key_id": "vkey_123", + "timestamp_age_seconds": 12, + "adapter_version": "stripe@2026-01-01" +} +``` + +Errors: invalid signature, expired timestamp, missing raw body. +Auth: source admin. + +--- + +### Endpoint management + +#### `POST /v1/endpoints` + +Create customer/internal destination. 
+ +```json +{ + "name": "Customer webhook", + "url": "https://customer.example.com/webhooks/acme", + "method": "POST", + "payload_format": "canonical_json", + "retry_policy_id": "rpol_default", + "ssrf_policy_id": "ssrf_default", + "tls_policy": { + "min_version": "TLS1.2", + "verify_certificate": true + } +} +``` + +Response `201`: + +```json +{ + "id": "end_123", + "state": "active", + "url_host": "customer.example.com", + "health_score": 100 +} +``` + +Errors: invalid URL, blocked private IP, duplicate endpoint. +Auth: endpoint admin. +Idempotency: required. + +#### `GET /v1/endpoints` + +Filters: `state`, `host`, `health_min`, `created_after`. +Response: paginated endpoints. + +#### `GET /v1/endpoints/{endpoint_id}` + +Response includes URL, policy refs, health, active subscriptions, last delivery summary. + +#### `PATCH /v1/endpoints/{endpoint_id}` + +Request: + +```json +{ + "url": "https://customer.example.com/webhooks/v2", + "state": "active", + "timeout_ms": 10000 +} +``` + +Response: updated endpoint. +Errors: SSRF blocked, invalid transition. + +#### `DELETE /v1/endpoints/{endpoint_id}` + +Soft-delete endpoint; active subscriptions become disabled or require `cascade=true`. + +#### `POST /v1/endpoints/{endpoint_id}/secrets:rotate` + +Request: + +```json +{ + "algorithm": "hmac_sha256", + "grace_period_seconds": 86400 +} +``` + +Response: + +```json +{ + "endpoint_id": "end_123", + "new_secret_once": "whsec_out_...", + "active_secret_count": 2 +} +``` + +Secret is returned once only. + +#### `POST /v1/endpoints/{endpoint_id}:test` + +Request: + +```json +{ + "event_type": "com.example.test", + "payload": { + "message": "hello" + }, + "dry_run": false +} +``` + +Response: + +```json +{ + "delivery_id": "del_test_123", + "attempt_id": "att_123", + "status": "succeeded", + "http_status": 200, + "latency_ms": 84 +} +``` + +Errors: endpoint disabled, URL blocked, timeout. 
+ +#### `POST /v1/endpoints:validate-url` + +Request: + +```json +{ + "url": "https://customer.example.com/webhook", + "ssrf_policy_id": "ssrf_default" +} +``` + +Response: + +```json +{ + "allowed": true, + "resolved_ips": ["203.0.113.10"], + "blocked_reasons": [] +} +``` + +Errors: invalid URL. +Auth: endpoint admin. + +--- + +### Subscription management + +#### `POST /v1/subscriptions` + +```json +{ + "endpoint_id": "end_123", + "event_types": ["invoice.paid", "customer.created"], + "filter_id": "flt_123", + "payload_format": "canonical_json", + "state": "active" +} +``` + +Response `201`: + +```json +{ + "id": "sub_123", + "endpoint_id": "end_123", + "state": "active", + "event_types": ["invoice.paid", "customer.created"] +} +``` + +Errors: unknown event type, incompatible schema version, endpoint disabled. +Idempotency: required. + +#### `GET /v1/subscriptions` + +Filters: endpoint, event type, state. + +#### `GET /v1/subscriptions/{subscription_id}` + +Response: subscription, filter summary, delivery stats. + +#### `PATCH /v1/subscriptions/{subscription_id}` + +Update event types, filter, state, payload format. Creates new config version. + +#### `DELETE /v1/subscriptions/{subscription_id}` + +Soft-delete with audit. 
+ +--- + +### Route management + +#### `POST /v1/routes` + +```json +{ + "source_id": "src_123", + "name": "Stripe payments to billing", + "priority": 100, + "match": { + "event_types": ["payment_intent.succeeded"], + "expression": "$.data.object.amount > 0" + }, + "destination": { + "type": "endpoint", + "endpoint_id": "end_123" + }, + "transformation_id": "trn_123", + "state": "draft" +} +``` + +Response `201`: + +```json +{ + "id": "rte_123", + "version": 1, + "state": "draft" +} +``` + +#### `POST /v1/routes/{route_id}:activate` + +```json +{ + "expected_version": 1, + "change_reason": "release billing webhook" +} +``` + +Response: + +```json +{ + "id": "rte_123", + "state": "active", + "version": 1 +} +``` + +Errors: version conflict, unsafe transformation, missing approval. + +#### `POST /v1/routes/{route_id}:dry-run` + +```json +{ + "event_id": "evt_123" +} +``` + +Response: + +```json +{ + "matched": true, + "explanation": [ + { + "rule": "event_types", + "result": true + }, + { + "rule": "$.data.object.amount > 0", + "result": true + } + ], + "would_create_deliveries": [ + { + "endpoint_id": "end_123" + } + ] +} +``` + +#### Other route endpoints + +```text +GET /v1/routes +GET /v1/routes/{route_id} +PATCH /v1/routes/{route_id} +DELETE /v1/routes/{route_id} +GET /v1/routes/{route_id}/versions +GET /v1/routes/{route_id}/versions/{version} +``` + +All route changes create config versions and audit events. 
+ +--- + +### Filters and transformations + +```text +POST /v1/filters +GET /v1/filters +GET /v1/filters/{filter_id} +PATCH /v1/filters/{filter_id} +POST /v1/filters/{filter_id}:test + +POST /v1/transformations +GET /v1/transformations +GET /v1/transformations/{transformation_id} +PATCH /v1/transformations/{transformation_id} +POST /v1/transformations/{transformation_id}:preview +POST /v1/transformations/{transformation_id}:activate +``` + +Transformation preview request: + +```json +{ + "event_id": "evt_123", + "input": null +} +``` + +Response: + +```json +{ + "output": { + "id": "evt_123", + "type": "payment_intent.succeeded" + }, + "input_hash": "sha256:...", + "output_hash": "sha256:...", + "deterministic": true +} +``` + +Errors: execution timeout, forbidden function, schema mismatch. + +--- + +### Event ingestion + +#### `POST /v1/events` + +Product event ingestion. + +```json +{ + "id": "evt_product_123", + "type": "invoice.paid", + "source": "com.example.billing", + "subject": "invoice/inv_123", + "occurred_at": "2026-05-25T09:59:30Z", + "schema_version": "2026-05-01", + "data": { + "invoice_id": "inv_123", + "amount": 4200, + "currency": "EUR" + } +} +``` + +Response `202`: + +```json +{ + "id": "evt_123", + "state": "accepted", + "deduplication_key": "product:com.example.billing:evt_product_123", + "trace_id": "trc_123" +} +``` + +Status: `201` if new, `200` if idempotent duplicate, `202` if accepted async. +Errors: schema validation failed, duplicate conflict, unauthorized producer. + +#### `POST /v1/events:batch` + +Request: + +```json +{ + "events": [ + { + "id": "evt_product_123", + "type": "invoice.paid", + "source": "com.example.billing", + "data": {} + } + ] +} +``` + +Response: + +```json +{ + "accepted": 1, + "rejected": 0, + "results": [ + { + "index": 0, + "event_id": "evt_123", + "status": "accepted" + } + ] +} +``` + +Partial failure allowed only if `atomic=false`. 
+ +--- + +### Provider-specific ingestion + +```text +POST /v1/ingest/{tenant_id}/{source_id} +POST /v1/ingest/{tenant_slug}/{source_slug} +POST /v1/ingest/stripe/{source_id} +POST /v1/ingest/github/{source_id} +POST /v1/ingest/shopify/{source_id} +POST /v1/ingest/slack/{source_id} +POST /v1/ingest/generic/{source_id} +POST /v1/ingest/cloudevents/{source_id} +``` + +Example Stripe ingress: + +```http +POST /v1/ingest/stripe/src_123 +Stripe-Signature: t=1710000000,v1=... +Content-Type: application/json + +{"id":"evt_123","type":"payment_intent.succeeded","data":{"object":{}}} +``` + +Response after durable acceptance: + +```json +{ + "received": true, + "event_id": "evt_123", + "duplicate": false, + "trace_id": "trc_123" +} +``` + +Status: + +| Status | Meaning | +|--------|---------------------------------------------------------------------------------------| +| `200` | accepted and provider expects `200`. | +| `202` | accepted async if provider-compatible. | +| `204` | accepted with empty body. | +| `400` | malformed request. | +| `401` | invalid signature. | +| `413` | payload too large. | +| `415` | unsupported content type. | +| `429` | rate limited before durable acceptance. Dangerous; use only when necessary. | +| `503` | storage unavailable before persistence. Provider should retry if it supports retries. | + +--- + +### Event search and retrieval + +```text +GET /v1/events +GET /v1/events/{event_id} +GET /v1/events/{event_id}/timeline +GET /v1/events/{event_id}/raw +GET /v1/events/{event_id}/raw:download +GET /v1/events/{event_id}/normalized +GET /v1/events/{event_id}/deliveries +GET /v1/events/{event_id}/audit +``` + +Search example: + +```http +GET /v1/events?type=payment_intent.succeeded&source_id=src_123&received_after=2026-05-25T00:00:00Z&limit=25 +Authorization: Bearer ... 
+``` + +Response: + +```json +{ + "data": [ + { + "id": "evt_123", + "type": "payment_intent.succeeded", + "source_id": "src_123", + "received_at": "2026-05-25T10:00:00Z", + "signature_verified": true, + "dedupe_status": "unique", + "delivery_summary": { + "succeeded": 2, + "failed": 0, + "pending": 0 + } + } + ], + "next_cursor": null +} +``` + +Raw retrieval response: + +```json +{ + "event_id": "evt_123", + "raw_payload_hash": "sha256:...", + "content_type": "application/json", + "size_bytes": 2048, + "body_base64": "eyJpZCI6..." +} +``` + +Security: raw access requires elevated permission and is audit-logged. + +--- + +### Delivery logs and attempts + +```text +GET /v1/deliveries +GET /v1/deliveries/{delivery_id} +GET /v1/deliveries/{delivery_id}/attempts +GET /v1/delivery-attempts/{attempt_id} +POST /v1/deliveries/{delivery_id}:retry +POST /v1/deliveries/{delivery_id}:cancel +``` + +Manual retry request: + +```json +{ + "reason": "customer fixed endpoint", + "force": false +} +``` + +Response: + +```json +{ + "delivery_id": "del_123", + "state": "scheduled", + "next_attempt_at": "2026-05-25T10:01:00Z" +} +``` + +Attempt detail response: + +```json +{ + "id": "att_123", + "delivery_id": "del_123", + "attempt_no": 3, + "started_at": "2026-05-25T10:00:00Z", + "duration_ms": 312, + "request": { + "method": "POST", + "url_redacted": "https://customer.example.com/webhooks", + "headers_redacted": { + "Webhook-Event-Id": "evt_123" + }, + "body_hash": "sha256:..." + }, + "response": { + "status": 500, + "headers_redacted": { + "content-type": "application/json" + }, + "body_truncated": "{\"error\":\"temporary\"}", + "body_hash": "sha256:..." 
+ }, + "classification": "temporary_failure" +} +``` + +--- + +### Replay + +```text +POST /v1/replay-jobs:dry-run +POST /v1/replay-jobs +GET /v1/replay-jobs +GET /v1/replay-jobs/{replay_job_id} +POST /v1/replay-jobs/{replay_job_id}:pause +POST /v1/replay-jobs/{replay_job_id}:resume +POST /v1/replay-jobs/{replay_job_id}:cancel +GET /v1/replay-jobs/{replay_job_id}/receipts +``` + +Dry-run request: + +```json +{ + "scope": { + "event_ids": ["evt_123"], + "endpoint_ids": ["end_123"] + }, + "mode": "current_config", + "only_failed": false, + "dedupe_behavior": "create_replay_attempts_only" +} +``` + +Response: + +```json +{ + "would_replay_events": 1, + "would_create_deliveries": 1, + "warnings": [], + "sample": [ + { + "event_id": "evt_123", + "endpoint_id": "end_123", + "route_version": 4, + "transformation_version": 2 + } + ] +} +``` + +Replay creation response: + +```json +{ + "id": "rpl_123", + "state": "scheduled", + "scope_hash": "sha256:...", + "rate_limit_per_minute": 100, + "created_by": "usr_123" +} +``` + +Errors: replay too broad, missing permission, endpoint disabled, rate limit, unsafe config drift. 
+ +--- + +### Dead-letter and quarantine + +```text +GET /v1/dead-letter +GET /v1/dead-letter/{entry_id} +POST /v1/dead-letter/{entry_id}:release +POST /v1/dead-letter:bulk-release + +GET /v1/quarantine +GET /v1/quarantine/{entry_id} +POST /v1/quarantine/{entry_id}:approve +POST /v1/quarantine/{entry_id}:reject +POST /v1/quarantine/{entry_id}:delete +``` + +Dead-letter release request: + +```json +{ + "reason": "endpoint fixed", + "target": "same_endpoint", + "rate_limit_per_minute": 10 +} +``` + +Response: + +```json +{ + "entry_id": "dlq_123", + "state": "released", + "replay_job_id": "rpl_456" +} +``` + +Quarantine approve request: + +```json +{ + "reason": "verified manually", + "route_after_release": true +} +``` + +--- + +### Schema and event type registry + +```text +POST /v1/event-types +GET /v1/event-types +GET /v1/event-types/{event_type} +PATCH /v1/event-types/{event_type} + +POST /v1/event-types/{event_type}/schemas +GET /v1/event-types/{event_type}/schemas +GET /v1/event-types/{event_type}/schemas/{schema_version} +POST /v1/event-types/{event_type}/schemas/{schema_version}:validate +POST /v1/event-types/{event_type}/schemas/{schema_version}:check-compatibility +POST /v1/event-types/{event_type}/schemas/{schema_version}:deprecate +GET /v1/event-types/{event_type}/examples +POST /v1/event-types/{event_type}/examples +``` + +Schema validation request: + +```json +{ + "data": { + "invoice_id": "inv_123", + "amount": 4200 + } +} +``` + +Response: + +```json +{ + "valid": true, + "errors": [] +} +``` + +Compatibility response: + +```json +{ + "compatible": false, + "level": "breaking", + "changes": [ + { + "path": "$.required", + "message": "required field currency added" + } + ] +} +``` + +--- + +### Adapter discovery and configuration + +```text +GET /v1/adapters +GET /v1/adapters/{adapter_name} +GET /v1/adapters/{adapter_name}/versions +GET /v1/adapters/{adapter_name}/versions/{version} +POST /v1/adapters/custom +POST /v1/adapters/custom/{adapter_id}:test 
+POST /v1/adapters/custom/{adapter_id}:submit-for-review +POST /v1/adapters/custom/{adapter_id}:approve +``` + +Adapter response: + +```json +{ + "name": "stripe", + "versions": [ + { + "version": "2026-01-01", + "capabilities": ["verify", "normalize", "dedupe", "test_events"] + } + ] +} +``` + +--- + +### Secret rotation and keys + +```text +GET /v1/signing-keys +POST /v1/signing-keys +POST /v1/signing-keys/{kid}:rotate +POST /v1/signing-keys/{kid}:revoke +GET /.well-known/jwks.json + +GET /v1/verification-keys +POST /v1/verification-keys +POST /v1/verification-keys/{key_id}:rotate +POST /v1/verification-keys/{key_id}:revoke +``` + +Signing key creation: + +```json +{ + "algorithm": "ed25519", + "use": "delivery_signing" +} +``` + +Response: + +```json +{ + "kid": "key_2026_05", + "algorithm": "ed25519", + "state": "active", + "public_key_jwk": {} +} +``` + +--- + +### Health, readiness, workers, metrics + +```text +GET /healthz +GET /readyz +GET /livez +GET /v1/ops/workers +GET /v1/ops/workers/{worker_id} +GET /v1/ops/queues +GET /v1/ops/metrics +GET /v1/ops/metrics/prometheus +GET /v1/ops/storage +GET /v1/ops/config +``` + +Readiness response: + +```json +{ + "ready": true, + "checks": { + "postgres": "ok", + "object_storage": "ok", + "queue": "ok", + "migrations": "ok" + } +} +``` + +Metrics response can be JSON or Prometheus exposition. 
+ +--- + +### Audit and admin + +```text +GET /v1/audit-events +GET /v1/audit-events/{audit_event_id} +POST /v1/audit-events:export + +GET /v1/admin/config +PATCH /v1/admin/config +GET /v1/admin/retention-policies +POST /v1/admin/retention-policies +PATCH /v1/admin/retention-policies/{policy_id} + +GET /openapi.json +GET /openapi.yaml +``` + +Audit export request: + +```json +{ + "from": "2026-05-01T00:00:00Z", + "to": "2026-05-25T00:00:00Z", + "format": "jsonl", + "include_hash_chain": true +} +``` + +Response: + +```json +{ + "export_id": "exp_123", + "state": "ready", + "download_url": "/v1/audit-exports/exp_123:download", + "sha256": "sha256:..." +} +``` + +--- + +# 7. Ingestion API design + +## Generic webhook ingestion endpoint + +```text +POST /v1/ingest/{tenant_id}/{source_id} +POST /v1/ingest/{tenant_slug}/{source_slug} +``` + +Supports: + +* HMAC header verification, +* JWT-signed provider, +* API key header, +* mTLS, +* unsigned unsafe mode, +* CloudEvents structured/binary mode, +* custom adapter. + +## Provider-specific ingestion endpoints + +Provider-specific paths improve setup and docs: + +```text +POST /v1/ingest/stripe/{source_id} +POST /v1/ingest/github/{source_id} +POST /v1/ingest/shopify/{source_id} +POST /v1/ingest/slack/{source_id} +``` + +Provider-specific paths still map to a configured `source_id`. They are not magic global endpoints. + +## Multi-tenant ingress URLs + +Supported models: + +| Model | Example | Pros | Cons | +|------------------------|-------------------------------------|---------------------|-----------------------------------------| +| Path tenant/source IDs | `/v1/ingest/ten_123/src_123` | Explicit, simple | IDs leak existence. | +| Slugs | `/hooks/acme/stripe-prod` | Human-friendly | Collision management. | +| Random ingress token | `/hooks/in_6Yp...` | Hard to guess | Token in URL must be treated as secret. | +| Custom domain | `https://hooks.customer.com/stripe` | Enterprise-friendly | TLS/domain ops. 
| + +## Secret-bearing URLs vs header auth + +Secret-bearing URLs are allowed only as a compatibility mode. + +Rules: + +* Mark as `credential_location=url`. +* Redact token in logs, UI, metrics, and audit details. +* Never put full ingress URL in error messages. +* Store only hash of URL token. +* Prefer provider signatures or header-based auth. + +## Raw body preservation + +The ingress server must: + +* read bytes once, +* compute streaming hash, +* store exact bytes or immutable object reference, +* verify against exact bytes, +* parse only after raw persistence, +* record decompression behavior. + +Do not let framework JSON middleware consume or mutate the body before verification. This is mandatory for providers such as Stripe, GitHub, Slack, and Shopify-style HMAC verification because signatures are computed over the actual payload bytes or unmodified payload. ([Stripe Docs][1]) + +## Header canonicalization + +Store two forms: + +```json +{ + "headers_raw": [ + ["Stripe-Signature", "t=...,v1=..."], + ["Content-Type", "application/json"] + ], + "headers_canonical": { + "stripe-signature": ["t=...,v1=..."], + "content-type": ["application/json"] + } +} +``` + +Rules: + +* Preserve duplicate headers. +* Canonical map lowercases names. +* Do not trim values before signature verification unless provider spec requires it. +* Redact configured sensitive headers. + +## Body size limits + +Defaults: + +| Limit | Default | +|-------------------|-----------------------------------------:| +| Raw body max | 2 MiB community, configurable to 25 MiB+ | +| Header total max | 64 KiB | +| Single header max | 16 KiB | +| Multipart max | disabled by default | +| Compression ratio | max 20:1 | +| Parse depth | JSON depth 64 | +| Field count | configurable | + +Oversized payload: + +* before persistence: `413 Payload Too Large`; +* after partial streaming: abort, record rejected receipt if feasible; +* never queue partial body as valid event. 
+ +## Content-type handling + +Supported: + +* `application/json`, +* `application/cloudevents+json`, +* `application/x-www-form-urlencoded` for Slack-style interactions, +* `text/plain` for generic raw, +* provider-specific content types. + +Unsupported types return `415`. + +## Compression + +Inbound compression: + +* Accept `gzip` only if source allows. +* Store both: + + * compressed raw bytes hash, + * decompressed logical body hash. +* Verification uses provider-defined bytes. Usually this is the body as delivered to the app after HTTP decompression ambiguity is resolved by ingress configuration. To avoid ambiguity, provider-specific adapters should reject compressed requests unless documented. + +## Multipart/form payloads + +Disabled by default. If enabled: + +* store full raw multipart body, +* enforce part count and part size, +* never parse file uploads into memory, +* signatures must verify raw multipart bytes, +* transformations cannot read binary parts unless explicitly allowed. + +## Fast acknowledgement behavior + +Ack modes: + +| Mode | Meaning | +|----------------------------|-----------------------------------------------------------------------| +| `after_persist` | Return 2xx after raw event and event metadata are committed. | +| `after_verify_and_persist` | Default. Return 2xx only if signature verified and stored. | +| `after_enqueue` | Return 2xx after delivery jobs are created. Riskier for slow routes. | +| `sync_downstream` | Wait for relay delivery. Only for special internal sources. | + +The default is `after_verify_and_persist`. + +## Backpressure + +Backpressure levels: + +| Condition | Behavior | +|---------------------------------------|---------------------------------------------------------------------| +| Worker queue deep but storage healthy | Accept and persist; delay delivery. | +| Tenant quota exceeded | `429` before accepting, or accept into quarantine depending on policy. 
| +| Global ingestion saturation | shed unsafe/test traffic first, then low-priority tenants. | +| PostgreSQL unavailable | return `503`; do not acknowledge as accepted. | +| Object storage unavailable | if raw payload storage required and no DB fallback, return `503`. | +| Queue unavailable | persist event and create DB-backed pending job; do not lose event. | + +## Preventing event loss + +The minimal safe transaction: + +```text +BEGIN + insert raw_payload metadata or object ref + insert provider_receipt + insert event or duplicate_receipt + insert outbox row for async processing +COMMIT +return 2xx +``` + +Workers read from the durable outbox. If Redis/NATS/Kafka is unavailable, the DB outbox remains the source of truth. + +--- + +# 8. Provider adapter system + +## Built-in adapter matrix + +| Adapter | Signature verification | Required headers | Raw body requirement | Timestamp window | Event ID extraction | Type extraction | Account/source extraction | API version | Dedupe strategy | Normalization | Test support | Common failures | Warning | +|---------------------------|-----------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------|--------------------------------------------|----------------------------------------------------------------|--------------------------------------------------------|-----------------------|-------------------------------------------|-----------------------------------------------------|--------------------------------------------------------------------------------------|--------------------------------------|----------------------------|----------------------------------------------------------------------------|------------------------------------------------------------| +| Stripe | HMAC-SHA256 over `timestamp.raw_body`, header `Stripe-Signature`, `v1` only | `Stripe-Signature` | exact raw JSON 
body | default 5 min; configurable | `id` | `type` | account/context if present; source config | event API version from payload/config | `stripe:{account}:{event.id}`; secondary semantic key `type:data.object.id` optional | Stripe event → canonical envelope | Stripe CLI/test events | wrong secret, parsed body, old timestamp, duplicate event, unordered event | Never rely on ordering; handle duplicates. | +| GitHub | HMAC-SHA256 using webhook secret | `X-Hub-Signature-256`, `X-GitHub-Event`, `X-GitHub-Delivery` | unmodified payload | no provider timestamp by default; optional receive-window only | `X-GitHub-Delivery` | `X-GitHub-Event` | repository/org/app fields | GitHub Enterprise/version from headers if available | `github:{hook_scope}:{delivery_guid}` | event + delivery metadata | ping event | no secret configured, legacy SHA-1, timeout, no auto redelivery | Manual redelivery window may be limited; persist yourself. | +| Shopify | HMAC-SHA256 base64 over raw body using app secret | `X-Shopify-Hmac-Sha256`, `X-Shopify-Topic`, `X-Shopify-Shop-Domain`, `X-Shopify-Webhook-Id`, `X-Shopify-Event-Id` | exact raw body | no signed timestamp; optional receive-window | `X-Shopify-Webhook-Id`; correlate `X-Shopify-Event-Id` | `X-Shopify-Topic` | shop domain | API version header if present | delivery dedupe by webhook id; action correlation by event id | topic/shop/admin metadata → envelope | test webhook | timeout, app secret mismatch, multiple subscriptions for same topic | Must ack fast; provider may delete failing subscription. 
| +| Slack | HMAC-SHA256 over `v0:timestamp:raw_body` | `X-Slack-Signature`, `X-Slack-Request-Timestamp` | raw body before JSON/form parse | 5 min default | `event_id` for Events API, fallback body hash | `type` / `event.type` | `team_id`, `api_app_id`, enterprise id | none | `slack:{team}:{event_id}` | Slack event wrapper → envelope | URL verification challenge | timestamp skew, form body parsed, challenge handling | Events API is best-effort; ack within expected window. | +| Generic HMAC | Configurable HMAC | configured signature/timestamp headers | exact configured body | configurable | JSONPath/header | JSONPath/header | JSONPath/header | JSONPath/header | configured expression | configurable mapping | test vectors required | canonicalization ambiguity | Unsafe if canonicalization unclear. | +| Generic JWT-signed | JWS/JWT validation | `Authorization` or configured header | body may be detached payload or claim hash | `iat/exp/nbf` | claim/JSONPath | claim/JSONPath | claim/JSONPath | claim | `iss:jti` or configured | claims + body | test vector | alg confusion, missing kid, expired token | Reject `alg=none`. | +| Generic unsigned | none | optional static token | raw stored | none | hash or JSONPath | JSONPath/header | source config | optional | `source:body_hash` by default | best effort | yes | spoofing, replay | Must be clearly labeled unsafe. | +| Generic CloudEvents | Optional HMAC/JWT/mTLS; parse CloudEvents | `ce-id`, `ce-source`, `ce-type` in binary mode or structured body | preserve raw | optional | CloudEvents `id` | CloudEvents `type` | CloudEvents `source` | `specversion` | `source:id` | canonical almost direct | SDK fixtures | invalid specversion, missing fields | Do not force every provider into CloudEvents. 
| +| Internal trusted producer | API key/OIDC/mTLS; optional body signature | `Authorization`, optional `Idempotency-Key` | raw stored if requested | token expiry | supplied `id` | supplied `type` | producer identity | schema version | `tenant:source:id` | product event → envelope | SDK fixtures | schema mismatch, duplicate key | Auth is not proof of business validity. | + +CloudEvents is useful for interoperability because it standardizes required context attributes such as `id`, `source`, `specversion`, and `type`, and defines duplicate assumptions around `source + id`; this design borrows from it but does not force all provider payloads into pure CloudEvents. ([GitHub][8]) + +## Custom adapter system + +### Declarative adapters + +A declarative adapter definition: + +```json +{ + "name": "acme-hmac", + "version": "2026-05-01", + "verification": { + "type": "hmac_sha256", + "signature_header": "X-Acme-Signature", + "timestamp_header": "X-Acme-Timestamp", + "signed_payload": "{{timestamp}}.{{raw_body}}", + "encoding": "hex", + "replay_window_seconds": 300 + }, + "extractors": { + "provider_event_id": "$.id", + "type": "$.event_type", + "occurred_at": "$.created_at", + "account_id": "$.account.id" + }, + "deduplication": { + "key": "acme:{{account_id}}:{{provider_event_id}}", + "window_seconds": 31536000 + }, + "normalization": { + "source": "acme/{{account_id}}", + "subject": "$.resource.id", + "data": "$" + } +} +``` + +Declarative adapters are preferred for MVP because they are reviewable, deterministic, and sandboxable. + +### Code-based adapter plugins + +Allowed only after plugin isolation exists. + +Requirements: + +* WASM sandbox or separate process. +* No filesystem by default. +* No network by default. +* CPU and memory limits. +* Deterministic execution mode. +* Signed plugin packages. +* SBOM and provenance. +* Review workflow. +* Version pinning. +* Test vectors required. +* No dynamic dependency installation at runtime. 
+ +### Adapter interfaces + +```go +type Verifier interface { + Verify(ctx Context, raw []byte, headers HeaderMap, config Config) VerificationResult +} + +type Extractor interface { + Extract(ctx Context, raw []byte, parsed ParsedPayload, headers HeaderMap) ExtractedFields +} + +type Normalizer interface { + Normalize(ctx Context, input AdapterInput) CanonicalEnvelope +} +``` + +### Test vector requirements + +Every adapter version must include: + +* valid request, +* invalid signature, +* expired timestamp, +* missing header, +* malformed body, +* duplicate event, +* minimum event, +* maximum realistic event, +* provider test event, +* normalization expected output, +* dedupe expected key. + +### Approval workflow + +```text +draft → automated tests → security review → staging approval → active → deprecated → retired +``` + +Custom adapters automatically inherit: + +* raw storage, +* audit logs, +* replay, +* metrics, +* dedupe records, +* delivery attempts, +* route explanations, +* retention policies. + +--- + +# 9. 
Event envelope and normalization + +## Canonical envelope + +```json +{ + "envelope_version": "2026-05-01", + "id": "evt_01HX...", + "tenant_id": "ten_123", + "source_id": "src_123", + "source": "stripe/acct_123", + "provider": "stripe", + "provider_event_id": "evt_1P...", + "type": "payment_intent.succeeded", + "subject": "payment_intent/pi_123", + "account_id": "acct_123", + "received_at": "2026-05-25T10:00:00.000Z", + "occurred_at": "2026-05-25T09:59:58.000Z", + "api_version": "2025-04-30", + "schema_version": "stripe:2025-04-30", + "adapter_version": "stripe@2026-01-01", + "raw_payload_id": "raw_123", + "raw_payload_hash": "sha256:abc...", + "normalized_payload_hash": "sha256:def...", + "signature_verified": true, + "verification": { + "method": "hmac_sha256", + "key_id": "vkey_123", + "timestamp": "2026-05-25T10:00:00Z", + "replay_window_seconds": 300, + "result": "verified" + }, + "deduplication_key": "stripe:acct_123:evt_1P...", + "dedupe_status": "unique", + "trace_id": "trc_123", + "causation_id": null, + "correlation_id": "order_123", + "replay_of": null, + "test_mode": false, + "metadata": { + "provider_request_id": "req_123", + "ingress_ip": "203.0.113.10" + }, + "data": { + "object": { + "id": "pi_123" + } + }, + "provider_specific": { + "stripe_livemode": true + } +} +``` + +## Raw vs normalized vs canonical + +| Layer | Meaning | +|-------------------|---------------------------------------------------------------------------------------------| +| Raw | Exact bytes and headers as received. Used for signature verification and forensic evidence. | +| Parsed | Provider payload parsed into JSON/form/etc. Not authoritative for signatures. | +| Normalized | Adapter-produced representation with consistent fields. | +| Canonical | Platform event envelope used for routing, search, delivery, replay. | +| Provider-specific | Data that should be preserved but not promoted to canonical fields. 
| +| Tenant-specific | Internal metadata, route decisions, transformations, customer visibility rules. | + +## Hash calculation + +Recommended hashes: + +```text +raw_payload_hash = sha256(raw_body_bytes) +canonical_headers_hash = sha256(canonical_json(headers_raw_preserving_duplicates)) +normalized_payload_hash = sha256(canonical_json(normalized_envelope_without_mutable_fields)) +delivery_request_hash = sha256(method + url_host + headers_redacted_canonical + body_bytes) +delivery_response_hash = sha256(status + headers_redacted_canonical + truncated_body_bytes) +``` + +Use canonical JSON serialization for normalized hashes: + +* sorted object keys, +* UTF-8, +* no insignificant whitespace, +* stable number formatting. + +## Envelope versioning + +Rules: + +* `envelope_version` is mandatory. +* Old envelopes remain readable and replayable. +* New fields must be additive. +* Breaking envelope changes require a new version and migration view. +* Replay can use: + + * original envelope, + * migrated envelope, + * current re-normalization from raw. + +--- + +# 10. Delivery model + +## Delivery job + +A delivery job is created per `(event, endpoint, subscription/route)`. 
+ +```json +{ + "id": "del_123", + "event_id": "evt_123", + "endpoint_id": "end_123", + "subscription_id": "sub_123", + "route_id": "rte_123", + "route_version": 4, + "transformation_id": "trn_123", + "transformation_version": 2, + "retry_policy_id": "rpol_default", + "state": "scheduled", + "attempt_count": 0, + "next_attempt_at": "2026-05-25T10:00:01Z" +} +``` + +## HTTP behavior + +| Setting | Default | +|-------------------------|--------------------------------------------------------| +| Method | `POST` | +| Timeout | connect 2 s, total 10 s | +| Body | canonical JSON envelope or configured transformed body | +| Redirects | disabled by default | +| TLS | required for internet endpoints | +| TLS minimum | TLS 1.2 | +| Certificate validation | required | +| mTLS | optional per endpoint | +| Proxy | optional controlled egress proxy | +| Compression | off by default for delivery | +| Response body capture | first 16 KiB default | +| Response header capture | redacted and size-limited | + +## Delivery headers + +Example outbound request: + +```http +POST /webhooks/acme HTTP/1.1 +Host: customer.example.com +Content-Type: application/json +User-Agent: WebhookControlPlane/1.0 +Webhook-Event-Id: evt_123 +Webhook-Delivery-Id: del_123 +Webhook-Attempt-Id: att_123 +Webhook-Timestamp: 2026-05-25T10:00:00Z +Webhook-Retry-Count: 2 +Webhook-Idempotency-Key: evt_123:end_123:sub_123 +Webhook-Signature: t=1779703200,v1=... +Traceparent: 00-... +``` + +Payload: + +```json +{ + "id": "evt_123", + "type": "invoice.paid", + "source": "com.example.billing", + "subject": "invoice/inv_123", + "occurred_at": "2026-05-25T09:59:30Z", + "data": { + "invoice_id": "inv_123" + } +} +``` + +## Signature design + +Default HMAC: + +```text +signed_payload = timestamp + "." 
+ raw_delivery_body +signature = hex(hmac_sha256(endpoint_secret, signed_payload)) +Webhook-Signature = t=<timestamp>,v1=<signature> +``` + +Support: + +* HMAC-SHA256 default, +* Ed25519 for asymmetric signing, +* JWT/JWS for enterprises, +* multi-secret grace period, +* key ID with `kid=<key_id>` when using key registry. + +## Response code handling + +| Response | Classification | +|----------------------------|------------------------------------------------------| +| `200-299` | success | +| `300-399` | failure unless redirects explicitly enabled and safe | +| `400`, `404`, `410`, `422` | permanent by default, configurable | +| `401`, `403` | permanent or auth-misconfigured | +| `408`, `425`, `429` | temporary; honor `Retry-After` for `429` | +| `500-599` | temporary | +| timeout | temporary | +| TLS/DNS/connect error | temporary or policy-blocked | +| SSRF block | quarantine/security failure | + +## Dead-letter behavior + +A delivery enters DLQ when: + +* max attempts exhausted, +* max retry duration exceeded, +* endpoint disabled, +* permanent failure configured as terminal, +* transformation keeps failing, +* URL becomes blocked by SSRF policy, +* payload too large for endpoint policy. + +DLQ entry must include reason, policy version, last attempt, and release options. + +## Quarantine behavior + +Quarantine is for events/deliveries that should not be trusted or delivered without review: + +* invalid but suspicious signature, +* unsafe unsigned source, +* SSRF-blocked endpoint, +* adapter parser anomaly, +* transformation tried forbidden operation, +* schema conflict with high-risk event type, +* tenant isolation violation attempt. + +--- + +# 11. 
Retry, backoff, and scheduling + +## Default retry policy + +Production default: + +```json +{ + "name": "default", + "max_attempts": 12, + "max_duration_seconds": 259200, + "initial_delay_seconds": 10, + "backoff": { + "type": "exponential", + "factor": 2.0, + "max_delay_seconds": 21600, + "jitter": "full" + }, + "timeout_ms": 10000, + "honor_retry_after": true, + "retry_on": ["408", "409", "425", "429", "5xx", "timeout", "network_error"], + "do_not_retry_on": ["400", "401", "403", "404", "410", "413", "422"] +} +``` + +## Backoff with jitter + +Use full jitter: + +```text +cap = min(max_delay, initial_delay * factor^attempt) +delay = random(0, cap) +``` + +This avoids synchronized retry storms. + +## Scheduling dimensions + +Fairness must apply across: + +* tenant, +* endpoint, +* source, +* subscription, +* replay job, +* priority class, +* worker pool. + +Suggested algorithm: + +```text +Global scheduler + → per-priority queue + → weighted fair tenant scheduler + → per-tenant endpoint scheduler + → per-endpoint concurrency limiter +``` + +Use Deficit Round Robin or Weighted Fair Queuing. + +## Limits + +| Limit | Purpose | +|---------------------------|---------------------------------------------| +| per-endpoint concurrency | prevent hammering broken customer endpoints | +| per-tenant concurrency | prevent noisy tenant starvation | +| per-source concurrency | prevent provider spike dominating relay | +| global replay concurrency | prevent replay from starving live traffic | +| per-host concurrency | avoid overloading same host | +| per-worker lease | crash-safe job ownership | +| rate-limit tokens | explicit endpoint/customer policy | + +## Replay scheduling + +Replay jobs must: + +* run at lower priority than live deliveries by default, +* have per-job and per-tenant rate limits, +* support pause/resume/cancel, +* emit progress metrics, +* never bypass endpoint concurrency, +* optionally isolate into separate worker pool. 
+ +## Poison message handling + +A poison event is one that repeatedly fails before HTTP delivery, usually due to transformation, schema, or adapter bugs. + +Rules: + +* after `N` deterministic failures, stop retrying transformation; +* create DLQ/quarantine entry; +* include deterministic input hash and failing config version; +* do not let poison items block ordered queues forever without operator visibility. + +## Crash-safe retry behavior + +Delivery claiming: + +```sql +UPDATE deliveries +SET state = 'in_progress', + locked_by = :worker_id, + lock_expires_at = now() + interval '60 seconds' +WHERE id = :id + AND state = 'scheduled' + AND next_attempt_at <= now() +RETURNING *; +``` + +On worker crash, the lease expires. Because the claim query above only matches `state = 'scheduled'`, a reaper must reset rows whose `lock_expires_at` has passed from `in_progress` back to `scheduled` (or the claim predicate must also accept expired leases); only then can another worker resume the delivery. + +--- + +# 12. Idempotency and deduplication + +## Definitions + +| Term | Meaning | +|---------------|----------------------------------------------------------------------------------| +| Duplicate | Same provider/business event received more than once. | +| Retry | Same delivery job attempted again after failure or ambiguous outcome. | +| Redelivery | Provider or platform sends the same event again. | +| Replay | Operator/API intentionally reprocesses stored event data. | +| Idempotency | Repeating the same operation does not produce unintended extra effects. | +| Deduplication | Detecting duplicate events/requests and choosing whether to suppress processing. | + +## Ingestion idempotency + +For product event ingestion: + +```text +unique(tenant_id, producer_id, idempotency_key) +unique(tenant_id, source, producer_event_id) +``` + +Repeated request with same idempotency key and same request hash returns the original response. + +Same idempotency key with different request hash returns `409 idempotency_conflict`. 
+ +## Provider event deduplication + +Dedupe key examples: + +| Provider | Primary dedupe key | +|--------------|-----------------------------------------| +| Stripe | `stripe:{account_id}:{event.id}` | +| GitHub | `github:{scope}:{X-GitHub-Delivery}` | +| Shopify | `shopify:{shop}:{X-Shopify-Webhook-Id}` | +| Slack | `slack:{team_id}:{event_id}` | +| CloudEvents | `cloudevents:{source}:{id}` | +| Generic HMAC | configured expression | +| Unsigned | `source:{raw_payload_hash}` by default | + +Stripe documents both duplicate deliveries of the same event and cases where separate Event objects may represent the same underlying object/type, so the adapter should support both strict event-ID dedupe and optional semantic dedupe. ([Stripe Docs][1]) + +## Duplicate visibility + +Do not silently discard duplicates. Store: + +```json +{ + "receipt_id": "rcp_456", + "event_id": "evt_123", + "dedupe_status": "duplicate_suppressed", + "duplicate_of": "evt_123", + "received_at": "2026-05-25T10:02:00Z", + "raw_payload_hash": "sha256:..." +} +``` + +## Suppression vs storage + +| Mode | Behavior | +|------------------------|------------------------------------------------------------------| +| `store_and_suppress` | default; record duplicate, do not route. | +| `store_and_route` | useful for debugging or providers where duplicate is meaningful. | +| `reject_duplicate` | return duplicate status to trusted producers only. | +| `quarantine_collision` | if dedupe key same but payload hash materially different. 
| + +## Dedupe windows + +| Record type | Default | +|----------------------------|--------------------------------------:| +| provider event IDs | 90 days minimum | +| high-risk financial events | permanent or compliance retention | +| raw body hash dedupe | configurable | +| idempotency API keys | 24 h to 7 days | +| replay idempotency | replay job lifetime + audit retention | + +## Collision handling + +If `dedupe_key` matches but `raw_payload_hash` differs: + +* mark `dedupe_status = collision`; +* do not suppress silently; +* quarantine if provider claims immutable event IDs; +* show both receipts; +* emit alert. + +--- + +# 13. Ordering and causality + +## Default semantics + +Default delivery semantics: + +```text +Events are delivered at least once. +Events are not globally ordered. +Retries of one event may occur after later events. +Replays may intentionally create new delivery attempts out of original order. +``` + +This must be documented prominently. Stripe explicitly does not guarantee event ordering, which is a good baseline assumption for webhook infrastructure. ([Stripe Docs][1]) + +## Optional ordering modes + +| Mode | Scope | Trade-off | +|----------------------|-------------------------------|-------------------------------------------| +| unordered | default | best throughput | +| per-subject | `tenant + endpoint + subject` | good balance | +| per-endpoint | `tenant + endpoint` | can block all events for endpoint | +| per-tenant | `tenant` | severe throughput loss | +| custom partition key | expression | powerful but dangerous if low cardinality | + +## Head-of-line blocking + +If event `A` for subject `s1` fails and ordering is per-subject, event `B` for `s1` waits. Events for `s2` continue. + +Mitigations: + +* max block duration, +* skip-to-DLQ with audit, +* operator release, +* partition health view, +* per-subject queue depth metrics. 
+ +## Sequence numbers + +The platform can assign: + +```json +{ + "platform_sequence": 102030, + "subject_sequence": 18, + "endpoint_sequence": 991 +} +``` + +But these are platform receive/delivery sequences, not provider truth. + +## Causation and correlation + +Use: + +* `trace_id`: platform trace. +* `correlation_id`: business transaction. +* `causation_id`: event that caused this event. +* `replay_of`: original event or delivery. +* `provider_event_id`: provider’s own ID. +* `subject`: resource affected. + +--- + +# 14. Routing, filtering, fanout, and transformations + +## Route rule model + +```json +{ + "id": "rte_123", + "version": 4, + "priority": 100, + "source_id": "src_123", + "match": { + "providers": ["stripe"], + "event_types": ["payment_intent.succeeded"], + "subjects": ["payment_intent/*"], + "headers": { + "stripe-account": "acct_123" + }, + "expression": "$.data.object.amount > 0" + }, + "destination": { + "type": "endpoint", + "endpoint_id": "end_123" + }, + "fanout": { + "mode": "all_matches" + }, + "transformation_id": "trn_123" +} +``` + +## Matching types + +* exact event type, +* prefix/wildcard event type, +* provider, +* source, +* tenant, +* subject, +* account, +* headers, +* JSONPath-like payload field, +* schema version, +* test mode, +* verification status. + +## Fanout + +Fanout modes: + +| Mode | Behavior | +|-------------------------|------------------------------------------| +| `first_match` | first route by priority wins | +| `all_matches` | create delivery for every matching route | +| `explicit_destinations` | fixed destination list | +| `subscription_fanout` | event type subscriptions | + +Prevent accidental duplicate fanout with route dry-run and duplicate destination warnings. 
+ +## Transformations + +Safe MVP transformation types: + +| Type | Example | +|-------------------|---------------------------------------------| +| field projection | include only `id`, `type`, `data.object.id` | +| rename | `data.object.id → payment_intent_id` | +| redaction | remove `customer.email` | +| static enrichment | add tenant-safe metadata | +| template | JSON template with deterministic variables | + +Dangerous transformations: + +* network calls, +* database calls, +* randomness, +* current time except provided deterministic context, +* filesystem, +* unbounded loops, +* arbitrary package imports. + +## Transformation failure + +On failure: + +* do not deliver partial payload; +* classify deterministic vs transient; +* record input hash, transformation version, error; +* retry only if failure is transient; +* DLQ/quarantine deterministic failures; +* allow preview and dry-run. + +--- + +# 15. Schema and contract management + +## Event type registry + +Event type: + +```json +{ + "name": "invoice.paid", + "owner": "billing", + "description": "Invoice payment completed", + "visibility": "public", + "versioning": "semantic", + "default_schema_version": "2026-05-01" +} +``` + +## JSON Schema + +Schema resource: + +```json +{ + "event_type": "invoice.paid", + "version": "2026-05-01", + "schema_format": "json_schema", + "schema": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "required": ["invoice_id", "amount", "currency"], + "properties": { + "invoice_id": { "type": "string" }, + "amount": { "type": "integer", "minimum": 0 }, + "currency": { "type": "string" } + } + } +} +``` + +OpenAPI 3.1 aligns with JSON Schema 2020-12, which is useful for generating SDKs, docs, and validation from the same contract. 
([openapis.org][9]) + +## Compatibility rules + +| Change | Default classification | +|--------------------|--------------------------------------------------------| +| add optional field | compatible | +| add required field | breaking | +| remove field | breaking | +| widen enum | compatible for consumers, maybe breaking for producers | +| narrow enum | breaking | +| type change | breaking | +| add event type | compatible | +| remove event type | breaking | +| rename field | breaking unless alias period | + +## Consumer subscription by version + +Consumers can choose: + +```json +{ + "event_type": "invoice.paid", + "schema_versions": ["2026-05-01"], + "compatibility": "compatible_minor" +} +``` + +## Deprecation lifecycle + +```text +draft → active → deprecated → sunset_scheduled → retired +``` + +Every deprecation needs: + +* replacement, +* sunset date, +* affected subscriptions, +* changelog, +* sample migration, +* alerting. + +--- + +# 16. Security model + +## Authentication + +| Surface | Auth | +|---------------------------|-----------------------------------------| +| Management API | OIDC/session or API key | +| Producer API | API key, OAuth client credentials, mTLS | +| Inbound provider | provider signature/JWT/HMAC | +| Worker internal API | mTLS/service account | +| Metrics | operator auth or internal-only | +| Webhook endpoint delivery | signed by platform | + +## Authorization + +RBAC roles: + +| Role | Capabilities | +|------------------|------------------------------------------------| +| owner | all tenant actions | +| admin | config, endpoints, routes | +| developer | read events, create test sources, run dry-runs | +| operator | retry, replay, DLQ release within policy | +| security | quarantine, secrets metadata, audit export | +| support | customer-visible logs only | +| auditor | read audit/evidence exports | +| billing/customer | own endpoint logs/subscriptions | + +ABAC constraints: + +* tenant ID, +* environment, +* source ownership, +* 
endpoint ownership, +* production vs sandbox, +* PII access, +* replay risk level. + +## Secret handling + +Rules: + +* Never store secrets in PostgreSQL in plaintext; if secrets must be stored there, protect them with envelope encryption. +* Prefer KMS/HSM/Vault. +* Show generated secrets once. +* Store hash for lookup if needed. +* Redact in logs. +* Dual-secret grace periods. +* Audit every reveal, rotation, revocation. + +## Signature verification + +Requirements: + +* raw body access, +* timestamp replay window where available, +* constant-time comparison, +* algorithm allowlist, +* reject legacy algorithms by default, +* support key rotation, +* record verification result and key ID, +* do not route unverified events unless unsafe mode is explicitly enabled. + +## IP allowlists + +Provider IP allowlists are defense-in-depth only: + +* useful at load balancer/firewall, +* not a substitute for signatures, +* can change, +* can be bypassed in misconfigured networks, +* self-hosted customers must control updates. + +## Unsafe modes + +Unsafe mode examples: + +* unsigned provider, +* secret in URL, +* HTTP endpoint, +* redirect-following, +* private IP destination. + +UI/API must label unsafe modes loudly: + +```json +{ + "risk_level": "unsafe", + "warnings": [ + "This source does not authenticate sender identity.", + "Events from this source must not trigger high-risk side effects." + ] +} +``` + +--- + +# 17. SSRF-safe endpoint handling + +Customer-provided URLs are untrusted input. + +## URL validation + +Accept only: + +```text +https://host[:port]/path?query +``` + +Default reject: + +* `http://` for production internet endpoints, +* `file://`, +* `ftp://`, +* `gopher://`, +* `dict://`, +* `ldap://`, +* `unix://`, +* embedded credentials `https://user:pass@host`, +* empty host, +* IP literal unless explicitly allowed, +* non-standard ports unless policy allows. + +## DNS resolution strategy + +At endpoint creation: + +1. Parse URL with a real URL parser, not regex. +2. Normalize host using IDNA. +3. 
Resolve A and AAAA records. +4. Reject blocked IP ranges. +5. Store validation result. + +At delivery time: + +1. Re-resolve hostname shortly before connect. +2. Reject if any selected IP is blocked. +3. Connect to validated IP while preserving original SNI/Host. +4. Enforce that the connected peer IP matches allowed resolution. +5. Cache DNS briefly; do not trust stale “safe” result indefinitely. + +OWASP warns about URL mishandling, callback URLs, redirects, DNS rebinding, and TOCTOU-style issues in SSRF prevention guidance. ([OWASP Cheat Sheet Series][5]) + +## Blocked IP ranges + +Block by default: + +| Range type | +|-----------------------------------------------------------------------| +| IPv4 private: `10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16` | +| IPv4 loopback: `127.0.0.0/8` | +| IPv4 link-local: `169.254.0.0/16` | +| IPv4 multicast/reserved | +| IPv6 loopback: `::1/128` | +| IPv6 unique local: `fc00::/7` | +| IPv6 link-local: `fe80::/10` | +| IPv6 multicast | +| IPv4-mapped IPv6 private forms | +| cloud metadata IPs, especially `169.254.169.254` and IPv6 equivalents | +| Kubernetes/service CIDRs if configured | +| internal corporate CIDRs if configured | + +## Redirects + +Default: no redirects. + +If enabled: + +* max 1 or 2 hops, +* revalidate every `Location`, +* only `https`, +* no downgrade, +* no host change unless allowed, +* no private IP, +* audit redirect chain. + +## Proxy and egress + +Recommended production: + +* dedicated egress proxy, +* network policies blocking metadata services, +* no direct pod access to internal networks, +* DNS through controlled resolver, +* egress logs, +* per-tenant egress policy for enterprise. 
+ +## Admin override + +Admin override model: + +```json +{ + "allow_private_network": true, + "allowed_cidrs": ["10.20.0.0/16"], + "requires_mtls": true, + "approval_ticket": "SEC-1234", + "expires_at": "2026-06-25T00:00:00Z" +} +``` + +Overrides require: + +* security role, +* reason, +* expiry, +* audit event, +* optional two-person approval. + +## SSRF bypass tests + +Test cases: + +* decimal IP: `2130706433`, +* octal/hex IP, +* IPv6 mapped IPv4, +* mixed-case schemes, +* trailing dots, +* IDNA/punycode, +* redirect to private IP, +* DNS rebinding simulation, +* CNAME to private IP, +* metadata service hostnames, +* embedded credentials, +* path confusion, +* CRLF in URL, +* oversized hostnames. + +--- + +# 18. Auditability and evidence + +## Audit event model + +```json +{ + "id": "aud_123", + "tenant_id": "ten_123", + "occurred_at": "2026-05-25T10:00:00Z", + "actor": { + "type": "user", + "id": "usr_123", + "ip": "203.0.113.50", + "user_agent": "Mozilla/5.0" + }, + "action": "delivery.retry_requested", + "resource": { + "type": "delivery", + "id": "del_123" + }, + "reason": "customer fixed endpoint", + "before_hash": "sha256:...", + "after_hash": "sha256:...", + "config_version": 4, + "request_id": "req_123", + "trace_id": "trc_123", + "metadata": {} +} +``` + +## Event lifecycle log + +Lifecycle entries: + +```text +received +raw_persisted +signature_verified / verification_failed +normalized +dedupe_unique / duplicate_suppressed / collision +route_evaluated +delivery_created +delivery_attempt_started +delivery_attempt_succeeded / failed +retry_scheduled +dead_lettered +replayed +quarantined +released +retention_deleted +``` + +## Evidence hashes + +Record: + +* raw payload hash, +* canonical headers hash, +* normalized envelope hash, +* route config hash, +* transformation input/output hash, +* delivery request hash, +* delivery response hash, +* audit event hash. 
+ +## Hash chaining + +Optional tamper-evident chain: + +```text +audit_hash_n = sha256(canonical_audit_event_n + audit_hash_(n-1)) +``` + +Periodically anchor: + +* to object storage immutable bucket, +* external timestamping service, +* customer SIEM, +* append-only log. + +## Evidence package + +Export: + +```json +{ + "event": {}, + "raw_payload": { + "sha256": "sha256:...", + "body_base64": "..." + }, + "normalized_envelope": {}, + "verification_evidence": {}, + "route_decisions": [], + "transformations": [], + "deliveries": [], + "attempts": [], + "audit_events": [], + "hash_manifest": {} +} +``` + +## What audit can prove + +Can prove: + +* what bytes the platform stored, +* which verification result was computed, +* which config versions were used, +* which delivery attempts were made, +* what response the platform observed, +* who initiated replays/retries/config changes, +* whether evidence was altered after hashing if tamper-evidence is enabled. + +Cannot prove: + +* that provider truly originated an unsigned event, +* that customer endpoint committed or did not commit side effects, +* that a remote server did not lie in its response, +* that a user outside the platform did not misuse copied secrets, +* that deleted payloads under retention policy still exist. + +--- + +# 19. 
Reproducibility and replay model + +## Replay modes + +| Mode | Input | Config | +|-----------------------------------|----------------------------|------------------------------------------------| +| `from_raw_original_config` | raw payload | original adapter/route/transformation versions | +| `from_raw_current_config` | raw payload | current adapter/config | +| `from_normalized_original_config` | stored normalized envelope | original route/transform | +| `from_normalized_current_config` | stored normalized envelope | current route/transform | +| `delivery_retry` | original delivery payload | original delivery config | +| `endpoint_specific` | event subset | selected endpoint | +| `failed_only` | failed deliveries | original or current config | +| `dry_run` | any | no side effects | + +## Replay controls + +* dry-run required for bulk replay above threshold, +* rate limit required, +* permission required, +* reason required, +* optional approval, +* idempotency key required, +* replay receipts generated, +* live traffic priority protected, +* endpoint state respected unless `force=true`, +* unsafe URLs revalidated. + +## Reproduce specific decisions + +| Decision | Required stored data | +|------------------------|----------------------------------------------------------------------------------------------------------------| +| Signature verification | raw body, raw headers, key ID/secret version, adapter version, timestamp policy. | +| Normalization | raw payload, adapter version, adapter config, parser version. | +| Deduplication | dedupe rule version, extracted fields, dedupe key, existing dedupe records. | +| Routing | normalized envelope, route versions, filter versions, route engine version. | +| Transformation | input envelope, transformation version, deterministic runtime version. | +| Delivery payload | event, route/subscription, transformation, payload format version. | +| Delivery signature | delivery body bytes, timestamp, signing key ID, algorithm. 
| +| Retry schedule | retry policy version, attempt timestamps, failure classifications, jitter seed if deterministic replay needed. | + +For deterministic replay, store a replay context: + +```json +{ + "adapter_version": "stripe@2026-01-01", + "route_engine_version": "2026-05-01", + "transformation_runtime_version": "wasm-v1.4.0", + "retry_policy_version": 3, + "random_seed": "seed_123" +} +``` + +--- + +# 20. Transparency and debugging + +## Event detail page/API must show + +* receipt timestamp, +* source/provider, +* raw headers, +* raw payload access, +* payload hash, +* signature result, +* key ID used, +* timestamp skew, +* adapter version, +* normalized envelope, +* dedupe key, +* duplicate/collision status, +* route match explanation, +* transformations and previews, +* delivery jobs, +* attempts, +* response status/body, +* next retry time, +* DLQ/quarantine reason, +* replay/retry controls, +* trace/correlation IDs, +* audit timeline. + +## Route match explanation + +```json +{ + "route_id": "rte_123", + "version": 4, + "matched": true, + "checks": [ + { + "name": "provider", + "expected": "stripe", + "actual": "stripe", + "result": true + }, + { + "name": "event_type", + "expected": ["payment_intent.succeeded"], + "actual": "payment_intent.succeeded", + "result": true + }, + { + "name": "expression", + "expression": "$.data.object.amount > 0", + "result": true + } + ] +} +``` + +## CLI debugging commands + +```bash +whcp events list --type invoice.paid --since 24h +whcp events get evt_123 --timeline +whcp events raw evt_123 --output payload.json +whcp deliveries list --event evt_123 +whcp deliveries retry del_123 --reason "fixed endpoint" +whcp replay dry-run --event evt_123 --endpoint end_123 +whcp replay create --failed-only --endpoint end_123 --since 24h --rate 100/min +whcp signatures verify --source src_123 --body payload.json --header "Stripe-Signature: ..." +whcp endpoints test end_123 +whcp routes explain --event evt_123 +``` + +--- + +# 21. 
Observability and SLOs + +## Metrics + +| Metric | Dimensions | +|---------------------------------|---------------------------------------| +| `ingress_requests_total` | tenant, source, provider, status | +| `ingress_persist_latency_ms` | tenant, source | +| `signature_verifications_total` | provider, result, reason | +| `events_accepted_total` | tenant, type, source | +| `events_duplicate_total` | tenant, provider, rule | +| `route_evaluations_total` | route, result | +| `deliveries_created_total` | tenant, endpoint | +| `delivery_attempts_total` | endpoint, status_class, failure_class | +| `delivery_latency_ms` | tenant, endpoint, event_type | +| `delivery_success_ratio` | endpoint, tenant | +| `retry_scheduled_total` | policy, failure_class | +| `dlq_entries_total` | reason, endpoint | +| `quarantine_entries_total` | reason, source | +| `queue_depth` | queue, tenant, priority | +| `worker_busy_ratio` | worker_pool | +| `worker_lease_expired_total` | worker_pool | +| `storage_errors_total` | component | +| `object_storage_latency_ms` | operation | +| `replay_events_total` | job, tenant | +| `ssrf_blocks_total` | reason | +| `audit_events_total` | action | + +## Logs + +Structured JSON logs: + +```json +{ + "level": "info", + "time": "2026-05-25T10:00:00Z", + "msg": "delivery attempt failed", + "tenant_id": "ten_123", + "event_id": "evt_123", + "delivery_id": "del_123", + "attempt_id": "att_123", + "failure_class": "http_500", + "trace_id": "trc_123" +} +``` + +Never log raw secrets or full payloads by default. 
+ +## Traces + +Use OpenTelemetry-style traces: + +```text +ingress.receive +ingress.persist_raw +adapter.verify +adapter.normalize +dedupe.check +route.evaluate +delivery.sign +delivery.http_request +delivery.record_attempt +retry.schedule +``` + +## Recommended SLOs + +| SLO | Target | +|-----------------------------------|---------------------------------------------------------:| +| Ingestion API availability | 99.95% monthly | +| Durable persistence latency | p99 < 500 ms for payloads under 256 KiB | +| Provider ack latency | p99 < 1 s after receipt for normal load | +| First delivery scheduling latency | p95 < 5 s | +| First delivery attempt latency | p95 < 30 s for healthy endpoints | +| Delivery attempt recording | 99.99% attempts recorded | +| API availability | 99.9% self-hosted target, deployment-dependent | +| Replay job start latency | p95 < 60 s | +| Worker crash recovery | stuck jobs reclaimed < 2 min | +| Data durability | no accepted event lost; backup RPO defined by deployment | +| Audit event persistence | 99.99% for mutating actions | + +## Alert thresholds + +Examples: + +* ingestion 5xx > 1% for 5 minutes, +* persistence p99 > 2 seconds, +* queue depth age > 10 minutes, +* DLQ growth > baseline, +* endpoint success ratio < 90% over 15 minutes, +* verification failures spike by provider, +* worker heartbeat missing, +* replay consuming > configured live traffic budget, +* object storage errors, +* audit write failures, +* SSRF blocks spike. + +--- + +# 22. 
Data storage model + +## Core tables + +Minimum PostgreSQL tables: + +```text +tenants +users +api_keys +sources +source_secrets +provider_adapters +adapter_versions +endpoints +endpoint_secrets +event_types +event_schemas +events +raw_payloads +normalized_envelopes +provider_receipts +dedupe_records +subscriptions +routes +route_versions +filters +filter_versions +transformations +transformation_versions +deliveries +delivery_attempts +retry_policies +replay_jobs +replay_job_items +dead_letter_entries +quarantine_entries +idempotency_records +signing_keys +verification_keys +audit_events +retention_policies +outbox +worker_leases +metrics_rollups +alerts +config_versions +``` + +## PostgreSQL-only MVP + +Design: + +* raw payloads stored in `bytea` or compressed `bytea` for moderate sizes, +* JSONB for envelope and metadata, +* DB-backed outbox, +* workers use `FOR UPDATE SKIP LOCKED`, +* partition large tables by month/tenant if needed. + +Pros: + +* simplest self-hosted deployment, +* easiest backup/restore, +* transactional consistency, +* fewer moving parts. + +Cons: + +* large payloads bloat DB, +* high delivery volume may stress DB, +* time-series metrics are limited, +* queue throughput lower than dedicated systems. + +## PostgreSQL + object storage + +Design: + +* PostgreSQL stores metadata, hashes, object URI. +* S3-compatible storage holds raw payloads and evidence exports. + +Pros: + +* cheaper large payload retention, +* immutable buckets possible, +* better backups for payloads. + +Cons: + +* two-phase consistency issues, +* object storage outage affects ingestion unless DB fallback exists. + +Recommended v1. + +## PostgreSQL + Redis/NATS + +Use Redis/NATS as acceleration, not source of truth: + +* DB outbox remains authoritative. +* Redis/NATS carries wakeups and short-lived scheduling hints. +* Lost queue message is recoverable by polling DB. + +Pros: + +* better worker responsiveness, +* lower DB polling load. + +Cons: + +* more ops complexity. 
+ +## Kafka/NATS as optional advanced backend + +Kafka/NATS can be enterprise-scale delivery backends, but should not be mandatory. + +Use when: + +* very high throughput, +* multi-consumer internal streaming, +* separate regional worker pools, +* existing platform team operates it. + +Do not make Kafka the only persistence mechanism for audit evidence. + +--- + +# 23. Self-hosted deployment and operations + +## Deployment modes + +| Mode | Components | +|-----------------|-------------------------------------------------------------------------------| +| Single binary | API, worker, scheduler, UI, PostgreSQL external or embedded dev DB. | +| Docker Compose | API, worker, Postgres, optional MinIO, optional Redis. | +| Kubernetes/Helm | deployments for API, workers, scheduler, UI; Postgres external; object store. | +| Air-gapped | signed images, offline adapter registry, offline docs, license file. | +| Local dev | single command, test provider simulator, local tunnel integration. | +| Production | HA API, multiple workers, managed Postgres, object storage, backups, metrics. | + +## Processes + +```text +whcp api +whcp worker --pool deliveries +whcp worker --pool ingestion +whcp scheduler +whcp migrate +whcp admin +``` + +## Database migrations + +Rules: + +* migrations are forward-compatible, +* expand/contract pattern, +* no destructive migration without backup gate, +* migrations record version and checksum, +* app checks migration compatibility on startup, +* rollback scripts for failed deploys where possible. + +## Backup and restore + +Backup: + +* PostgreSQL PITR, +* object storage versioning/replication, +* config export, +* KMS key backup procedure, +* audit export optional. + +Restore drills: + +* restore tenant subset, +* restore full cluster, +* verify hashes, +* replay from restored raw payload, +* rebuild delivery schedule from DB. 
+ +## Horizontal scaling + +Scale independently: + +* ingress API replicas, +* management API replicas, +* delivery workers, +* replay workers, +* adapter/transformation sandbox workers, +* scheduler. + +Use leader election for singleton scheduling tasks, or design them as idempotent sharded workers. + +## Multi-region + +MVP: single writable region. + +Advanced: + +* region-local ingress with central durable replication, +* per-tenant home region, +* active/passive failover, +* no global ordering claims, +* replay region awareness, +* region-specific keys and data residency. + +## Upgrade strategy + +* SemVer. +* Adapter versions immutable. +* Config versions immutable. +* Envelope migration compatibility. +* Blue/green or rolling deployments. +* Worker version skew supported for at least one minor version. +* Downgrade only before irreversible migrations. + +--- + +# 24. API errors and problem details + +Problem shape: + +```json +{ + "type": "https://docs.example.com/errors/invalid-signature", + "title": "Invalid webhook signature", + "status": 401, + "detail": "The Stripe-Signature header did not match any active verification secret.", + "instance": "/v1/ingest/stripe/src_123/requests/req_123", + "code": "invalid_signature", + "request_id": "req_123", + "docs_url": "https://docs.example.com/errors/invalid-signature", + "fields": [ + { + "name": "headers.Stripe-Signature", + "reason": "signature_mismatch" + } + ], + "retryable": false +} +``` + +## Error catalog + +| Code | HTTP | Retryable | Notes | +|---------------------------------|--------:|-------------|-----------------------------| +| `validation_error` | 400 | false | malformed JSON/body/config | +| `authentication_error` | 401 | false | missing/invalid API key | +| `authorization_error` | 403 | false | RBAC/tenant denial | +| `source_not_found` | 404 | false | no source | +| `endpoint_not_found` | 404 | false | no endpoint | +| `invalid_signature` | 401 | false | verification failed | +| 
`expired_timestamp` | 401 | maybe | sender clock/replay | +| `duplicate_event` | 200/409 | false | depends on endpoint | +| `payload_too_large` | 413 | false | limit exceeded | +| `unsupported_content_type` | 415 | false | unsupported media | +| `unsupported_provider` | 422 | false | adapter missing | +| `adapter_configuration_invalid` | 422 | false | bad adapter config | +| `schema_validation_failed` | 422 | false | payload violates schema | +| `route_evaluation_failed` | 500/422 | maybe | engine/config issue | +| `transformation_failed` | 422/500 | maybe | deterministic vs transient | +| `endpoint_url_blocked` | 422 | false | SSRF policy | +| `delivery_failed` | 502 | true | endpoint failure | +| `replay_rejected` | 409/422 | false | unsafe/broad/conflict | +| `rate_limit_exceeded` | 429 | true | include retry-after | +| `quota_exceeded` | 402/429 | false/maybe | plan limit | +| `storage_unavailable` | 503 | true | do not ack as accepted | +| `queue_unavailable` | 202/503 | maybe | accepted if DB outbox works | +| `internal_error` | 500 | maybe | unknown | + +--- + +# 25. 
SDKs, CLI, and developer experience + +## SDKs + +### Go SDK + +Packages: + +```text +whcp/client +whcp/producer +whcp/consumer +whcp/signature +whcp/testing +``` + +Example producer: + +```go +client.Events.Create(ctx, whcp.Event{ + ID: "evt_product_123", + Type: "invoice.paid", + Source: "com.example.billing", + Subject: "invoice/inv_123", + Data: map[string]any{"invoice_id": "inv_123"}, +}, whcp.WithIdempotencyKey("invoice:inv_123:paid")) +``` + +### TypeScript/Node SDK + +```ts +await client.events.create({ + id: "evt_product_123", + type: "invoice.paid", + source: "com.example.billing", + subject: "invoice/inv_123", + data: { invoice_id: "inv_123" } +}, { + idempotencyKey: "invoice:inv_123:paid" +}); +``` + +Signature verification helper: + +```ts +const verified = verifyWebhookSignature({ + secret, + rawBody, + header: req.headers["webhook-signature"], + toleranceSeconds: 300 +}); +``` + +### Python SDK + +```python +client.events.create( + id="evt_product_123", + type="invoice.paid", + source="com.example.billing", + subject="invoice/inv_123", + data={"invoice_id": "inv_123"}, + idempotency_key="invoice:inv_123:paid", +) +``` + +## CLI + +```bash +whcp login +whcp sources create --provider stripe --name stripe-prod +whcp endpoints create --url https://example.com/webhooks +whcp events list --since 1h +whcp events tail --source src_123 +whcp replay dry-run --failed-only --since 24h +whcp tunnel --forward-to localhost:3000/webhooks +whcp mock stripe payment_intent.succeeded --source src_123 +whcp verify stripe --payload payload.json --signature "..." 
+``` + +## Documentation structure + +* quickstart, +* self-hosted install, +* concepts, +* inbound providers, +* outbound webhooks, +* signature verification, +* idempotency and dedupe, +* replay, +* SSRF and endpoint safety, +* schema registry, +* admin UI, +* API reference, +* OpenAPI spec, +* SDK guides, +* provider setup guides: + + * Stripe, + * GitHub, + * Shopify, + * Slack, + * generic HMAC, +* troubleshooting, +* migration from ad hoc webhooks, +* production checklist. + +## Collections + +Provide: + +* OpenAPI JSON/YAML, +* Postman collection, +* Bruno collection, +* provider test fixtures, +* Docker Compose examples, +* Terraform module later. + +--- + +# 26. Admin UI requirements + +The UI is a convenience layer over APIs. Anything operationally important must be API-first. + +## Required views + +| View | Must support | +|-------------------|------------------------------------------------------------------------------------| +| Sources | create, configure adapter, show ingress URL, verification status, secret rotation. | +| Endpoints | URL, SSRF status, health, secrets, test delivery. | +| Subscriptions | event types, filters, payload format, state. | +| Routes | priority, match rules, dry-run, version history, activation. | +| Events | search, timeline, raw/normalized payload, dedupe, verification. | +| Delivery logs | attempts, response status/body, retry schedule, manual retry. | +| Replay jobs | dry-run, create, progress, pause/resume/cancel, receipts. | +| Dead-letter queue | inspect, filter, release, bulk release. | +| Quarantine | review, approve, reject, delete. | +| Schemas | versions, compatibility checks, examples, docs. | +| Adapter config | installed adapters, versions, test vectors. | +| Secrets | rotation workflows, metadata only, no secret reveal except generated once. | +| Metrics dashboard | ingestion, delivery, queue, DLQ, endpoint health. | +| Audit log | filter/export, actor/resource/action. 
| +| Tenant usage | quotas, event counts, delivery counts, retention. | +| Endpoint health | success ratio, latency, circuit breaker state. | +| Worker health | worker nodes, queues, saturation, leases. | + +## UI guardrails + +* Production replays require reason. +* Large replay requires dry-run first. +* Unsafe endpoint/source modes display warnings. +* Raw payload access requires elevated permission and audit event. +* Route activation shows diff. +* Secret rotation shows grace period and affected sources/endpoints. +* Dead-letter bulk actions show exact count and scope. + +--- + +# 27. Testing strategy + +## Unit tests + +* HMAC verification, +* timestamp windows, +* constant-time comparison wrapper, +* provider extractors, +* dedupe key generation, +* canonical JSON hashing, +* route predicates, +* retry schedule computation, +* SSRF URL parser, +* RBAC decisions, +* problem error mapping. + +## Integration tests + +* API + PostgreSQL transaction boundaries, +* object storage raw payload write, +* DB outbox recovery, +* delivery worker HTTP server, +* endpoint response classifications, +* replay job execution, +* schema validation, +* secret rotation grace period. + +## End-to-end tests + +* Stripe-style valid event → route → endpoint delivery. +* GitHub invalid signature → rejected/quarantined. +* Shopify timeout simulation → retry behavior. +* Slack URL verification challenge. +* Product event → subscription fanout → customer logs. +* Bulk replay failed-only. + +## Provider signature test vectors + +Each provider adapter must test: + +* valid signature, +* wrong secret, +* body mutated, +* timestamp expired, +* multiple active secrets, +* missing header, +* malformed signature, +* Unicode payload. + +## SSRF tests + +Include: + +* private IPv4, +* private IPv6, +* IPv4-mapped IPv6, +* DNS rebinding, +* redirect to metadata IP, +* IDNA hostname, +* encoded URL confusion, +* unusual ports, +* embedded credentials. 
+ +## Fuzz tests + +* URL parsing, +* header canonicalization, +* JSON parsing, +* adapter extraction, +* transformation expressions, +* problem details response generation. + +## Load tests + +Scenarios: + +* provider burst, +* many tenants small traffic, +* one noisy tenant, +* one broken endpoint, +* replay during live traffic, +* object storage latency, +* database write contention. + +## Chaos tests + +* worker crash mid-delivery, +* crash after endpoint response before attempt record, +* DB outage before ack, +* DB outage after ack, +* object storage outage, +* queue outage, +* DNS failure, +* TLS failure, +* slow customer endpoint, +* clock skew, +* migration interruption. + +--- + +# 28. Threat model + +| Threat / failure | Mitigations | +|--------------------------------|------------------------------------------------------------------------------------------------| +| Malicious webhook sender | signature verification, source secrets, IP allowlist defense-in-depth, rate limits. | +| Spoofed provider request | HMAC/JWT verification, raw body preservation, timestamp checks. | +| Replay attack | timestamp windows, dedupe keys, nonce/event ID tracking. | +| Compromised source secret | rotation, dual-secret grace, audit, anomaly alerts, source disable. | +| Malicious customer endpoint | SSRF protection, timeouts, response truncation, no secret leakage. | +| SSRF through endpoint URL | strict URL validation, DNS re-resolution, blocked CIDRs, redirect restrictions, egress policy. | +| DNS rebinding | re-resolve before connect, validate selected IP, short DNS cache, controlled resolver. | +| Cross-tenant data access | tenant-scoped queries, RBAC, tests, row-level security optional. | +| Noisy tenant starvation | weighted fair scheduling, per-tenant limits, replay priority isolation. | +| Broken provider adapter | version pinning, test vectors, rollback, quarantine on parser anomaly. 
| +| Broken transformation | sandbox, deterministic limits, preview, DLQ deterministic failures. | +| Duplicate provider events | dedupe records, idempotency, duplicate visibility. | +| Out-of-order events | default unordered docs, optional scoped ordering, consumer guidance. | +| Customer endpoint outage | retries, circuit breaker, DLQ, endpoint health scoring. | +| Retry storm | exponential backoff, jitter, per-host/endpoint limits, circuit breakers. | +| Worker crash | leases, idempotent attempt recording, durable outbox. | +| Database outage | no accepted ack before persistence; readiness fails. | +| Object storage outage | DB fallback or fail closed before ack. | +| Insider replay abuse | RBAC, approvals, reason required, audit, rate limits. | +| Audit log tampering | append-only logs, hash chaining, external export/anchoring. | +| Secret leakage | redaction, encryption, one-time reveal, scanning, scoped secrets. | +| Plugin supply-chain compromise | signed plugins, sandbox, SBOM, approval workflow, no network by default. | + +--- + +# 29. Feature prioritization + +## MVP but production-respectable + +Must include: + +* PostgreSQL-backed durable ingestion. +* Raw body preservation. +* Stripe, GitHub, Shopify, Slack adapters. +* Generic HMAC adapter. +* Signature verification. +* Source management. +* Endpoint management. +* Outbound delivery with HMAC signatures. +* Delivery attempts/logs. +* Retry policies with exponential backoff/jitter. +* Deduplication records. +* Idempotency keys. +* Manual retry. +* Single-event replay. +* Dead-letter queue. +* SSRF-safe endpoint validation. +* Basic RBAC. +* Audit logs for config/replay/retry/secret changes. +* OpenAPI spec. +* Docker Compose. +* Basic admin UI. +* CLI basics. + +## Production-grade v1 + +* Bulk replay with dry-run. +* Route versioning. +* Transformation versioning. +* Schema registry. +* Endpoint health scoring. +* Circuit breakers. +* Per-tenant fairness. +* Object storage support. 
+* Prometheus metrics. +* Kubernetes Helm chart. +* Secret rotation grace periods. +* Customer-visible logs. +* Quarantine. +* More robust retention policies. + +## High-trust/auditable v2 + +* Evidence package export. +* Hash-chained audit logs. +* Config snapshot replay. +* Adapter approval workflow. +* Deterministic transformation sandbox. +* Advanced route explanations. +* Tamper-evident object storage mode. +* SIEM export. +* Replay approvals. +* Field-level payload redaction. +* Per-customer audit views. + +## Enterprise/compliance v3 + +* SSO/SAML/OIDC enterprise. +* SCIM. +* Advanced RBAC/ABAC. +* mTLS for endpoint delivery and producers. +* HSM/KMS integrations. +* Air-gapped license/builds. +* HA reference architectures. +* Multi-region active/passive. +* Compliance evidence exports. +* Legal hold. +* Long-term support releases. +* Private adapter registry. + +## Future / nice-to-have + +* Visual route builder. +* Marketplace adapters. +* Provider reconciliation jobs. +* Advanced anomaly detection. +* Terraform provider. +* Event catalog portal. +* GraphQL read API. +* Kafka/NATS advanced backend. +* Multi-region active-active. +* Workflow-lite actions, only if tightly scoped. + +Essential security, durability, replay, audit, idempotency, dedupe, and signature verification must not be delayed to “nice-to-have.” + +--- + +# 30. Open-source and commercial packaging + +## Community edition + +Keep free: + +* self-hosted single-node and Docker Compose, +* core inbound/outbound webhooks, +* raw payload preservation, +* signature verification, +* built-in core adapters, +* retries, +* manual replay, +* DLQ, +* basic audit logs, +* SSRF protection, +* OpenAPI, +* CLI, +* basic UI. + +Avoid crippleware: the free version must be safe and reliable. 
+ +## Paid self-hosted edition + +Good paid features: + +* advanced admin UI, +* schema registry advanced compatibility, +* bulk replay controls, +* object storage retention management, +* advanced metrics dashboards, +* customer-visible portal, +* more provider adapters, +* SSO for teams, +* priority support. + +## Enterprise edition + +Enterprise features: + +* SAML/OIDC/SCIM, +* advanced RBAC/ABAC, +* audit hash chaining, +* evidence exports, +* air-gapped builds, +* LTS releases, +* HA deployment guides, +* private adapter registry, +* HSM/KMS integrations, +* compliance reports, +* legal hold, +* tenant federation, +* mTLS policy management. + +## Licensing + +Sane options: + +* Apache 2.0 core + commercial enterprise modules, or +* AGPL core + commercial license, if business wants SaaS-protection. + +For developer trust, Apache 2.0 core is friendlier. Enterprise modules can be source-available or commercial. + +## Support model + +* Community: docs, GitHub discussions, best-effort. +* Paid: ticket support, upgrade help. +* Enterprise: SLAs, security advisories, architecture review, emergency hotfixes, LTS. + +--- + +# 31. Final deliverables + +## Complete feature checklist + +### Reliability + +* [ ] Durable raw event persistence before ack. +* [ ] DB outbox. +* [ ] At-least-once delivery. +* [ ] Retry policies. +* [ ] Backoff with jitter. +* [ ] DLQ. +* [ ] Replay. +* [ ] Worker leases. +* [ ] Crash recovery. +* [ ] Endpoint health. +* [ ] Circuit breakers. +* [ ] Fair scheduling. + +### Security + +* [ ] Raw-body HMAC verification. +* [ ] Timestamp replay windows. +* [ ] Constant-time compare. +* [ ] Secret rotation. +* [ ] RBAC. +* [ ] Tenant isolation. +* [ ] SSRF protection. +* [ ] TLS enforcement. +* [ ] mTLS optional. +* [ ] Payload/header size limits. +* [ ] PII redaction. +* [ ] Audit logs. +* [ ] Plugin sandbox. + +### Transparency + +* [ ] Event search. +* [ ] Event timeline. +* [ ] Verification result. +* [ ] Dedupe result. 
+* [ ] Route explanation. +* [ ] Transformation preview. +* [ ] Delivery attempts. +* [ ] Response body truncation. +* [ ] Retry schedule. +* [ ] Replay receipts. +* [ ] Evidence exports. + +### Reproducibility + +* [ ] Adapter versions. +* [ ] Route versions. +* [ ] Transformation versions. +* [ ] Retry policy versions. +* [ ] Config snapshots. +* [ ] Payload hashes. +* [ ] Delivery request/response hashes. +* [ ] Deterministic replay modes. + +--- + +## Recommended endpoint list + +Core: + +```text +POST /v1/events +POST /v1/events:batch +GET /v1/events +GET /v1/events/{event_id} +GET /v1/events/{event_id}/timeline +GET /v1/events/{event_id}/raw +GET /v1/events/{event_id}/normalized +GET /v1/events/{event_id}/deliveries + +POST /v1/ingest/{tenant_id}/{source_id} +POST /v1/ingest/stripe/{source_id} +POST /v1/ingest/github/{source_id} +POST /v1/ingest/shopify/{source_id} +POST /v1/ingest/slack/{source_id} +POST /v1/ingest/generic/{source_id} +POST /v1/ingest/cloudevents/{source_id} + +POST /v1/sources +GET /v1/sources +GET /v1/sources/{source_id} +PATCH /v1/sources/{source_id} +DELETE /v1/sources/{source_id} +POST /v1/sources/{source_id}/secrets:rotate +POST /v1/sources/{source_id}/signature-tests + +POST /v1/endpoints +GET /v1/endpoints +GET /v1/endpoints/{endpoint_id} +PATCH /v1/endpoints/{endpoint_id} +DELETE /v1/endpoints/{endpoint_id} +POST /v1/endpoints/{endpoint_id}:test +POST /v1/endpoints/{endpoint_id}/secrets:rotate +POST /v1/endpoints:validate-url + +POST /v1/subscriptions +GET /v1/subscriptions +GET /v1/subscriptions/{subscription_id} +PATCH /v1/subscriptions/{subscription_id} +DELETE /v1/subscriptions/{subscription_id} + +POST /v1/routes +GET /v1/routes +GET /v1/routes/{route_id} +PATCH /v1/routes/{route_id} +DELETE /v1/routes/{route_id} +POST /v1/routes/{route_id}:activate +POST /v1/routes/{route_id}:dry-run +GET /v1/routes/{route_id}/versions + +GET /v1/deliveries +GET /v1/deliveries/{delivery_id} +GET /v1/deliveries/{delivery_id}/attempts +GET 
/v1/delivery-attempts/{attempt_id} +POST /v1/deliveries/{delivery_id}:retry +POST /v1/deliveries/{delivery_id}:cancel + +POST /v1/replay-jobs:dry-run +POST /v1/replay-jobs +GET /v1/replay-jobs +GET /v1/replay-jobs/{replay_job_id} +POST /v1/replay-jobs/{replay_job_id}:pause +POST /v1/replay-jobs/{replay_job_id}:resume +POST /v1/replay-jobs/{replay_job_id}:cancel +GET /v1/replay-jobs/{replay_job_id}/receipts + +GET /v1/dead-letter +GET /v1/dead-letter/{entry_id} +POST /v1/dead-letter/{entry_id}:release +POST /v1/dead-letter:bulk-release + +GET /v1/quarantine +GET /v1/quarantine/{entry_id} +POST /v1/quarantine/{entry_id}:approve +POST /v1/quarantine/{entry_id}:reject + +POST /v1/event-types +GET /v1/event-types +GET /v1/event-types/{event_type} +POST /v1/event-types/{event_type}/schemas +GET /v1/event-types/{event_type}/schemas +POST /v1/event-types/{event_type}/schemas/{schema_version}:validate +POST /v1/event-types/{event_type}/schemas/{schema_version}:check-compatibility + +GET /v1/adapters +GET /v1/adapters/{adapter_name} +POST /v1/adapters/custom +POST /v1/adapters/custom/{adapter_id}:test + +GET /healthz +GET /readyz +GET /v1/ops/workers +GET /v1/ops/queues +GET /v1/ops/metrics +GET /v1/ops/metrics/prometheus + +GET /v1/audit-events +POST /v1/audit-events:export +GET /openapi.json +GET /openapi.yaml +GET /.well-known/jwks.json +``` + +--- + +## Recommended database tables + +```text +tenants +users +memberships +api_keys +sources +source_secrets +provider_adapters +adapter_versions +endpoints +endpoint_secrets +event_types +event_schemas +events +raw_payloads +normalized_envelopes +provider_receipts +dedupe_records +subscriptions +routes +route_versions +filters +filter_versions +transformations +transformation_versions +deliveries +delivery_attempts +retry_policies +replay_jobs +replay_job_items +replay_receipts +dead_letter_entries +quarantine_entries +idempotency_records +signing_keys +verification_keys +audit_events +config_versions +retention_policies 
+outbox +worker_leases +metrics_rollups +alerts +``` + +--- + +## Recommended event envelope fields + +```text +envelope_version +id +tenant_id +source_id +source +provider +provider_event_id +type +subject +account_id +received_at +occurred_at +api_version +schema_version +adapter_version +raw_payload_id +raw_payload_hash +normalized_payload_hash +signature_verified +verification.method +verification.key_id +verification.timestamp +verification.replay_window_seconds +verification.result +deduplication_key +dedupe_status +trace_id +causation_id +correlation_id +replay_of +test_mode +metadata +data +provider_specific +``` + +--- + +## Recommended delivery attempt fields + +```text +id +tenant_id +delivery_id +event_id +endpoint_id +attempt_no +state +started_at +completed_at +duration_ms +worker_id +request_method +request_url_redacted +request_headers_redacted +request_body_hash +signature_key_id +response_status +response_headers_redacted +response_body_truncated +response_body_hash +network_error +failure_class +retryable +next_attempt_at +trace_id +``` + +--- + +## Recommended audit event fields + +```text +id +tenant_id +occurred_at +actor.type +actor.id +actor.ip +actor.user_agent +action +resource.type +resource.id +request_id +trace_id +reason +before_hash +after_hash +config_version +metadata +previous_audit_hash +audit_hash +``` + +--- + +## Recommended first provider adapters + +1. Stripe. +2. GitHub. +3. Shopify. +4. Slack. +5. Generic HMAC. +6. Generic JWT/JWS. +7. Generic CloudEvents. +8. Internal trusted producer. +9. Generic unsigned, clearly unsafe and disabled by default in production. 
+ +--- + +## Recommended MVP architecture + +```text +[Ingress/API/UI] + | + v +[PostgreSQL] + - raw payloads or bytea + - events + - outbox + - deliveries + - attempts + - audit + | + v +[Worker pool] + - route evaluation + - delivery + - retry scheduling + | + v +[Customer endpoints] + +Optional: +[MinIO/S3-compatible object storage] for raw payloads +[Prometheus/Grafana] for metrics +``` + +MVP principle: PostgreSQL is the source of truth. Any queue is optimization, not authority. + +--- + +## Recommended production architecture + +```text + ┌─────────────────────┐ +Providers --->│ HA Ingress/API │ +Products --->│ Management API │ + └─────────┬───────────┘ + │ + v + ┌─────────────────────┐ + │ PostgreSQL HA │ + │ metadata/outbox │ + └─────────┬───────────┘ + │ + v + ┌─────────────────────┐ + │ Object Storage │ + │ raw/evidence exports │ + └─────────────────────┘ + + ┌─────────────────────┐ + │ Scheduler │ + │ fair queues/leases │ + └─────────┬───────────┘ + │ + ┌─────────────┴─────────────┐ + v v + ┌─────────────────┐ ┌─────────────────┐ + │ Delivery workers│ │ Replay workers │ + └────────┬────────┘ └────────┬────────┘ + v v + ┌─────────────────┐ ┌─────────────────┐ + │ Egress proxy │ │ Sandbox runtime │ + │ SSRF controls │ │ transformations │ + └────────┬────────┘ └─────────────────┘ + v + Customer endpoints +``` + +--- + +## Explicit non-goals + +* Exactly-once delivery. +* Global event ordering. +* Generic Kafka replacement. +* Zapier-style workflow automation. +* Temporal-style durable workflows. +* Arbitrary unbounded user code. +* Silent best-effort delivery. +* Hidden retry decisions. +* Unsigned production webhooks by default. +* Unsafe customer URL delivery without explicit policy and audit. + +--- + +## Critical risks + +| Risk | Why it matters | +|-------------------------------------|--------------------------------------------------| +| Raw body accidentally mutated | Breaks signature verification and trust. 
| +| Ack before durable persistence | Violates “never lose accepted webhook.” | +| SSRF bypass | Can expose cloud metadata/internal services. | +| Replay abuse | Can multiply customer side effects. | +| Queue unfairness | One tenant or broken endpoint can starve others. | +| Transformation sandbox escape | Supply-chain/security incident. | +| Overpromising ordering/exactly-once | Trust and legal risk. | +| Provider adapter drift | Verification/extraction breaks silently. | +| Audit logs mutable/deletable | Evidence loses value. | +| Response body over-capture | PII/secret leakage from customer endpoints. | +| Retry storm | Self-inflicted outage. | +| PostgreSQL overload | Source of truth becomes bottleneck. | + +--- + +## Open questions before implementation + +1. What is the first target scale: events/day, peak RPS, payload size, tenants, endpoints? +2. Should raw payloads live in PostgreSQL for MVP, or should object storage be required in production? +3. Which customers need archive-only mode versus relay/delivery mode first? +4. What retention defaults are acceptable for raw payloads containing PII? +5. Which event schemas are public customer contracts versus internal-only? +6. Should transformations be declarative-only in v1? +7. What is the first enterprise auth requirement: SSO, SCIM, mTLS, or KMS? +8. Should customer-visible logs expose raw response bodies, or only redacted/truncated bodies? +9. What replay approval threshold is appropriate? +10. Which provider adapter versions should be pinned at launch? +11. Is multi-tenant SaaS hosting planned later, or is the product strictly self-hosted? +12. What compliance targets matter: SOC 2 evidence, HIPAA, PCI adjacency, GDPR, financial audit? +13. How long must dedupe records live for financial events? +14. Should endpoint URLs to private networks ever be allowed, and under what enterprise controls? +15. What is the minimum viable admin UI versus CLI/API-only for early adopters? 
+ +[1]: https://docs.stripe.com/webhooks "https://docs.stripe.com/webhooks" +[2]: https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries "https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries" +[3]: https://shopify.dev/docs/apps/build/webhooks/verify-deliveries "https://shopify.dev/docs/apps/build/webhooks/verify-deliveries" +[4]: https://docs.slack.dev/authentication/verifying-requests-from-slack "https://docs.slack.dev/authentication/verifying-requests-from-slack" +[5]: https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html" +[6]: https://www.rfc-editor.org/rfc/rfc9457.html "https://www.rfc-editor.org/rfc/rfc9457.html" +[7]: https://learn.openapis.org/examples/v3.1/webhook-example.html "https://learn.openapis.org/examples/v3.1/webhook-example.html" +[8]: https://github.com/cloudevents/spec/blob/main/cloudevents/spec.md "https://github.com/cloudevents/spec/blob/main/cloudevents/spec.md" +[9]: https://www.openapis.org/blog/2021/02/18/openapi-specification-3-1-released "https://www.openapis.org/blog/2021/02/18/openapi-specification-3-1-released" diff --git a/.test.env.example b/.test.env.example index 742ae92..719ec5a 100644 --- a/.test.env.example +++ b/.test.env.example @@ -1,5 +1,6 @@ WEBHOOKERY_TEST_DATABASE_URL=postgres://webhookery:change-me@localhost:5432/webhookery_test?sslmode=disable WEBHOOKERY_TEST_REDIS_ADDR=localhost:6379 +WEBHOOKERY_RC_RESTORE_DATABASE_URL= WEBHOOKERY_TEST_MASTER_KEY_BASE64=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= WEBHOOKERY_RAW_STORAGE_MODE=postgres WEBHOOKERY_OBJECT_STORAGE_ENDPOINT=localhost:9000 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..64b4b11 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,64 @@ +# Changelog + +All notable public release changes for Webhookery are recorded here. 
+ +Webhookery follows the stability policy in `docs/stability.md`. Release notes +must distinguish implemented behavior from future intent and must preserve the +canonical non-claims in `docs/security-promise.md`. + +## v0.1.0-rc1 - 2026-05-27 + +Release status: release candidate for controlled, single-region, self-hosted +evaluation. + +### Added + +- Durable webhook evidence and delivery control plane implemented in Go with + API, worker, scheduler, migration, admin, and CLI entrypoints. +- PostgreSQL-first persistence with migrations, raw payload evidence, delivery + attempts, replay, DLQ, quarantine, retention, evidence exports, and audit + chain verification. +- Provider-aware ingestion for Stripe, GitHub, Shopify, Slack, generic HMAC/JWT, + CloudEvents, and internal producer events, backed by local conformance vectors. +- Versioned routes, subscriptions, retry policies, schemas, adapter evidence, + normalized envelopes, deterministic transformations, and delivery payload + snapshots. +- Provider reconciliation evidence for supported fake/local test paths and + explicit unsupported/unrecoverable provider gap evidence. +- Operational readiness surfaces: production doctor, release-candidate checks, + performance smoke, provider conformance checks, backup/restore scripts, + observability examples, alerts, notifications, SIEM signal egress, and + deployment profiles. +- Enterprise/self-hosting support foundations: API keys, OIDC/SCIM identity + lifecycle, scoped RBAC/ABAC, producer OAuth, producer mTLS metadata, endpoint + mTLS delivery, local/Vault/AWS-KMS-style secret custody interfaces, and + commercial governance docs. + +### Release Evidence + +- Local release evidence is generated with `make release-acceptance`. +- Core release-candidate checks are generated with `make rc-check`. +- DB-backed checks run when `WEBHOOKERY_TEST_DATABASE_URL` points at a + disposable PostgreSQL database. 
+- Release workflow artifacts include source/image SBOMs, image digest, Trivy + HIGH/CRITICAL scan result, provider conformance output, performance smoke + output, and release evidence summary. + +### Known Limits + +- This release candidate is not a hosted service. +- It is not a compliance certification, legal evidence certification, or + external audit attestation. +- Local acceptance checks use fake/local providers and receivers only. +- Provider reconciliation remains limited by provider API capabilities and + configured credentials. +- Operators remain responsible for production PostgreSQL, object storage, + network policy, TLS, backups, monitoring, and incident response. + +### Non-Claims + +- No exactly-once delivery claim. +- No provider-side event completeness guarantee. +- No guarantee that downstream business processing succeeded. +- No multi-region active-active guarantee. +- No live-provider acceptance claim. diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..b1d17dd --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,11 @@ +* @aatuh + +/.github/ @aatuh +/cmd/ @aatuh +/internal/ @aatuh +/migrations/ @aatuh +/openapi.yaml @aatuh +/docs/security-promise.md @aatuh +/docs/release-evidence-template.md @aatuh +/docs/provider-conformance.md @aatuh +/docs/provider-proof-manifest.json @aatuh diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..14ccd43 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,15 @@ +# Code Of Conduct + +Webhookery uses the Contributor Covenant Code of Conduct, version 2.1, as the +baseline for public project participation: +<https://www.contributor-covenant.org/version/2/1/code_of_conduct/>. + +Project-specific reporting route: + +- Security-sensitive reports: follow `SECURITY.md`. +- Conduct or moderation concerns: contact the maintainer through the private + channel listed in `SECURITY.md`.
+ +Do not include API keys, webhook secrets, bearer tokens, raw provider +signatures, raw payload bodies, private keys, customer data, or exploit +payloads in public issues, pull requests, discussions, or examples. diff --git a/COMMERCIAL.md b/COMMERCIAL.md index b3e073b..1f0a024 100644 --- a/COMMERCIAL.md +++ b/COMMERCIAL.md @@ -21,6 +21,23 @@ before relying on it. | Release evidence package | Yes | No | SBOMs, vulnerability scan results, OpenAPI checksum, acceptance evidence, and hardening notes. | | Custom integration work | Yes | No | Provider adapters, evidence workflows, deployment hardening, or receiver integration work. | +## Starting Ranges + +These ranges are public planning anchors, not a quote. Final pricing depends on +scope, deployment risk, provider mix, support expectations, and written +agreement. + +| Offer | Starting range | +| --- | ---: | +| Commercial Evaluation | EUR 490-1,000 | +| Release Evidence Package | EUR 2,500-5,000 | +| Production Readiness Review | EUR 7,500-12,500 | +| Commercial License + Support | EUR 9,900-24,900 per year | +| Custom Integration / Provider Adapter | Fixed scope or EUR 150-250/hour | + +See `docs/commercial-evaluation.md`, `docs/production-readiness-review.md`, and +`docs/support-packages.md` for buyer-readable scope boundaries. + ## Commercial License Scope A commercial license is a separate written agreement. It does not remove or @@ -38,10 +55,13 @@ Commercial terms can cover: - self-hosted deployment review, - operational runbook review. -Commercial terms do not imply exactly-once delivery, provider-side event -completeness, compliance certification, external timestamping, legal/regulatory -approval, managed-service availability, or recovery guarantees beyond the -written agreement. +Commercial terms do not broaden the canonical non-claims in +`docs/security-promise.md` unless the written agreement explicitly narrows +scope for that customer, deployment, or engagement. 
+ +No SLA, compliance certification, legal evidence certification, hosted service, +or provider-side completeness guarantee is included unless a written agreement +explicitly says so. ## Contact diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1de1393..cbcafcf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -29,9 +29,11 @@ Before opening a change: 3. Preserve durable-capture-before-success semantics, exact raw-byte provider verification, tenant isolation, replay auditability, SSRF-safe endpoint handling, secret redaction, and at-least-once delivery language. -4. Do not introduce exactly-once delivery claims, provider-side event - completeness guarantees, compliance certification claims, live-provider - acceptance-test dependencies, or arbitrary transformation scripting. +4. Do not introduce claims broader than `docs/security-promise.md`, + live-provider acceptance-test dependencies, or arbitrary transformation scripting. + +For documentation changes, use the checklist in +`docs/documentation-maintenance.md` before opening a pull request. Useful checks: diff --git a/Dockerfile b/Dockerfile index c3394cf..41bc6be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.26-alpine AS build +FROM golang:1.26.4-alpine AS build WORKDIR /src COPY go.mod go.sum* ./ RUN go mod download diff --git a/GOVERNANCE.md b/GOVERNANCE.md index 1ec9c89..c50641f 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -21,7 +21,8 @@ The maintainer has final decision authority over: - contribution acceptance, - release evidence requirements, - trademark and naming permission, -- claims and non-claims, +- claims and non-claims, with `docs/security-promise.md` as the canonical + reference, - provider support boundaries.
Large changes should preserve the project’s core invariants: @@ -36,7 +37,8 @@ Large changes should preserve the project’s core invariants: - customer endpoint URLs are hostile input until validated and revalidated, - secrets, raw payloads, bearer/session tokens, provider credentials, private keys, and unnecessary PII are not logged or exported, -- compliance and legal-evidence language stays conservative. +- compliance and legal-evidence language stays conservative and aligned with + `docs/security-promise.md`. ## Commercial Boundary @@ -45,6 +47,6 @@ exceptions, support agreements, release evidence packages, and self-hosted support packages are handled outside the public issue tracker unless the maintainer explicitly chooses otherwise. -Commercial work does not imply exactly-once delivery, provider-side event -completeness, compliance certification, legal evidentiary certification, -external timestamping, or managed-service availability. +Commercial work does not broaden the canonical non-claims in +`docs/security-promise.md` unless a signed agreement explicitly narrows scope +for that engagement. 
diff --git a/Makefile b/Makefile index b41e56d..56075bf 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,9 @@ GOLANGCI_LINT_VERSION ?= v2.11.4 GOSEC_VERSION ?= v2.25.0 GOVULNCHECK_VERSION ?= v1.2.0 FUZZTIME ?= 5s +COVERAGE_MIN ?= 35.0 -.PHONY: help tools fmt lint vuln gosec test test-race coverage openapi-check test-vectors-check crypto-inventory deployment-profile-check collections-check meta-files-check fuzz-smoke sdk-generate sdk-check docs-check release-acceptance rc-check compose-up compose-down migrate postgres-integration-test redis-integration-test fast-check finalize clean +.PHONY: help tools fmt lint vuln gosec test test-race coverage coverage-check openapi-check openapi-reference-generate openapi-reference-check test-vectors-check provider-conformance-check provider-proof-check crypto-inventory deployment-profile-check collections-check documentation-structure-check failure-drills-check demo-media-check static-site-check meta-files-check release-assets-check fuzz-smoke perf-smoke demo-media restore-drill sdk-generate sdk-check docs-check release-acceptance rc-check compose-up compose-down migrate live-postgres-check postgres-integration-test redis-integration-test fast-check finalize clean help: ## Show help @awk 'BEGIN {FS=":.*## "}; /^[a-zA-Z0-9_.-]+:.*## / { printf " %-16s %s\n", $$1, $$2 }' $(MAKEFILE_LIST) @@ -40,13 +41,44 @@ coverage: ## Run tests with coverage @$(GO) test ./... -coverprofile=coverage.out @$(GO) tool cover -func=coverage.out +coverage-check: ## Enforce the local coverage floor + @$(GO) test ./... 
-coverprofile=coverage.out >/dev/null + @total="$$( $(GO) tool cover -func=coverage.out | awk '/^total:/ { gsub("%", "", $$3); print $$3 }' )"; \ + awk -v total="$$total" -v min="$(COVERAGE_MIN)" 'BEGIN { if ((total + 0) < (min + 0)) { printf "coverage %.1f%% is below %.1f%%\n", total, min > "/dev/stderr"; exit 1 } }'; \ + printf 'coverage %.1f%% meets %.1f%% floor\n' "$$total" "$(COVERAGE_MIN)" + openapi-check: ## Validate OpenAPI source and route contract smoke tests @test -f openapi.yaml @$(GO) test ./internal/adapters/httpapi -run 'TestOpenAPI|TestRoute' +openapi-reference-generate: ## Regenerate rendered OpenAPI reference artifacts + @$(GO) run ./scripts/openapi_reference.go \ + -input openapi.yaml \ + -html docs/openapi/index.html \ + -matrix docs/reference/api-contract-matrix.md \ + -summary docs/reference/openapi.md + +openapi-reference-check: ## Validate rendered OpenAPI reference artifacts are current + @tmp_dir="$$(mktemp -d)"; \ + trap 'rm -rf "$$tmp_dir"' EXIT; \ + $(GO) run ./scripts/openapi_reference.go \ + -input openapi.yaml \ + -html "$$tmp_dir/index.html" \ + -matrix "$$tmp_dir/api-contract-matrix.md" \ + -summary "$$tmp_dir/openapi.md"; \ + cmp -s "$$tmp_dir/index.html" docs/openapi/index.html || (printf '%s\n' "docs/openapi/index.html is stale; run make openapi-reference-generate" >&2; exit 1); \ + cmp -s "$$tmp_dir/api-contract-matrix.md" docs/reference/api-contract-matrix.md || (printf '%s\n' "docs/reference/api-contract-matrix.md is stale; run make openapi-reference-generate" >&2; exit 1); \ + cmp -s "$$tmp_dir/openapi.md" docs/reference/openapi.md || (printf '%s\n' "docs/reference/openapi.md is stale; run make openapi-reference-generate" >&2; exit 1) + test-vectors-check: ## Validate committed public audit test vectors @$(GO) test ./internal/provider -run TestProviderSignatureVectors +provider-conformance-check: ## Validate provider conformance matrix and local vectors + @scripts/provider_conformance_check.sh + +provider-proof-check: ## 
Validate manual live-provider proof guide freshness + @scripts/provider_proof_check.sh + crypto-inventory: ## Check crypto inventory evidence exists @grep -q "Webhook-Signature" openapi.yaml @grep -q "HMAC-SHA256" docs/operations.md @@ -55,10 +87,14 @@ crypto-inventory: ## Check crypto inventory evidence exists deployment-profile-check: ## Check deployment profile evidence and non-claims @grep -q "/readyz" openapi.yaml @grep -q "no FIPS/NIST/CMVP certification" docs/operations.md + @test -f docs/deployment.md @test -f deploy/kubernetes/kustomization.yaml @test -f deploy/kubernetes/secret.example.yaml + @test -f deploy/kubernetes/networkpolicy.example.yaml @test -f deploy/helm/webhookery/Chart.yaml @test -f deploy/helm/webhookery/values.yaml + @test -f deploy/helm/webhookery/values-production.example.yaml + @test -f deploy/observability/prometheus-rules.example.yaml @test -f deploy/terraform/webhookery-helm/main.tf @test -f deploy/terraform/webhookery-helm/README.md @terraform fmt -check -recursive deploy/terraform @@ -67,20 +103,223 @@ deployment-profile-check: ## Check deployment profile evidence and non-claims @grep -q "WEBHOOKERY_DATABASE_URL" deploy/kubernetes/secret.example.yaml @grep -q "WEBHOOKERY_DATABASE_URL" deploy/helm/webhookery/values.yaml @grep -q "helm_release" deploy/terraform/webhookery-helm/main.tf + @grep -q "docs/deployment.md" deploy/kubernetes/README.md + @grep -q "networkpolicy.example.yaml" deploy/kubernetes/README.md + @grep -q "docs/deployment.md" deploy/helm/webhookery/README.md + @grep -q "values-production.example.yaml" deploy/helm/webhookery/README.md + @grep -q "docs/deployment.md" deploy/terraform/webhookery-helm/README.md @grep -q "not accepted as module variables" deploy/terraform/webhookery-helm/README.md @test -x scripts/release_acceptance.sh @test -x scripts/backup_postgres.sh @test -x scripts/restore_postgres.sh + @test -x scripts/restore_drill.sh @grep -q "backup_postgres.sh" docs/operations.md @grep -q "restore_postgres.sh" 
docs/operations.md collections-check: ## Check committed API client collections + @test -f collections/README.md @test -f collections/postman/webhookery.postman_collection.json @test -f collections/bruno/Webhookery/bruno.json + @grep -q "Postman" collections/README.md + @grep -q "Bruno" collections/README.md + @grep -q "Webhook-Signature" collections/README.md @grep -q "collection/v2.1.0/collection.json" collections/postman/webhookery.postman_collection.json @grep -q "/v1/events" collections/bruno/Webhookery/events-list.bru @grep -q "/v1/audit-chain:verify" collections/bruno/Webhookery/audit-chain-verify.bru +documentation-structure-check: ## Check canonical documentation structure + @test -f CHANGELOG.md + @test -f docs/index.md + @test -f docs/reference/source-of-truth.md + @test -f docs/reference/openapi.md + @test -f docs/openapi/index.html + @test -f docs/reference/api-contract-matrix.md + @test -f docs/reference/release-evidence-index.md + @test -f docs/reference/release-validation.md + @test -f docs/evaluator-quickstart.md + @test -f docs/why-webhookery.md + @test -f docs/evidence-bundle-profiles.md + @test -f docs/use-cases/stripe-payment-investigation.md + @test -f docs/use-cases/github-automation-webhooks.md + @test -f docs/use-cases/shopify-order-webhooks.md + @test -f docs/use-cases/internal-integration-replay.md + @test -f docs/demo-media-checklist.md + @test -f docs/releases/v0.1.0-rc1.md + @test -f docs/releases/v0.2.0-pilot.md + @test -f docs/release-evidence-sample.md + @test -f docs/production-rc-checklist.md + @test -f docs/commercial-evaluation.md + @test -f docs/production-readiness-review.md + @test -f docs/support-packages.md + @test -f docs/comparisons/build-vs-buy.md + @test -f docs/comparisons/hookdeck.md + @test -f docs/comparisons/svix.md + @test -f docs/comparisons/convoy.md + @test -f docs/articles/exactly-once-webhooks.md + @test -f docs/articles/webhook-incident-report.md + @test -f docs/articles/webhook-failure-modes.md + @test -f 
docs/articles/self-hosted-webhook-gateway-architecture.md + @test -f docs/articles/webhook-security-review-checklist.md + @test -f docs/launch-copy.md + @test -f docs/launch-metrics.md + @test -f docs/customer-discovery-notes-template.md + @test -f docs/pilot-feedback-template.md + @test -f docs/roadmap-intake-policy.md + @test -f docs/pilot-review-checklist.md + @test -f .github/ISSUE_TEMPLATE/evaluator-feedback.yml + @test -f docs/configuration.md + @test -f docs/feature-behavior.md + @test -f docs/security-promise.md + @test -f docs/error-codes.md + @test -f docs/stability.md + @test -f docs/performance-envelope.md + @test -f docs/provider-conformance.md + @test -f docs/provider-conformance.manifest.json + @test -f docs/provider-proof-manifest.json + @test -f docs/providers/stripe.md + @test -f docs/providers/github.md + @test -f docs/providers/shopify.md + @test -f docs/live-provider-proof/stripe.md + @test -f docs/live-provider-proof/github.md + @test -f docs/live-provider-proof/shopify.md + @test -f docs/live-provider-proof/stripe-redaction-policy.md + @test -f docs/live-provider-proof/samples/stripe-incident-report.redacted.md + @test -f docs/live-provider-proof/samples/github-incident-report.redacted.md + @test -f docs/live-provider-proof/samples/shopify-incident-report.redacted.md + @test -f docs/day-2-operations.md + @test -f docs/observability.md + @test -f docs/failure-drills.md + @test -f docs/documentation-maintenance.md + @test -f docs/cli.md + @test -f docs/deployment.md + @test -f docs/schema-migrations.md + @test -f docs/security-review-package.md + @test -f docs/external-review-package.md + @test -f docs/external-review-scope.md + @test -f docs/external-review-findings-template.md + @test -f docs/external-review-accepted-risks.md + @test -f docs/release-evidence-template.md + @test -f release/current.json + @grep -q "Documentation Map" docs/index.md + @grep -q "Source Of Truth" docs/reference/source-of-truth.md + @grep -q "OpenAPI Reference" 
docs/reference/openapi.md + @grep -q "Webhookery API Contract Matrix" docs/reference/api-contract-matrix.md + @grep -q "Release Evidence Index" docs/reference/release-evidence-index.md + @grep -q "Release Validation" docs/reference/release-validation.md + @grep -q "webhookery-current-release.v1" release/current.json + @grep -q "Why Webhookery" docs/why-webhookery.md + @grep -q "Evidence Bundle Profiles" docs/evidence-bundle-profiles.md + @grep -q "Stripe Payment Investigation" docs/use-cases/stripe-payment-investigation.md + @grep -q "GitHub Automation Webhooks" docs/use-cases/github-automation-webhooks.md + @grep -q "Shopify Order Webhooks" docs/use-cases/shopify-order-webhooks.md + @grep -q "Internal Integration Replay" docs/use-cases/internal-integration-replay.md + @grep -q "docs/evaluator-quickstart.md" README.md + @grep -q "docs/why-webhookery.md" README.md + @grep -q "docs/reference/api-contract-matrix.md" README.md + @grep -q "docs/reference/release-evidence-index.md" README.md + @grep -q "release/current.json" README.md + @grep -q "docs/evidence-bundle-profiles.md" README.md + @grep -q "examples/webhook-evidence-demo" README.md + @grep -q "site/index.html" README.md + @grep -q "docs/commercial-evaluation.md" README.md + @grep -q "docs/production-rc-checklist.md" README.md + @grep -q "docs/releases/v0.2.0-pilot.md" README.md + @grep -q "docs/customer-discovery-notes-template.md" README.md + @grep -q "Evaluator Quickstart" docs/evaluator-quickstart.md + @grep -q "Demo Media Checklist" docs/demo-media-checklist.md + @grep -q "v0.1.0-rc1" CHANGELOG.md + @grep -q "release candidate" docs/releases/v0.1.0-rc1.md + @grep -q "exactly-once delivery" docs/releases/v0.1.0-rc1.md + @grep -q "provider-side event completeness" docs/releases/v0.1.0-rc1.md + @grep -q "v0.2.0 Pilot Readiness Checklist" docs/releases/v0.2.0-pilot.md + @grep -q "make provider-proof-check" docs/releases/v0.2.0-pilot.md + @grep -q "raw payload bodies" 
.github/ISSUE_TEMPLATE/evaluator-feedback.yml + @grep -q "roadmap-intake-policy.md" .github/ISSUE_TEMPLATE/evaluator-feedback.yml + @grep -q "no secrets" .github/ISSUE_TEMPLATE/evaluator-feedback.yml + @grep -q "Commercial Evaluation" docs/commercial-evaluation.md + @grep -q "Production Readiness Review" docs/production-readiness-review.md + @grep -q "Support Packages" docs/support-packages.md + @grep -q "Build Vs Buy" docs/comparisons/build-vs-buy.md + @grep -q "Verification date: 2026-06-04" docs/comparisons/hookdeck.md + @grep -q "Verification date: 2026-06-04" docs/comparisons/svix.md + @grep -q "Verification date: 2026-06-04" docs/comparisons/convoy.md + @grep -q "Exactly-Once Webhooks" docs/articles/exactly-once-webhooks.md + @grep -q "Building A Webhook Incident Report" docs/articles/webhook-incident-report.md + @grep -q "Webhook Failure Modes" docs/articles/webhook-failure-modes.md + @grep -q "Self-Hosted Webhook Gateway Architecture" docs/articles/self-hosted-webhook-gateway-architecture.md + @grep -q "Webhook Security Review Checklist" docs/articles/webhook-security-review-checklist.md + @grep -q "Release Evidence Sample" docs/release-evidence-sample.md + @grep -q "Production RC Checklist" docs/production-rc-checklist.md + @grep -q "Launch Copy Templates" docs/launch-copy.md + @grep -q "Launch Metrics Plan" docs/launch-metrics.md + @grep -q "Customer Discovery Notes Template" docs/customer-discovery-notes-template.md + @grep -q "Pilot Feedback Template" docs/pilot-feedback-template.md + @grep -q "Roadmap Intake Policy" docs/roadmap-intake-policy.md + @grep -q "Pilot Review Checklist" docs/pilot-review-checklist.md + @grep -q "Configuration Reference" docs/configuration.md + @grep -q "WEBHOOKERY_PILOT_ALLOW_RECEIVER_CHECK" docs/configuration.md + @grep -q "Feature Behavior Reference" docs/feature-behavior.md + @grep -q "Security Promise" docs/security-promise.md + @grep -q "WEBHOOKERY_PROVIDER_SIGNATURE_INVALID" docs/error-codes.md + @grep -q "Stability 
And Compatibility Policy" docs/stability.md + @grep -q "Performance Envelope" docs/performance-envelope.md + @grep -q "Provider Conformance Matrix" docs/provider-conformance.md + @grep -q "docs/live-provider-proof/stripe.md" README.md + @grep -q "docs/live-provider-proof/github.md" README.md + @grep -q "docs/live-provider-proof/shopify.md" README.md + @grep -q "docs/live-provider-proof/stripe.md" docs/evaluator-quickstart.md + @grep -q "docs/live-provider-proof/github.md" docs/evaluator-quickstart.md + @grep -q "docs/live-provider-proof/shopify.md" docs/evaluator-quickstart.md + @grep -q "Stripe Operator Guide" docs/providers/stripe.md + @grep -q "GitHub Operator Guide" docs/providers/github.md + @grep -q "Shopify Operator Guide" docs/providers/shopify.md + @grep -q "not provider certification" docs/live-provider-proof/stripe.md + @grep -q "not provider certification" docs/live-provider-proof/github.md + @grep -q "not provider certification" docs/live-provider-proof/shopify.md + @grep -q "provider-proof-v1" docs/provider-proof-manifest.json + @grep -q "Day-2 Operations Guide" docs/day-2-operations.md + @grep -q "Observability Examples" docs/observability.md + @grep -q "Failure Drills" docs/failure-drills.md + @grep -q "Provider Claim Freshness" docs/documentation-maintenance.md + @grep -q "Documentation Review Checklist" docs/documentation-maintenance.md + @grep -q "CLI" docs/cli.md + @grep -q "doctor pilot --no-network" docs/cli.md + @grep -q "Pilot Doctor Runbook" docs/operations.md + @grep -q "Deployment Posture" docs/deployment.md + @grep -q "Schema And Migration Operations" docs/schema-migrations.md + @grep -q "docs/security-promise.md" docs/documentation-maintenance.md + @grep -q "External Review Scope Template" docs/external-review-scope.md + @grep -q "External Review Package" docs/external-review-package.md + @grep -q "External Review Findings Template" docs/external-review-findings-template.md + @grep -q "External Review Accepted Risks" 
docs/external-review-accepted-risks.md + +failure-drills-check: ## Check failure-drill scripts and sanitized plan generation + @sh -n scripts/failure_drills.sh + @sh -n scripts/demo_media.sh + @sh -n scripts/restore_drill.sh + @test -x scripts/failure_drills.sh + @test -x scripts/demo_media.sh + @test -x scripts/restore_drill.sh + @scripts/failure_drills.sh list | grep -q "downstream-receiver-fails" + @set -e; tmp_dir="$$(mktemp -d)"; trap 'rm -rf "$$tmp_dir"' EXIT; scripts/failure_drills.sh plan --output "$$tmp_dir" >/dev/null; grep -q "postgres-unavailable-before-capture" "$$tmp_dir/failure-drills.md" + +demo-media-check: ## Check demo media script and sanitized outline generation + @sh -n scripts/demo_media.sh + @test -x scripts/demo_media.sh + @set -e; tmp_dir="$$(mktemp -d)"; trap 'rm -rf "$$tmp_dir"' EXIT; scripts/demo_media.sh plan --output "$$tmp_dir" >/dev/null; grep -q "Webhookery Demo Media Script" "$$tmp_dir/demo-script.md"; grep -q "Do not record" "$$tmp_dir/demo-script.md" + +static-site-check: ## Check static landing page assets + @test -f site/index.html + @test -f site/styles.css + @test -f .github/workflows/site-pages.yml + @grep -q "Self-hosted webhook evidence infrastructure" site/index.html + @grep -q "Try the self-hosted quickstart" site/index.html + @grep -q "Request commercial evaluation" site/index.html + @grep -q "Review commercial options" site/index.html + @grep -q "No exactly-once delivery claim" site/index.html + @grep -q "github-pages" .github/workflows/site-pages.yml + @! 
grep -qi "&2; exit 1) @git ls-files --cached --others --exclude-standard .golangci.yml | grep -qx ".golangci.yml" || (printf '%s\n' ".golangci.yml must be trackable" >&2; exit 1) +release-assets-check: ## Smoke-test release asset packaging metadata + @bash -n scripts/release_assets.sh + @set -e; tmp_dir="$$(mktemp -d)"; \ + trap 'rm -rf "$$tmp_dir"' EXIT; \ + WEBHOOKERY_RELEASE_ASSET_PLATFORMS=linux/amd64 scripts/release_assets.sh v0.0.0-local "$$tmp_dir" "$$(git rev-parse HEAD)" >/dev/null; \ + test -f "$$tmp_dir/webhookery_v0.0.0-local_linux_amd64.tar.gz"; \ + test -f "$$tmp_dir/SHA256SUMS"; \ + test -f "$$tmp_dir/openapi.yaml"; \ + test -f "$$tmp_dir/openapi.sha256"; \ + test -f "$$tmp_dir/migrations.sha256"; \ + test -f "$$tmp_dir/release-check-summary.txt"; \ + test -f "$$tmp_dir/webhookery-release-manifest.json"; \ + test -f "$$tmp_dir/webhookery-release-provenance.json"; \ + test -f "$$tmp_dir/webhookery-release-provenance.intoto.jsonl"; \ + (cd "$$tmp_dir" && sha256sum -c SHA256SUMS >/dev/null); \ + grep -q "webhookery-release-manifest.v1" "$$tmp_dir/webhookery-release-manifest.json"; \ + grep -q "not exactly-once delivery proof" "$$tmp_dir/webhookery-release-manifest.json" + fuzz-smoke: ## Run short CI-safe fuzz/property smoke tests @$(GO) test ./internal/canonicaljson -run '^$$' -fuzz=Fuzz -fuzztime=$(FUZZTIME) @$(GO) test ./internal/adapters/httpapi -run '^$$' -fuzz=Fuzz -fuzztime=$(FUZZTIME) @$(GO) test ./pkg/verifier -run '^$$' -fuzz=Fuzz -fuzztime=$(FUZZTIME) @$(GO) test ./internal/random -run '^$$' -fuzz=Fuzz -fuzztime=$(FUZZTIME) +perf-smoke: ## Run DB-backed local performance smoke and write sanitized evidence + @scripts/perf_smoke.sh + +demo-media: ## Prepare deterministic local demo media state + @scripts/demo_media.sh run + +restore-drill: ## Run destructive restore drill against WEBHOOKERY_RESTORE_DRILL_DATABASE_URL + @scripts/restore_drill.sh + release-acceptance: ## Run v3.3 release acceptance evidence checks @scripts/release_acceptance.sh @@ -135,8 
+460,11 @@ sdk-check: ## Validate committed SDK artifacts are present and aligned @test -f sdk/openapi.yaml @cmp -s openapi.yaml sdk/openapi.yaml @test -f sdk/README.md + @test -f sdk/examples/evidence-workflow-go/main.go + @test -f sdk/typescript/examples/evidence-workflow.ts @test -f pkg/client/client.go @$(GO) test ./pkg/client + @$(GO) test ./sdk/examples/evidence-workflow-go @test -f sdk/python/webhookery/__init__.py @PYTHONPATH=sdk/python python3 -m unittest discover -s sdk/python/tests @test -f sdk/typescript/src/index.ts @@ -145,12 +473,20 @@ sdk-check: ## Validate committed SDK artifacts are present and aligned docs-check: ## Run non-mutating documentation-adjacent checks @$(MAKE) openapi-check + @$(MAKE) openapi-reference-check @$(MAKE) test-vectors-check + @$(MAKE) provider-conformance-check + @$(MAKE) provider-proof-check @$(MAKE) sdk-check @$(MAKE) crypto-inventory @$(MAKE) deployment-profile-check @$(MAKE) collections-check + @$(MAKE) documentation-structure-check + @$(MAKE) failure-drills-check + @$(MAKE) demo-media-check + @$(MAKE) static-site-check @$(MAKE) meta-files-check + @$(MAKE) release-assets-check compose-up: ## Start local dependencies and API @docker compose up --build @@ -161,10 +497,12 @@ compose-down: ## Stop local dependencies migrate: ## Run Postgres migrations using DATABASE_URL @$(GO) run ./cmd/whcp migrate -dir migrations up -postgres-integration-test: ## Run live Postgres migration and store integration tests +live-postgres-check: ## Run the live Postgres quality gate using WEBHOOKERY_TEST_DATABASE_URL @test -n "$$WEBHOOKERY_TEST_DATABASE_URL" || (printf '%s\n' "WEBHOOKERY_TEST_DATABASE_URL is required; start postgres with docker compose up -d postgres" >&2; exit 2) @$(GO) test ./internal/adapters/postgres -run 'TestPostgres|TestMigration' -count=1 +postgres-integration-test: live-postgres-check ## Compatibility alias for live-postgres-check + redis-integration-test: ## Run live Redis edge-store integration tests @test -n 
"$$WEBHOOKERY_TEST_REDIS_ADDR" || (printf '%s\n' "WEBHOOKERY_TEST_REDIS_ADDR is required; start redis with docker compose up -d redis" >&2; exit 2) @$(GO) test ./internal/adapters/redisstore -run 'TestRedisStoreIntegration' -count=1 @@ -172,11 +510,17 @@ redis-integration-test: ## Run live Redis edge-store integration tests fast-check: ## Run non-mutating checks @$(GO) test ./... @$(MAKE) openapi-check + @$(MAKE) openapi-reference-check @$(MAKE) test-vectors-check @$(MAKE) crypto-inventory @$(MAKE) deployment-profile-check @$(MAKE) collections-check + @$(MAKE) documentation-structure-check + @$(MAKE) failure-drills-check + @$(MAKE) demo-media-check + @$(MAKE) static-site-check @$(MAKE) meta-files-check + @$(MAKE) release-assets-check @$(MAKE) sdk-check finalize: ## Thorough validity check @@ -187,11 +531,17 @@ finalize: ## Thorough validity check @$(MAKE) test @$(MAKE) test-race @$(MAKE) openapi-check + @$(MAKE) openapi-reference-check @$(MAKE) test-vectors-check @$(MAKE) crypto-inventory @$(MAKE) deployment-profile-check @$(MAKE) collections-check + @$(MAKE) documentation-structure-check + @$(MAKE) failure-drills-check + @$(MAKE) demo-media-check + @$(MAKE) static-site-check @$(MAKE) meta-files-check + @$(MAKE) release-assets-check @$(MAKE) sdk-check clean: ## Clean local test artifacts diff --git a/README.md b/README.md index 5e01fd2..c4fd07a 100644 --- a/README.md +++ b/README.md @@ -1,243 +1,238 @@ # Webhookery -Webhookery is a self-hosted webhook evidence and delivery control plane. The -MVP implementation in this repository is PostgreSQL-first and API/CLI-first: -it captures raw webhook evidence before acknowledging providers, verifies -provider signatures using exact bytes, stores dedupe and audit evidence, and -delivers outbound webhooks with at-least-once semantics. - -This repository is now implementation-bearing. 
`.initial_design.md` remains the -product design reference; `openapi.yaml`, `migrations/`, `cmd/`, `internal/`, -and `pkg/` are the implementation sources for their areas. - -## Local Development +[![CI](https://github.com/aatuh/webhookery/actions/workflows/ci.yml/badge.svg)](https://github.com/aatuh/webhookery/actions/workflows/ci.yml) +[![Security](https://github.com/aatuh/webhookery/actions/workflows/security.yml/badge.svg)](https://github.com/aatuh/webhookery/actions/workflows/security.yml) +[![Integration](https://github.com/aatuh/webhookery/actions/workflows/integration.yml/badge.svg)](https://github.com/aatuh/webhookery/actions/workflows/integration.yml) +[![Fuzz](https://github.com/aatuh/webhookery/actions/workflows/fuzz.yml/badge.svg)](https://github.com/aatuh/webhookery/actions/workflows/fuzz.yml) +[![CodeQL](https://github.com/aatuh/webhookery/actions/workflows/codeql.yml/badge.svg)](https://github.com/aatuh/webhookery/actions/workflows/codeql.yml) +[![OpenSSF Scorecard](https://github.com/aatuh/webhookery/actions/workflows/scorecard.yml/badge.svg)](https://github.com/aatuh/webhookery/actions/workflows/scorecard.yml) +[![Release](https://github.com/aatuh/webhookery/actions/workflows/release.yml/badge.svg)](https://github.com/aatuh/webhookery/actions/workflows/release.yml) +[![License: AGPL-3.0-only](https://img.shields.io/badge/license-AGPL--3.0--only-blue.svg)](LICENSE) +![Go Version](https://img.shields.io/badge/go-1.25.11+-00ADD8.svg) +![OpenAPI](https://img.shields.io/badge/OpenAPI-214%20operations-brightgreen.svg) +![Coverage Gate](https://img.shields.io/badge/local%20coverage-35%25+-yellow.svg) + +Audit-grade webhook capture, replay, and evidence -- self-hosted. + +Webhookery durably captures provider webhooks before acknowledging them, +verifies signatures, records delivery attempts, supports governed replay, and +exports verifiable evidence when integrations fail. 
+ +Website: + +The product promise is narrow by design: Webhookery must not return inbound +success before durable capture, loss boundaries must be explicit, and replay, +recovery, and audit evidence must be first-class. It is built for teams that +need to prove what arrived, what failed, what was replayed, and what evidence +remains without pretending that delivery can be exactly once. + +Start here if you are evaluating Webhookery: + +- Why Webhookery: `docs/why-webhookery.md` +- Evaluator walkthrough: `docs/evaluator-quickstart.md` +- Local evidence demo: `examples/webhook-evidence-demo/` +- Stripe proof guide: `docs/live-provider-proof/stripe.md` +- GitHub proof guide: `docs/live-provider-proof/github.md` +- Shopify proof guide: `docs/live-provider-proof/shopify.md` +- Static product page: `site/index.html` +- Rendered OpenAPI reference: `docs/openapi/index.html` +- API contract matrix: `docs/reference/api-contract-matrix.md` +- Release notes: `docs/releases/v0.1.0-rc1.md` +- Release evidence index: `docs/reference/release-evidence-index.md` +- Current public release metadata: `release/current.json` +- v0.2 pilot checklist: `docs/releases/v0.2.0-pilot.md` +- Pilot topology: `docs/pilot-topology.md` +- Commercial evaluation: `docs/commercial-evaluation.md` + +## Implementation Status + +This repository is implementation-bearing. The current codebase includes: + +- Go API, worker, scheduler, and `whcp` CLI entrypoints under `cmd/`. +- Domain, application, HTTP, persistence, provider, delivery, audit-chain, + SSRF, retry, transformation, and configuration code under `internal/`. +- Public helper packages under `pkg/client` and `pkg/verifier`. +- `openapi.yaml` as the canonical REST contract, with `sdk/openapi.yaml` as + the committed SDK-ready copy and `docs/openapi/index.html` as the rendered + reference artifact. +- PostgreSQL migrations under `migrations/`. +- Docker Compose, Kubernetes, Helm, and Terraform deployment profiles. 
+- SDK artifacts, Postman and Bruno smoke collections, and CI workflows. + +`.initial_design.md` is historical design input and architecture rationale. It +does not prove implemented behavior. Use code, OpenAPI, migrations, deployment +profiles, and canonical docs as the source of truth for current behavior. + +## Local Quickstart + +Prerequisites: Go, Docker, and Docker Compose. + +For the evidence-first path, run the failed-payment demo: ```bash -cp .env.example .env -docker compose up --build +docker compose up -d postgres +export WEBHOOKERY_TEST_DATABASE_URL='postgres://webhookery:change-me@localhost:5432/webhookery?sslmode=disable' +examples/webhook-evidence-demo/run.sh ``` -The example bootstrap key for local development is `dev-bootstrap-key`. Create a -database-backed API key immediately and then remove or rotate the bootstrap -hash in production-style environments. +Expected result: `examples/webhook-evidence-demo/output/` contains a sanitized +incident report, evidence manifest, verification output, and local evidence +bundle for a failed downstream delivery followed by replay. -Useful commands: +For a short API smoke path: ```bash -make test -make fast-check -go run ./cmd/whcp migrate up -go run ./cmd/whcp api -go run ./cmd/whcp api-keys create --api-key dev-bootstrap-key --name local-operator --role owner --scopes '*' -go run ./cmd/whcp events list --base-url http://localhost:8080 --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp events get --event-id evt_... --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp events timeline --event-id evt_... --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp events normalized --event-id evt_... --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp events raw-export --event-id evt_... 
--output payload.bin --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp transformations create --name redact-email --operations-file operations.json --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp transformations dry-run --payload-file payload.json --operations-file operations.json -go run ./cmd/whcp audit export --include-timelines --include-payloads --reason "support case" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit export-status --export-id exp_... --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit download --export-id exp_... --output evidence.tar.gz --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit chain-head --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit anchor --reason "daily anchor" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit anchors --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit verify-bundle --file evidence.tar.gz -go run ./cmd/whcp retention create --resource-type raw_payload --retention-days 30 --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp retention update --policy-id ret_... --legal-hold --hold-reason "customer legal request" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp provider-connections create --name stripe-prod --provider stripe --credential "$STRIPE_API_KEY" --config source_id=src_stripe --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp provider-connections verify --connection-id pcn_... --reason "initial credential check" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp adapters create --name acme-hmac --kind declarative --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp adapters version-create --adapter-id pad_... --version 2026-05-01 --definition-file acme-adapter.json --reason "upload declarative adapter" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp adapters transition --adapter-id pad_... --version-id adv_... 
--action request_review --reason "ready for security review" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp sources update --source-id src_... --state disabled --reason "retire old webhook" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp endpoints update --endpoint-id end_... --url https://receiver.example/webhook --reason "move receiver" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp endpoints delete --endpoint-id end_... --reason "retire old receiver" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp subscriptions update --subscription-id sub_... --event-types invoice.paid,invoice.updated --reason "narrow fanout" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp subscriptions delete --subscription-id sub_... --reason "retire fanout" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp routes update --route-id rte_... --priority 10 --reason "prefer primary receiver" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp routes delete --route-id rte_... --reason "retire route" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp reconciliation-jobs dry-run --connection-id pcn_... --from 2026-05-25T00:00:00Z --to 2026-05-25T12:00:00Z --capture-missing --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp reconciliation-jobs create --connection-id pcn_... --capture-missing --route-recovered --reason "recover missing Stripe events" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp reconciliation-jobs items --job-id rec_... --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp retry-policies create --name standard --max-attempts 12 --max-duration-seconds 259200 --initial-delay-seconds 10 --max-delay-seconds 21600 --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp routes create --source-id src_... --endpoint-id end_... --event-types invoice.paid --retry-policy-id rtp_... -go run ./cmd/whcp routes versions --route-id rte_... --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp retry-policies update --retry-policy-id rtp_... 
--max-attempts 8 --reason "tune retries" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp retry-policies delete --retry-policy-id rtp_... --reason "retire retry policy" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp sources rotate-secret --source-id src_... --secret whsec_next --reason "scheduled rotation" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp endpoints rotate-secret --endpoint-id end_... --reason "scheduled rotation" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp endpoints create --name mtls-receiver --url https://receiver.example/webhook --mtls-client-cert-file client.crt --mtls-client-key-file client.key --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp schemas event-type-update --name invoice.paid --description "Invoice paid events" --reason "clarify contract" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp schemas schema-update --name invoice.paid --version 2026-05-01 --state deprecated --reason "replace with 2026-06-01" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp schemas validate --name invoice.paid --version 2026-05-01 --payload-file payload.json --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp schemas schema-get --name invoice.paid --version 2026-05-01 --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp schemas check-compat --name invoice.paid --version 2026-05-01 --new-schema-file schema-next.json --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp endpoints test --endpoint-id end_... --reason "verify receiver" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp deliveries retry --delivery-id del_... --reason "operator retry" -go run ./cmd/whcp replay-jobs create --event-id evt_... --config-mode original --rate-limit-per-minute 60 --require-approval --reason "customer replay request" -go run ./cmd/whcp replay-jobs approve --replay-job-id rpl_... 
--reason "approved replay window" -go run ./cmd/whcp ops metrics --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp ops rollups --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp ops storage --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp ops config --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp ops workers --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp ops queues --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp key-custody test -go run ./cmd/whcp producer-clients create --name billing-producer --source-id src_internal --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp producer-clients rotate-secret --client-id pcl_... --reason "scheduled rotation" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp producer-mtls-identities create --name billing-cert --source-id src_internal --cert-file producer.crt --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp producer-mtls-identities verify --identity-id pmi_... --cert-file producer.crt --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp alerts create --name dlq-open --rule-type dead_letter_open --threshold 1 --reason "page on DLQ growth" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp notification-channels create --name ops-webhook --url https://ops.example/hooks/webhookery --signing-secret "$SIGNAL_SECRET" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp alerts update --alert-id alr_... --channel-ids nch_... --reason "send DLQ pages" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp alerts firings --state open --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp alerts ack --firing-id alf_... 
--reason "operator investigating" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp notification-deliveries list --state failed --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp siem-sinks create --name audit-stream --url https://siem.example/ingest --signing-secret "$SIEM_SIGNAL_SECRET" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp siem-deliveries list --state failed --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp identity-providers create --name okta --issuer-url https://idp.example.com --client-id "$OIDC_CLIENT_ID" --client-secret "$OIDC_CLIENT_SECRET" --redirect-uri https://webhookery.example/v1/auth/oidc/callback --allowed-email-domains example.com --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp scim-tokens create --name okta-scim --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp role-bindings create --principal-type user --principal-id usr_... --role auditor --resource-family audit --environment production --reason "audit team access" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp access-policies create --name deny-prod-raw --action events:raw --effect deny --resource-family event --environment production --reason "limit raw payload exposure" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp authz explain --actor-id usr_... --action events:raw --resource-family event --resource-id evt_... --environment production --api-key "$WEBHOOKERY_API_KEY" -scripts/backup_postgres.sh backups -WEBHOOKERY_RESTORE_CONFIRM=restore scripts/restore_postgres.sh backups/webhookery-20260525T000000Z.dump -go run ./cmd/whcp doctor production -helm lint deploy/helm/webhookery -terraform fmt -check -recursive deploy/terraform -make release-acceptance -make rc-check -make collections-check +cp .env.example .env +docker compose up --build ``` -Set `WEBHOOKERY_ENABLE_UI=true` to expose the minimal operator console at `/`. -The UI keeps the entered API key in browser memory only and calls the same -tenant-scoped REST API as the CLI. 
- -Raw payload bodies are stored in PostgreSQL by default. To use S3-compatible -storage, set `WEBHOOKERY_RAW_STORAGE_MODE=s3` plus the -`WEBHOOKERY_OBJECT_STORAGE_*` variables. In S3 mode, inbound success requires -the object write and PostgreSQL metadata commit to both succeed. - -Webhook/source secrets, endpoint signing secrets, provider credentials, and -outbound mTLS private keys use local AES envelope encryption by default via -`WEBHOOKERY_SECRET_BOX_MODE=local` and `WEBHOOKERY_MASTER_KEY_BASE64`. -Operators that already run Vault Transit can set -`WEBHOOKERY_SECRET_BOX_MODE=vault-transit` with `WEBHOOKERY_VAULT_ADDR`, -`WEBHOOKERY_VAULT_TOKEN`, and `WEBHOOKERY_VAULT_TRANSIT_KEY`; PostgreSQL then -stores wrapped Vault ciphertext instead of locally encrypted ciphertext for -new secret writes. Operators using AWS KMS can set -`WEBHOOKERY_SECRET_BOX_MODE=aws-kms` with `WEBHOOKERY_AWS_REGION`, -`WEBHOOKERY_AWS_KMS_KEY_ID`, and optional `WEBHOOKERY_AWS_KMS_ENDPOINT`. -AWS KMS mode uses envelope encryption with generated data keys; it does not -directly KMS-encrypt large secret values and it does not automatically -re-encrypt rows written by local or Vault modes. `whcp key-custody test` -checks the configured mode without printing plaintext, ciphertext, or full key -ids. - -Run `go run ./cmd/whcp doctor production` before promoting a self-hosted -deployment. The doctor reads environment/configuration only, prints -`blocker`, `warning`, and `ok` findings, and returns non-zero when blockers -remain. It never prints database passwords, API keys, webhook secrets, Vault -tokens, AWS credentials, raw KMS key ids, or object-store credentials. -Warnings are operator review items; blockers are unsafe or incomplete -production settings. - -Product event producers can use API keys with `events:write`, OAuth client -credentials from `/v1/producer-clients`, or app-verified producer mTLS -identities from `/v1/producer-mtls-identities`. 
Source-bound producer -credentials must match the `source_id` in the `POST /v1/events` body. Producer -client secrets and OAuth access tokens are stored only as hashes, and mTLS -identity records store certificate metadata only, never private keys. Producer -mTLS requires app-side TLS configuration with -`WEBHOOKERY_PRODUCER_MTLS_CLIENT_CA_FILE`; proxy-supplied certificate headers -are not trusted. - -Verified events also get canonical normalized envelope records. Routes and -subscriptions can reference deterministic JSON Pointer transformations; new -deliveries snapshot the exact outbound payload bytes and sign those stored -bytes. Transformation payloads may contain PII, so body reads and payload-body -exports require `events:raw`. - -Private adapter registry governance is available through `/v1/adapters` and -`whcp adapters`. Declarative adapter definitions and plugin package metadata -are tenant-scoped, versioned, hashed, audited, and moved through the approval -workflow before activation. Active declarative HMAC-SHA256 adapters can verify -inbound requests using exact raw bytes and configured replay windows. -Code-plugin packages are recorded for review only; Webhookery does not execute -arbitrary plugin code. - -Provider reconciliation jobs compare provider-side API evidence to local -Webhookery evidence when provider APIs permit it. Stripe event reconciliation -can capture recoverable missing events as `provider_api_reconciliation` -evidence; GitHub repository webhook reconciliation can compare delivery GUIDs -and request redelivery for failed deliveries. Shopify and Slack currently -record capability/limitation evidence instead of claiming generic missed-event -recovery. Recovered events are not marked as signed webhooks and route only -when `route_recovered=true`. - -Audit events are written through a tenant-scoped SHA-256 hash chain. 
Existing -audit rows are backfilled into deterministic chain entries during startup, and -new audit writes append the audit row and chain entry in one transaction. -Evidence exports include `audit_chain_proof.jsonl`; `whcp audit verify-bundle` -checks bundle file hashes and chain continuity locally. - -Alert notification channels send signed generic HTTPS callbacks for alert -open, acknowledged, and resolved transitions. Channel signing secrets are -encrypted at rest and never returned by API, CLI, or UI responses. Signal -payloads contain alert metadata only and are signed with -`Webhookery-Signal-Timestamp` plus `Webhookery-Signal-Signature`. - -SIEM sinks stream chained audit-event metadata as signed HTTPS JSONL batches. -Each sink cursor advances only after a successful delivery, so failed SIEM -egress retries without skipping audit-chain entries. SIEM payloads exclude raw -webhook bodies, provider headers, API keys, bearer tokens, and egress secrets. - -Retry scheduling records reproducibility evidence: deliveries carry a stored -`retry_seed`, and retryable attempts record the deterministic jitter delay and -next retry timestamp used by the worker. - -For local MinIO testing: +In another shell: ```bash -docker compose --profile object-storage up --build +curl -fsS http://localhost:8080/readyz +export WEBHOOKERY_API_KEY=dev-bootstrap-key +go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY" +go run ./cmd/whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" ``` -For Kubernetes, start from `deploy/kubernetes/README.md`. The manifests expect -external PostgreSQL and a separately managed `webhookery-secrets` Secret; they -do not install ingress, TLS, PostgreSQL, or object storage. - -Postman and Bruno request collections are committed under `collections/`. -The `pkg/client` package provides a small Go REST client for producer event -ingestion and audit-chain verification; `pkg/verifier` remains the receiver -signature verification helper. 
- -## Security Promise - -Webhookery does not promise exactly-once delivery. Inbound success means the -platform durably captured raw request evidence and verification metadata. Every -loss boundary, duplicate, replay, and delivery attempt is intended to remain -visible and auditable. +Expected result: readiness returns success, the CLI can authenticate with the +local bootstrap key, and audit-chain verification returns a JSON result. + +The local bootstrap key is for development only. Create a database-backed API +key immediately and remove or rotate the bootstrap hash before any +production-style use. + +Use `docs/evaluator-quickstart.md` for the guided evidence-loop walkthrough, +expected output, troubleshooting, and non-claims. + +## Short Smoke Paths + +- Local API and worker: `docker compose up --build`, then `/readyz`. +- Non-mutating docs and contract gate: `make docs-check`. +- Provider conformance matrix and local vectors: `make provider-conformance-check`. +- Manual provider-proof metadata: `make provider-proof-check`. +- Full repository gate: `make finalize`. +- Redacted production preflight: + `WEBHOOKERY_ENVIRONMENT=production go run ./cmd/whcp doctor production`. +- Redacted pilot preflight: + `go run ./cmd/whcp doctor pilot --no-network`. +- Disposable live database gate: + `WEBHOOKERY_TEST_DATABASE_URL=postgres://... make live-postgres-check`. +- Release-candidate acceptance: + `WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check`. +- Postman and Bruno smoke collections: see `collections/`. ## Production RC Readiness -The production-respectable target for this repository is a single-region -self-hosted release candidate. Before promoting a deployment: - -1. Run `go run ./cmd/whcp doctor production`; production blockers must be - fixed before promotion. -2. Run `make finalize`; all unit, contract, SDK, vulnerability, gosec, and race - checks must pass. -3. Run `WEBHOOKERY_TEST_DATABASE_URL=postgres://... 
make rc-check` against a - disposable PostgreSQL database; it should end with - `release-candidate acceptance checks passed`. -4. Run the restore drill with a separate disposable restore database by setting - `WEBHOOKERY_RC_RESTORE_DATABASE_URL=postgres://...` before `make rc-check`. -5. Confirm the operator runbooks in `docs/operations.md` match the deployed - storage, key-custody, TLS, and retention configuration. - -This RC scope does not claim exactly-once delivery, multi-region active-active -operation, external timestamping, compliance certification, managed provider -availability, or live third-party recovery guarantees. +Use `docs/operations.md`, `docs/day-2-operations.md`, `docs/stability.md`, and +`docs/release-evidence-template.md` as the canonical release-candidate +readiness path. The short version is: run the production doctor, `make +finalize`, live PostgreSQL checks against a disposable database, `make +rc-check`, and restore drills when migrations or evidence storage are touched. +Use `docs/observability.md` for starter Prometheus rules and dashboards. Do +not use live provider or customer credentials for local acceptance gates. + +Release-candidate details live in `docs/releases/v0.1.0-rc1.md`. Release +evidence requirements live in `docs/release-evidence-template.md`, with a +reader-facing example in `docs/release-evidence-sample.md`, the current public +artifact map in `docs/reference/release-evidence-index.md`, and a concise +operator checklist in `docs/production-rc-checklist.md`. + +## Security Promise And Non-Claims + +See `docs/security-promise.md` for the canonical promise and non-claims. +In short: inbound success means durable capture and verification metadata were +recorded; it does not mean downstream business processing succeeded. + +Examples in this repository use placeholders or local development values. 
Do +not put real API keys, provider credentials, webhook secrets, bearer tokens, +private keys, raw signatures, raw payload bodies, or customer data into docs, +commits, issues, support requests, or audit artifacts. + +Commercial license exceptions, evaluation packages, production-readiness +reviews, and support package boundaries are described in `COMMERCIAL.md`, +`docs/commercial-evaluation.md`, `docs/production-readiness-review.md`, and +`docs/support-packages.md`. + +## Primary Docs + +- `docs/index.md`: canonical documentation map by audience, purpose, and + source-of-truth boundary. +- `docs/why-webhookery.md`: product wedge and fit/non-fit explanation. +- `docs/configuration.md`: canonical environment variable and secret handling + reference. +- `docs/reference/source-of-truth.md`: public source-of-truth map for release, + API, workflow, deployment, and documentation artifacts. +- `docs/reference/openapi.md`: rendered OpenAPI and API contract matrix + reference. +- `docs/reference/api-contract-matrix.md`: generated operation matrix from + `openapi.yaml`. +- `docs/reference/release-evidence-index.md`: public release artifact map and + verification notes. +- `docs/reference/release-validation.md`: release validation and evidence + workflow. +- `docs/evaluator-quickstart.md`: guided local evaluator walkthrough. +- `examples/webhook-evidence-demo/`: deterministic local fake-provider and + fake-receiver evidence demo. +- `site/index.html`: static product landing page. +- `docs/operations.md`: operator runbooks and production RC procedures. +- `docs/failure-drills.md`: local and pilot failure-drill plan, script usage, + and restore-drill evidence rules. +- `docs/feature-behavior.md`: behavior reference for capture, routing, + delivery, replay, reconciliation, retention, identity, producer trust, and + SSRF. +- `docs/security-promise.md`: canonical durable-capture promise and + non-claims. +- `docs/error-codes.md`: stable API/CLI error-code reference. 
+- `docs/stability.md`: compatibility, support-window, migration, and + deprecation policy. +- `docs/performance-envelope.md`: performance smoke usage, capacity inputs, + storage growth, and sizing caveats. +- `docs/documentation-maintenance.md`: provider freshness and documentation + review checklist. +- `docs/provider-conformance.md`: provider matrix, local vector evidence, and + links to manual live-provider proof guides. +- `docs/providers/stripe.md` and `docs/providers/github.md`: operator guides + for the first flagship providers. +- `docs/providers/shopify.md`: operator guide for the first ecommerce + follow-up provider. +- `docs/live-provider-proof/stripe.md` and + `docs/live-provider-proof/github.md`: manual sanitized proof guides. +- `docs/live-provider-proof/shopify.md`: manual sanitized Shopify proof guide. +- `docs/deployment.md`: common self-hosted deployment posture. +- `docs/pilot-topology.md`: narrow supported topology for initial pilots. +- `docs/pilot-evidence-template.md`: sanitized pilot evidence packet template. +- `docs/evidence-bundle-profiles.md`: safe bundle profile policy for support, + security review, commercial evaluation, and internal forensics. +- `docs/use-cases/stripe-payment-investigation.md`, + `docs/use-cases/github-automation-webhooks.md`, + `docs/use-cases/shopify-order-webhooks.md`, and + `docs/use-cases/internal-integration-replay.md`: story-led evaluation + workflows tied to incident packets. +- `docs/schema-migrations.md`: schema review, migration ordering, and restore + compatibility guidance. +- `docs/security-review-package.md`: security reviewer artifact map. +- `docs/external-review-package.md`: external review package index. +- `docs/release-evidence-template.md`: canonical release evidence template. +- `docs/production-rc-checklist.md`: release-candidate readiness checklist. +- `docs/releases/v0.1.0-rc1.md`: first release-candidate notes. +- `docs/releases/v0.2.0-pilot.md`: pilot-readiness launch checklist. 
+- `docs/demo-media-checklist.md`: safe screenshots/video checklist. +- `docs/customer-discovery-notes-template.md`, + `docs/pilot-feedback-template.md`, `docs/roadmap-intake-policy.md`, and + `docs/pilot-review-checklist.md`: evaluator and pilot feedback discipline. +- `.github/ISSUE_TEMPLATE/evaluator-feedback.yml`: public sanitized feedback + issue form. +- `docs/commercial-evaluation.md`, `docs/production-readiness-review.md`, and + `docs/support-packages.md`: commercial evaluation and support boundaries. +- `docs/cli.md`: CLI command reference and moved command catalog. +- `sdk/README.md`: committed SDK artifact guidance. +- `collections/README.md`: Postman and Bruno smoke request usage. +- `deploy/kubernetes/README.md`, `deploy/helm/webhookery/README.md`, and + `deploy/terraform/webhookery-helm/README.md`: deployment profile notes. +- `SECURITY.md`, `CONTRIBUTING.md`, `GOVERNANCE.md`, `SUPPORT.md`, + `CODE_OF_CONDUCT.md`, `CODEOWNERS`, `COMMERCIAL.md`, and `TRADEMARKS.md`: + project policy docs. + +Run `make help` for the project-owned command list. Keep README examples short; +put detailed command workflows in the relevant canonical docs. diff --git a/RELEASE_EVIDENCE.md b/RELEASE_EVIDENCE.md index a4a5a26..a5f7d0b 100644 --- a/RELEASE_EVIDENCE.md +++ b/RELEASE_EVIDENCE.md @@ -1,31 +1,30 @@ # Release Evidence -Webhookery release evidence is an operator and reviewer artifact. It records -the exact commit, tag, image digest, checks, SBOMs, vulnerability scans, OpenAPI -checksum, migration state, production-doctor output, and acceptance evidence for -a release. - -The canonical template is: +This file is the release-evidence router. 
The canonical evidence artifact for +each tagged release is: - [`docs/release-evidence-template.md`](docs/release-evidence-template.md) -The local release gates are: +Copy that template for the release being reviewed and fill in commit, tag, +image digest, checks, SBOMs, vulnerability scans, OpenAPI checksums, migration +state, production-doctor output, performance smoke output, provider +conformance output, failure-drill output, branch protection status, external +review status, accepted-risk status, and acceptance evidence. + +Local acceptance gates start with: ```sh make release-acceptance make rc-check ``` -The release-candidate gate uses local Docker Compose services, fake providers, -and fake receivers. It must not require live Stripe, GitHub, Shopify, Slack, -AWS, Vault, or other third-party provider credentials. - Commercial release evidence packages may include signed release manifests, image digests, SBOMs, vulnerability scan outputs, OpenAPI checksums, migration checks, acceptance evidence, support notes, deployment hardening notes, and upgrade guidance. -Release evidence is not a certification. It makes no exactly-once delivery -claim, no provider-side event completeness guarantee, no compliance -certification claim, no external timestamping claim, no managed-service -availability claim, and no legal evidentiary certification claim. +Release evidence is not a certification. The canonical non-claims are in +[`docs/security-promise.md`](docs/security-promise.md): no exactly-once delivery, +no provider-side event completeness guarantee, no compliance certification, no +external timestamping, no managed-service availability, and no legal +evidentiary certification. 
diff --git a/SECURITY.md b/SECURITY.md index 02dc7fb..b993471 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -45,9 +45,7 @@ Out of scope: channels, - reports that rely on live third-party provider abuse rather than local reproduction or responsible provider disclosure, -- claims that Webhookery provides exactly-once delivery, provider-side event - completeness, compliance certification, external timestamping, or legal - evidentiary certification. +- claims broader than the canonical non-claims in `docs/security-promise.md`. ## Disclosure diff --git a/SUPPORT.md b/SUPPORT.md index 44c9f16..8483734 100644 --- a/SUPPORT.md +++ b/SUPPORT.md @@ -35,6 +35,6 @@ Contact Aatu Harju through LinkedIn: -Do not rely on an SLA, warranty, provider-side recovery guarantee, legal -evidence certification, regulated-use approval, or compliance certification -unless it is explicitly included in a signed agreement. +Do not rely on an SLA, warranty, provider-side recovery guarantee, regulated- +use approval, or any claim broader than `docs/security-promise.md` unless it is +explicitly included in a signed agreement. diff --git a/TRADEMARKS.md b/TRADEMARKS.md index 8b15761..4485768 100644 --- a/TRADEMARKS.md +++ b/TRADEMARKS.md @@ -16,11 +16,8 @@ You may not use the Webhookery name or project identity to imply: - endorsement by the project maintainer, - official release status for modified builds, - commercial support coverage, -- exactly-once delivery, -- provider-side event completeness, -- compliance approval, -- legal evidentiary certification, -- external timestamping or third-party audit certification. +- claims broader than the canonical Webhookery non-claims in + `docs/security-promise.md`. Modified distributions should use a clear name such as "Webhookery fork" or "based on Webhookery" and should not present themselves as official releases. 
diff --git a/cmd/whcp/client_helpers.go b/cmd/whcp/client_helpers.go
new file mode 100644
index 0000000..d67fb3c
--- /dev/null
+++ b/cmd/whcp/client_helpers.go
@@ -0,0 +1,540 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"time"
+
+	"webhookery/internal/evidence"
+)
+
+// getJSON issues an authenticated GET to path under baseURL and writes the
+// response body to stdout; non-2xx statuses are returned as an error.
+func getJSON(baseURL, apiKey, path string) error {
+	endpoint, err := apiEndpoint(baseURL, path)
+	if err != nil {
+		return err
+	}
+	// #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, endpoint, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	// #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	return writeHTTPResponse(resp)
+}
+
+// getJSONDecode issues an authenticated GET and unmarshals a 2xx JSON response
+// body into dst; non-2xx statuses are returned as a problem-response error.
+func getJSONDecode(baseURL, apiKey, path string, dst any) error {
+	endpoint, err := apiEndpoint(baseURL, path)
+	if err != nil {
+		return err
+	}
+	// #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, endpoint, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	// #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return problemResponseError("request failed", resp.StatusCode, responseBody)
+	}
+	return json.Unmarshal(responseBody, dst)
+}
+
+// postJSON marshals body as JSON, POSTs it to path under baseURL, and writes
+// the response body to stdout; non-2xx statuses are returned as an error.
+func postJSON(baseURL, apiKey, path string, body any) error {
+	raw, err := json.Marshal(body)
+	if err != nil {
+		return err
+	}
+	endpoint, err := apiEndpoint(baseURL, path)
+	if err != nil {
+		return err
+	}
+	// #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, endpoint, bytes.NewReader(raw))
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("Content-Type", "application/json")
+	// #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	return writeHTTPResponse(resp)
+}
+
+// postJSONDecode marshals body as JSON, POSTs it, and unmarshals a 2xx JSON
+// response body into dst; non-2xx statuses are returned as an error.
+func postJSONDecode(baseURL, apiKey, path string, body, dst any) error {
+	raw, err := json.Marshal(body)
+	if err != nil {
+		return err
+	}
+	endpoint, err := apiEndpoint(baseURL, path)
+	if err != nil {
+		return err
+	}
+	// #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, endpoint, bytes.NewReader(raw))
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("Content-Type", "application/json")
+	// #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return problemResponseError("request failed", resp.StatusCode, responseBody)
+	}
+	return json.Unmarshal(responseBody, dst)
+}
+
+// patchJSON marshals body as JSON, sends it with PATCH, and writes the
+// response body to stdout; non-2xx statuses are returned as an error.
+func patchJSON(baseURL, apiKey, path string, body any) error {
+	raw, err := json.Marshal(body)
+	if err != nil {
+		return err
+	}
+	endpoint, err := apiEndpoint(baseURL, path)
+	if err != nil {
+		return err
+	}
+	// #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodPatch, endpoint, bytes.NewReader(raw))
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("Content-Type", "application/json")
+	// #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	return writeHTTPResponse(resp)
+}
+
+// deleteJSON sends a DELETE carrying body as a JSON request body and writes
+// the response body to stdout; non-2xx statuses are returned as an error.
+func deleteJSON(baseURL, apiKey, path string, body any) error {
+	raw, err := json.Marshal(body)
+	if err != nil {
+		return err
+	}
+	endpoint, err := apiEndpoint(baseURL, path)
+	if err != nil {
+		return err
+	}
+	// #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodDelete, endpoint, bytes.NewReader(raw))
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("Content-Type", "application/json")
+	// #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	return writeHTTPResponse(resp)
+}
+
+// downloadAuditExport fetches /v1/audit-exports/{exportID}:download and writes
+// the body to outputPath, defaulting to "{exportID}.tar.gz" when it is empty.
+func downloadAuditExport(baseURL, apiKey, exportID, outputPath string) error {
+	endpoint, err := apiEndpoint(baseURL, "/v1/audit-exports/"+url.PathEscape(exportID)+":download")
+	if err != nil {
+		return err
+	}
+	// #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, endpoint, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	// #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	if outputPath == "" {
+		outputPath = exportID + ".tar.gz"
+	}
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return problemResponseError("audit export download failed", resp.StatusCode, body)
+	}
+	return writePrivateFile(outputPath, body)
+}
+
+// downloadIncidentReport fetches an incident report as "markdown" (the
+// default) or "json" and writes it to stdout when outputPath is "" or "-",
+// otherwise to outputPath.
+func downloadIncidentReport(baseURL, apiKey, incidentID, format, outputPath string) error {
+	if strings.TrimSpace(incidentID) == "" {
+		return fmt.Errorf("incident-id is required")
+	}
+	format = strings.ToLower(strings.TrimSpace(format))
+	if format == "" {
+		format = "markdown"
+	}
+	if format != "markdown" && format != "json" {
+		return fmt.Errorf("format must be markdown or json")
+	}
+	endpoint, err := apiEndpoint(baseURL, "/v1/incidents/"+url.PathEscape(incidentID)+"/report?format="+url.QueryEscape(format))
+	if err != nil {
+		return err
+	}
+	// #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, endpoint, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	// #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return problemResponseError("incident report download failed", resp.StatusCode, body)
+	}
+	if outputPath == "" || outputPath == "-" {
+		_, err = os.Stdout.Write(body)
+		return err
+	}
+	return writePrivateFile(outputPath, body)
+}
+
+// writeHTTPResponse copies the response body to stdout, including error
+// bodies, and then returns an error for any non-2xx status.
+func writeHTTPResponse(resp *http.Response) error {
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+	if len(body) > 0 {
+		if _, err := os.Stdout.Write(body); err != nil {
+			return err
+		}
+	}
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return problemResponseError("request failed", resp.StatusCode, body)
+	}
+	return nil
+}
+
+// problemResponseError formats an error for a failed response, preferring the
+// body's stable_code, then code, then "unknown_error", and appending the
+// request_id when the body carries one. Unparseable bodies are tolerated.
+func problemResponseError(prefix string, status int, body []byte) error {
+	var p struct {
+		Code       string `json:"code"`
+		StableCode string `json:"stable_code"`
+		RequestID  string `json:"request_id"`
+	}
+	_ = json.Unmarshal(body, &p)
+	code := strings.TrimSpace(p.StableCode)
+	if code == "" {
+		code = strings.TrimSpace(p.Code)
+	}
+	if code == "" {
+		code = "unknown_error"
+	}
+	if requestID := strings.TrimSpace(p.RequestID); requestID != "" {
+		return fmt.Errorf("%s with status %d (%s, request_id=%s)", prefix, status, code, requestID)
+	}
+	return fmt.Errorf("%s with status %d (%s)", prefix, status, code)
+}
+
+// createAndDownloadIncidentExport requests an evidence export for the incident
+// and downloads the export whose id the API returns.
+func createAndDownloadIncidentExport(baseURL, apiKey, incidentID, reason, outputPath string) error {
+	var export struct {
+		ID string `json:"id"`
+	}
+	if err := postJSONDecode(baseURL, apiKey, "/v1/incidents/"+url.PathEscape(incidentID)+"/evidence-export", map[string]string{"reason": reason}, &export); err != nil {
+		return err
+	}
+	if strings.TrimSpace(export.ID) == "" {
+		return fmt.Errorf("incident evidence export response did not include id")
+	}
+	return downloadAuditExport(baseURL, apiKey, export.ID, outputPath)
+}
+
+// verifyEvidenceBundleFile verifies a local tar.gz evidence bundle and prints
+// the verification result as JSON on stdout.
+func verifyEvidenceBundleFile(path string) error {
+	if strings.TrimSpace(path) == "" {
+		return fmt.Errorf("file is required")
+	}
+	body, err := os.ReadFile(path) // #nosec G304,G703 -- CLI verifies an operator-selected local evidence bundle.
+	if err != nil {
+		return err
+	}
+	result, err := evidence.VerifyTarGzipBundle(body)
+	if err != nil {
+		return err
+	}
+	return json.NewEncoder(os.Stdout).Encode(result)
+}
+
+// viewEvidenceBundleFile inspects a local tar.gz evidence bundle and prints
+// the resulting view as indented JSON on stdout.
+func viewEvidenceBundleFile(path string) error {
+	if strings.TrimSpace(path) == "" {
+		return fmt.Errorf("file is required")
+	}
+	body, err := os.ReadFile(path) // #nosec G304,G703 -- CLI inspects an operator-selected local evidence bundle without printing file bodies.
+	if err != nil {
+		return err
+	}
+	view, err := evidence.InspectTarGzipBundle(body)
+	if err != nil {
+		return err
+	}
+	enc := json.NewEncoder(os.Stdout)
+	enc.SetIndent("", "  ")
+	return enc.Encode(view)
+}
+
+// exportRawPayload fetches the raw payload of an event, passing reason as a
+// query parameter, base64-decodes body_base64, and writes the bytes to stdout
+// when outputPath is "" or "-", otherwise to outputPath.
+//
+// The status code is checked before the body is decoded so that an error
+// response, whatever its shape, is reported as a problem-response error
+// rather than as a JSON decoding error.
+func exportRawPayload(baseURL, apiKey, eventID, reason, outputPath string) error {
+	if strings.TrimSpace(eventID) == "" {
+		return fmt.Errorf("event-id is required")
+	}
+	reason = strings.TrimSpace(reason)
+	if reason == "" {
+		return fmt.Errorf("reason is required")
+	}
+	endpoint, err := apiEndpoint(baseURL, "/v1/events/"+url.PathEscape(eventID)+"/raw")
+	if err != nil {
+		return err
+	}
+	parsedEndpoint, err := url.Parse(endpoint)
+	if err != nil {
+		return err
+	}
+	query := parsedEndpoint.Query()
+	query.Set("reason", reason)
+	parsedEndpoint.RawQuery = query.Encode()
+	endpoint = parsedEndpoint.String()
+	// #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, endpoint, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	// #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return problemResponseError("raw export failed", resp.StatusCode, responseBody)
+	}
+	var payload struct {
+		BodyBase64 string `json:"body_base64"`
+	}
+	if err := json.Unmarshal(responseBody, &payload); err != nil {
+		return err
+	}
+	raw, err := base64.StdEncoding.DecodeString(payload.BodyBase64)
+	if err != nil {
+		return err
+	}
+	if outputPath == "" || outputPath == "-" {
+		_, err = os.Stdout.Write(raw)
+		return err
+	}
+	return writePrivateFile(outputPath, raw)
+}
+
+// readRequiredOperatorFile returns the contents of path as a string; a blank
+// path yields an error naming flagName as required.
+func readRequiredOperatorFile(path, flagName string) (string, error) {
+	if strings.TrimSpace(path) == "" {
+		return "", fmt.Errorf("%s is required", flagName)
+	}
+	body, err := os.ReadFile(path) // #nosec G304,G703 -- CLI reads an operator-selected local file.
+	if err != nil {
+		return "", err
+	}
+	return string(body), nil
+}
+
+// readOptionalOperatorFile returns "" for a blank path and the file contents
+// otherwise.
+func readOptionalOperatorFile(path string) (string, error) {
+	if strings.TrimSpace(path) == "" {
+		return "", nil
+	}
+	return readRequiredOperatorFile(path, "file")
+}
+
+// writePrivateFile writes body to outputPath, creating the file with mode
+// 0o600. It rejects an empty or "-" path and refuses to write through an
+// existing symlink.
+func writePrivateFile(outputPath string, body []byte) error {
+	if strings.TrimSpace(outputPath) == "" || outputPath == "-" {
+		return fmt.Errorf("output path is required")
+	}
+	if info, err := os.Lstat(outputPath); err == nil { // #nosec G304,G703 -- CLI checks an operator-selected path before writing.
+		if info.Mode()&os.ModeSymlink != 0 {
+			return fmt.Errorf("refusing to write through symlink: %s", outputPath)
+		}
+	}
+	return os.WriteFile(outputPath, body, 0o600) // #nosec G304,G306,G703 -- CLI writes operator-selected export files with private permissions.
+}
+
+// apiEndpoint joins baseURL (trailing slashes trimmed) with path after checking
+// that baseURL uses http or https, has a host, and carries no credentials.
+func apiEndpoint(baseURL, path string) (string, error) {
+	parsed, err := url.Parse(strings.TrimRight(baseURL, "/"))
+	if err != nil {
+		return "", err
+	}
+	if parsed.Scheme != "http" && parsed.Scheme != "https" {
+		return "", fmt.Errorf("base-url must use http or https")
+	}
+	if parsed.Host == "" || parsed.User != nil {
+		return "", fmt.Errorf("base-url must include a host and must not include credentials")
+	}
+	return parsed.String() + path, nil
+}
+
+// splitCSV splits value on commas, trims each element, and drops empty
+// elements; an empty input returns nil.
+func splitCSV(value string) []string {
+	if value == "" {
+		return nil
+	}
+	parts := strings.Split(value, ",")
+	out := make([]string, 0, len(parts))
+	for _, part := range parts {
+		part = strings.TrimSpace(part)
+		if part != "" {
+			out = append(out, part)
+		}
+	}
+	return out
+}
+
+// valueOrDefault returns fallback when value is negative, otherwise value.
+func valueOrDefault(value, fallback int) int {
+	if value < 0 {
+		return fallback
+	}
+	return value
+}
+
+// valueOrDefaultString returns fallback when value is blank, otherwise value.
+func valueOrDefaultString(value, fallback string) string {
+	if strings.TrimSpace(value) == "" {
+		return fallback
+	}
+	return value
+}
+
+// parseKeyValueCSV parses "k=v,k2=v2" into a map, skipping elements that have
+// no "=" or an empty key; values are trimmed.
+func parseKeyValueCSV(value string) map[string]string {
+	out := map[string]string{}
+	for _, part := range splitCSV(value) {
+		key, val, ok := strings.Cut(part, "=")
+		if !ok {
+			continue
+		}
+		key = strings.TrimSpace(key)
+		if key == "" {
+			continue
+		}
+		out[key] = strings.TrimSpace(val)
+	}
+	return out
+}
+
+// parseOptionalTime parses an RFC3339 timestamp into UTC; a blank value
+// returns the zero time without error.
+func parseOptionalTime(value string) (time.Time, error) {
+	value = strings.TrimSpace(value)
+	if value == "" {
+		return time.Time{}, nil
+	}
+	parsed, err := time.Parse(time.RFC3339, value)
+	if err != nil {
+		return time.Time{}, fmt.Errorf("time must be RFC3339: %w", err)
+	}
+	return parsed.UTC(), nil
+}
+
+// nullableCLITime returns nil for a zero time and the time itself otherwise.
+func nullableCLITime(value time.Time) any {
+	if value.IsZero() {
+		return nil
+	}
+	return value
+}
diff
--git a/cmd/whcp/commands_delivery_replay.go b/cmd/whcp/commands_delivery_replay.go new file mode 100644 index 0000000..1a5a4c1 --- /dev/null +++ b/cmd/whcp/commands_delivery_replay.go @@ -0,0 +1,212 @@ +package main + +import ( + "flag" + "fmt" + "net/url" + "os" + "strings" + + apppkg "webhookery/internal/app" +) + +func runDeliveries(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp deliveries ") + } + fs := flag.NewFlagSet("deliveries "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + deliveryID := fs.String("delivery-id", "", "delivery id") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/deliveries") + case "attempts": + return getJSON(*baseURL, *apiKey, "/v1/deliveries/"+url.PathEscape(*deliveryID)+"/attempts") + case "retry": + return postJSON(*baseURL, *apiKey, "/v1/deliveries/"+url.PathEscape(*deliveryID)+":retry", map[string]string{"reason": *reason}) + case "cancel": + return postJSON(*baseURL, *apiKey, "/v1/deliveries/"+url.PathEscape(*deliveryID)+":cancel", map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp deliveries ") + } +} + +func runReplayJobs(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp replay-jobs ") + } + fs := flag.NewFlagSet("replay-jobs "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + replayJobID := fs.String("replay-job-id", "", "replay job id") + eventID := fs.String("event-id", "", "event id") + deliveryID := fs.String("delivery-id", "", "delivery id") + endpointID := fs.String("endpoint-id", "", "endpoint id") + reasonCode := 
fs.String("reason-code", "", "structured replay reason code") + reason := fs.String("reason", "", "operator reason") + configMode := fs.String("config-mode", apppkg.ReplayConfigCurrent, "current or original") + rateLimitPerMinute := fs.Int("rate-limit-per-minute", 0, "optional replay rate limit") + requireApproval := fs.Bool("require-approval", false, "create job in pending approval state") + approvalExpiresAtRaw := fs.String("approval-expires-at", "", "RFC3339 approval expiry for pending replay jobs") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/replay-jobs") + case "dry-run", "preview": + if strings.TrimSpace(*reasonCode) == "" { + return fmt.Errorf("reason-code is required") + } + if strings.TrimSpace(*reason) == "" { + return fmt.Errorf("reason is required") + } + path := "/v1/replay-jobs:dry-run" + if args[0] == "preview" { + path = "/v1/replay-jobs/preview" + } + return postJSON(*baseURL, *apiKey, path, map[string]any{"event_id": *eventID, "delivery_id": *deliveryID, "endpoint_id": *endpointID, "reason_code": *reasonCode, "reason": *reason, "config_mode": *configMode, "rate_limit_per_minute": *rateLimitPerMinute}) + case "create": + if strings.TrimSpace(*reasonCode) == "" { + return fmt.Errorf("reason-code is required") + } + if strings.TrimSpace(*reason) == "" { + return fmt.Errorf("reason is required") + } + approvalExpiresAt, err := parseOptionalTime(*approvalExpiresAtRaw) + if err != nil { + return err + } + if !approvalExpiresAt.IsZero() && !*requireApproval { + return fmt.Errorf("approval-expires-at requires require-approval") + } + body := map[string]any{"event_id": *eventID, "delivery_id": *deliveryID, "endpoint_id": *endpointID, "reason_code": *reasonCode, "reason": *reason, "config_mode": *configMode, "rate_limit_per_minute": *rateLimitPerMinute, "require_approval": *requireApproval} + if !approvalExpiresAt.IsZero() { + body["approval_expires_at"] = 
approvalExpiresAt + } + return postJSON(*baseURL, *apiKey, "/v1/replay-jobs", body) + case "approve": + return postJSON(*baseURL, *apiKey, "/v1/replay-jobs/"+url.PathEscape(*replayJobID)+":approve", map[string]string{"reason": *reason}) + case "pause": + return postJSON(*baseURL, *apiKey, "/v1/replay-jobs/"+url.PathEscape(*replayJobID)+":pause", map[string]string{"reason": *reason}) + case "resume": + return postJSON(*baseURL, *apiKey, "/v1/replay-jobs/"+url.PathEscape(*replayJobID)+":resume", map[string]string{"reason": *reason}) + case "cancel": + return postJSON(*baseURL, *apiKey, "/v1/replay-jobs/"+url.PathEscape(*replayJobID)+":cancel", map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp replay-jobs ") + } +} + +func runReplayApprovalPolicies(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp replay-approval-policies ") + } + fs := flag.NewFlagSet("replay-approval-policies "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + policyID := fs.String("policy-id", "", "replay approval policy id") + scopeType := fs.String("scope-type", "", "tenant, source, or route") + scopeID := fs.String("scope-id", "", "source or route id") + defaultExpirySeconds := fs.Int("default-expiry-seconds", 0, "approval expiry seconds for policy-created pending jobs") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/replay-approval-policies") + case "create": + if strings.TrimSpace(*scopeType) == "" { + return fmt.Errorf("scope-type is required") + } + if strings.TrimSpace(*reason) == "" { + return fmt.Errorf("reason is required") + } + body := map[string]any{"scope_type": *scopeType, "scope_id": *scopeID, "require_approval": true, "reason": *reason} + if 
*defaultExpirySeconds > 0 { + body["default_expiry_seconds"] = *defaultExpirySeconds + } + return postJSON(*baseURL, *apiKey, "/v1/replay-approval-policies", body) + case "disable": + if strings.TrimSpace(*policyID) == "" { + return fmt.Errorf("policy-id is required") + } + if strings.TrimSpace(*reason) == "" { + return fmt.Errorf("reason is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/replay-approval-policies/"+url.PathEscape(*policyID), map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp replay-approval-policies ") + } +} + +func runReconciliationJobs(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp reconciliation-jobs ") + } + fs := flag.NewFlagSet("reconciliation-jobs "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + jobID := fs.String("job-id", "", "reconciliation job id") + connectionID := fs.String("connection-id", "", "provider connection id") + scopeObjectID := fs.String("scope-object-id", "", "provider-specific object or event scope") + fromRaw := fs.String("from", "", "RFC3339 lower bound") + toRaw := fs.String("to", "", "RFC3339 upper bound") + captureMissing := fs.Bool("capture-missing", false, "capture recoverable missing provider events") + routeRecovered := fs.Bool("route-recovered", false, "route recovered events after durable capture") + redeliverFailed := fs.Bool("redeliver-failed", false, "request provider redelivery for failed deliveries when supported") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs") + case "get": + if strings.TrimSpace(*jobID) == "" { + return fmt.Errorf("job-id is required") + } + return getJSON(*baseURL, *apiKey, 
"/v1/reconciliation-jobs/"+url.PathEscape(*jobID)) + case "items": + if strings.TrimSpace(*jobID) == "" { + return fmt.Errorf("job-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs/"+url.PathEscape(*jobID)+"/items") + case "dry-run", "create": + from, err := parseOptionalTime(*fromRaw) + if err != nil { + return err + } + to, err := parseOptionalTime(*toRaw) + if err != nil { + return err + } + body := map[string]any{ + "connection_id": *connectionID, + "scope_object_id": *scopeObjectID, + "window_start": nullableCLITime(from), + "window_end": nullableCLITime(to), + "capture_missing": *captureMissing, + "route_recovered": *routeRecovered, + "redeliver_failed": *redeliverFailed, + "reason": *reason, + } + if args[0] == "dry-run" { + return postJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs:dry-run", body) + } + return postJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs", body) + case "cancel": + if strings.TrimSpace(*jobID) == "" { + return fmt.Errorf("job-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs/"+url.PathEscape(*jobID)+":cancel", map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp reconciliation-jobs ") + } +} diff --git a/cmd/whcp/commands_endpoint_routing.go b/cmd/whcp/commands_endpoint_routing.go new file mode 100644 index 0000000..a4550f6 --- /dev/null +++ b/cmd/whcp/commands_endpoint_routing.go @@ -0,0 +1,393 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "net/url" + "os" + "strings" + "time" + + "webhookery/internal/domain" + "webhookery/internal/transform" +) + +func runEndpoints(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp endpoints ") + } + fs := flag.NewFlagSet("endpoints "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + rawURL := fs.String("url", "", "endpoint 
URL") + name := fs.String("name", "", "endpoint name") + endpointID := fs.String("endpoint-id", "", "endpoint id") + state := fs.String("state", "", "endpoint state") + reason := fs.String("reason", "", "operator reason") + retryPolicyID := fs.String("retry-policy-id", "", "retry policy id") + mtlsClientCertFile := fs.String("mtls-client-cert-file", "", "PEM client certificate for endpoint mTLS") + mtlsClientKeyFile := fs.String("mtls-client-key-file", "", "PEM client private key for endpoint mTLS") + graceHours := fs.Int("grace-hours", 72, "old signing secret grace period in hours") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/endpoints") + case "get": + if strings.TrimSpace(*endpointID) == "" { + return fmt.Errorf("endpoint-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/endpoints/"+url.PathEscape(*endpointID)) + case "validate-url": + return postJSON(*baseURL, *apiKey, "/v1/endpoints:validate-url", map[string]string{"url": *rawURL}) + case "create": + body := map[string]string{"name": *name, "url": *rawURL, "retry_policy_id": *retryPolicyID} + if *mtlsClientCertFile != "" || *mtlsClientKeyFile != "" { + cert, key, err := readMTLSFiles(*mtlsClientCertFile, *mtlsClientKeyFile) + if err != nil { + return err + } + body["mtls_client_cert_pem"] = cert + body["mtls_client_key_pem"] = key + } + return postJSON(*baseURL, *apiKey, "/v1/endpoints", body) + case "update": + if strings.TrimSpace(*endpointID) == "" { + return fmt.Errorf("endpoint-id is required") + } + body := map[string]string{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + } + if strings.TrimSpace(*rawURL) != "" { + body["url"] = *rawURL + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + if strings.TrimSpace(*retryPolicyID) != "" { + body["retry_policy_id"] = *retryPolicyID + } + return patchJSON(*baseURL, *apiKey, 
"/v1/endpoints/"+url.PathEscape(*endpointID), body) + case "delete": + if strings.TrimSpace(*endpointID) == "" { + return fmt.Errorf("endpoint-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/endpoints/"+url.PathEscape(*endpointID), map[string]string{"reason": *reason}) + case "test": + if strings.TrimSpace(*endpointID) == "" { + return fmt.Errorf("endpoint-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/endpoints/"+url.PathEscape(*endpointID)+":test", map[string]string{"reason": *reason}) + case "rotate-secret": + if strings.TrimSpace(*endpointID) == "" { + return fmt.Errorf("endpoint-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/endpoints/"+url.PathEscape(*endpointID)+"/secrets:rotate", map[string]any{"grace_period_hours": *graceHours, "reason": *reason}) + default: + return fmt.Errorf("usage: whcp endpoints ") + } +} + +func runSubscriptions(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp subscriptions ") + } + fs := flag.NewFlagSet("subscriptions "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + subscriptionID := fs.String("subscription-id", "", "subscription id") + endpointID := fs.String("endpoint-id", "", "endpoint id") + eventTypes := fs.String("event-types", "", "comma-separated event types") + payloadFormat := fs.String("payload-format", "", "payload format") + transformationID := fs.String("transformation-id", "", "optional transformation id") + state := fs.String("state", "", "active or disabled") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/subscriptions") + case "get": + if strings.TrimSpace(*subscriptionID) == "" { + return fmt.Errorf("subscription-id is required") + } + return 
getJSON(*baseURL, *apiKey, "/v1/subscriptions/"+url.PathEscape(*subscriptionID)) + case "create": + body := map[string]any{ + "endpoint_id": *endpointID, + "event_types": splitCSV(*eventTypes), + "transformation_id": *transformationID, + } + if strings.TrimSpace(*payloadFormat) != "" { + body["payload_format"] = *payloadFormat + } + return postJSON(*baseURL, *apiKey, "/v1/subscriptions", body) + case "update": + if strings.TrimSpace(*subscriptionID) == "" { + return fmt.Errorf("subscription-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*endpointID) != "" { + body["endpoint_id"] = *endpointID + } + if strings.TrimSpace(*eventTypes) != "" { + body["event_types"] = splitCSV(*eventTypes) + } + if strings.TrimSpace(*payloadFormat) != "" { + body["payload_format"] = *payloadFormat + } + if strings.TrimSpace(*transformationID) != "" { + body["transformation_id"] = *transformationID + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/subscriptions/"+url.PathEscape(*subscriptionID), body) + case "delete": + if strings.TrimSpace(*subscriptionID) == "" { + return fmt.Errorf("subscription-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/subscriptions/"+url.PathEscape(*subscriptionID), map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp subscriptions ") + } +} + +func runRetryPolicies(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp retry-policies ") + } + fs := flag.NewFlagSet("retry-policies "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + retryPolicyID := fs.String("retry-policy-id", "", "retry policy id") + name := fs.String("name", "", "retry policy name") + maxAttempts := fs.Int("max-attempts", -1, "maximum attempts") + maxDurationSeconds := 
fs.Int("max-duration-seconds", -1, "maximum retry duration in seconds") + initialDelaySeconds := fs.Int("initial-delay-seconds", -1, "initial retry delay in seconds") + maxDelaySeconds := fs.Int("max-delay-seconds", -1, "maximum retry delay in seconds") + rateLimitPerMinute := fs.Int("rate-limit-per-minute", -1, "optional replay/delivery rate hint") + state := fs.String("state", "", "active or disabled") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/retry-policies") + case "get": + if strings.TrimSpace(*retryPolicyID) == "" { + return fmt.Errorf("retry-policy-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/retry-policies/"+url.PathEscape(*retryPolicyID)) + case "create": + body := map[string]any{ + "name": *name, + "max_attempts": valueOrDefault(*maxAttempts, 12), + "max_duration_seconds": valueOrDefault(*maxDurationSeconds, int((72*time.Hour)/time.Second)), + "initial_delay_seconds": valueOrDefault(*initialDelaySeconds, 10), + "max_delay_seconds": valueOrDefault(*maxDelaySeconds, int((6*time.Hour)/time.Second)), + "rate_limit_per_minute": valueOrDefault(*rateLimitPerMinute, 0), + "state": valueOrDefaultString(*state, domain.StateActive), + } + return postJSON(*baseURL, *apiKey, "/v1/retry-policies", body) + case "update": + if strings.TrimSpace(*retryPolicyID) == "" { + return fmt.Errorf("retry-policy-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + } + if *maxAttempts >= 0 { + body["max_attempts"] = *maxAttempts + } + if *maxDurationSeconds >= 0 { + body["max_duration_seconds"] = *maxDurationSeconds + } + if *initialDelaySeconds >= 0 { + body["initial_delay_seconds"] = *initialDelaySeconds + } + if *maxDelaySeconds >= 0 { + body["max_delay_seconds"] = *maxDelaySeconds + } + if *rateLimitPerMinute >= 0 { + 
body["rate_limit_per_minute"] = *rateLimitPerMinute + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/retry-policies/"+url.PathEscape(*retryPolicyID), body) + case "delete": + if strings.TrimSpace(*retryPolicyID) == "" { + return fmt.Errorf("retry-policy-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/retry-policies/"+url.PathEscape(*retryPolicyID), map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp retry-policies ") + } +} + +func runRoutes(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp routes ") + } + fs := flag.NewFlagSet("routes "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + routeID := fs.String("route-id", "", "route id") + sourceID := fs.String("source-id", "", "source id") + endpointID := fs.String("endpoint-id", "", "endpoint id") + eventTypes := fs.String("event-types", "", "comma-separated event types") + eventID := fs.String("event-id", "", "event id") + reason := fs.String("reason", "", "change reason") + name := fs.String("name", "", "route name") + priority := fs.Int("priority", -1, "route priority") + state := fs.String("state", "", "draft, active, or inactive") + retryPolicyID := fs.String("retry-policy-id", "", "retry policy id") + transformationID := fs.String("transformation-id", "", "optional transformation id") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/routes") + case "get": + if strings.TrimSpace(*routeID) == "" { + return fmt.Errorf("route-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID)) + case "create": + body := map[string]any{"name": *name, "source_id": *sourceID, "endpoint_id": *endpointID, "event_types": 
splitCSV(*eventTypes), "retry_policy_id": *retryPolicyID, "transformation_id": *transformationID} + if *priority >= 0 { + body["priority"] = *priority + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return postJSON(*baseURL, *apiKey, "/v1/routes", body) + case "update": + if strings.TrimSpace(*routeID) == "" { + return fmt.Errorf("route-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + } + if strings.TrimSpace(*sourceID) != "" { + body["source_id"] = *sourceID + } + if strings.TrimSpace(*endpointID) != "" { + body["endpoint_id"] = *endpointID + } + if strings.TrimSpace(*eventTypes) != "" { + body["event_types"] = splitCSV(*eventTypes) + } + if *priority >= 0 { + body["priority"] = *priority + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + if strings.TrimSpace(*retryPolicyID) != "" { + body["retry_policy_id"] = *retryPolicyID + } + if strings.TrimSpace(*transformationID) != "" { + body["transformation_id"] = *transformationID + } + return patchJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID), body) + case "delete": + if strings.TrimSpace(*routeID) == "" { + return fmt.Errorf("route-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID), map[string]string{"reason": *reason}) + case "activate": + if strings.TrimSpace(*routeID) == "" { + return fmt.Errorf("route-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID)+":activate", map[string]string{"reason": *reason}) + case "dry-run": + if strings.TrimSpace(*routeID) == "" { + return fmt.Errorf("route-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID)+":dry-run", map[string]string{"event_id": *eventID}) + case "versions": + if strings.TrimSpace(*routeID) == "" { + return fmt.Errorf("route-id is required") + } + return getJSON(*baseURL, *apiKey, 
"/v1/routes/"+url.PathEscape(*routeID)+"/versions") + default: + return fmt.Errorf("usage: whcp routes ") + } +} + +func runTransformations(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp transformations ") + } + fs := flag.NewFlagSet("transformations "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + transformationID := fs.String("transformation-id", "", "transformation id") + versionID := fs.String("version-id", "", "transformation version id") + name := fs.String("name", "", "transformation name") + operationsPath := fs.String("operations-file", "", "JSON operations file") + payloadPath := fs.String("payload-file", "", "JSON payload file for local dry-run") + reason := fs.String("reason", "", "activation reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/transformations") + case "create": + operations, err := readOptionalOperatorFile(*operationsPath) + if err != nil { + return err + } + body := map[string]any{"name": *name} + if strings.TrimSpace(operations) != "" { + body["operations"] = json.RawMessage(operations) + } + return postJSON(*baseURL, *apiKey, "/v1/transformations", body) + case "version": + if strings.TrimSpace(*transformationID) == "" { + return fmt.Errorf("transformation-id is required") + } + operations, err := readRequiredOperatorFile(*operationsPath, "operations-file") + if err != nil { + return err + } + return postJSON(*baseURL, *apiKey, "/v1/transformations/"+url.PathEscape(*transformationID)+"/versions", map[string]any{"operations": json.RawMessage(operations)}) + case "activate": + if strings.TrimSpace(*transformationID) == "" || strings.TrimSpace(*versionID) == "" { + return fmt.Errorf("transformation-id and version-id are required") + } + return postJSON(*baseURL, *apiKey, 
"/v1/transformations/"+url.PathEscape(*transformationID)+"/versions/"+url.PathEscape(*versionID)+":activate", map[string]string{"reason": *reason}) + case "dry-run": + payload, err := readRequiredOperatorFile(*payloadPath, "payload-file") + if err != nil { + return err + } + operations, err := readRequiredOperatorFile(*operationsPath, "operations-file") + if err != nil { + return err + } + ops, err := transform.ParseOperations([]byte(operations)) + if err != nil { + return err + } + out, err := transform.Apply([]byte(payload), ops) + if err != nil { + return err + } + _, err = os.Stdout.Write(append(out, '\n')) + return err + default: + return fmt.Errorf("usage: whcp transformations ") + } +} diff --git a/cmd/whcp/commands_events_sources.go b/cmd/whcp/commands_events_sources.go new file mode 100644 index 0000000..2ee1b18 --- /dev/null +++ b/cmd/whcp/commands_events_sources.go @@ -0,0 +1,269 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "net/url" + "os" + "strings" + "time" + + apppkg "webhookery/internal/app" +) + +func runEvents(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp events ") + } + fs := flag.NewFlagSet("events "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + eventID := fs.String("event-id", "", "event id") + output := fs.String("output", "-", "raw output path, or '-' for stdout") + format := fs.String("format", "json", "timeline output format: json, table, or markdown") + reason := fs.String("reason", "", "operator reason for elevated raw payload access") + limit := fs.Int("limit", 0, "optional result limit") + providerName := fs.String("provider", "", "provider filter") + externalID := fs.String("external-id", "", "provider event id filter") + deliveryID := fs.String("delivery-id", "", "delivery id filter") + status := fs.String("status", "", "status filter, such as 
// eventSearchQuery builds the URL-encoded query string for an event search.
//
// limit is included only when positive. Each string filter is trimmed and
// included only when non-blank. The lower bound on received_at can be given
// either as an RFC3339 timestamp (receivedAfter) or as a positive duration
// relative to the current time (since), but not both; a since value is turned
// into an RFC3339 UTC timestamp. The result is url.Values.Encode output and is
// the empty string when no filter is set.
func eventSearchQuery(limit int, providerName, externalID, deliveryID, status, verification, receivedAfter, since, routeID string) (string, error) {
	params := url.Values{}
	if limit > 0 {
		params.Set("limit", fmt.Sprint(limit))
	}

	filters := []struct{ key, raw string }{
		{"provider", providerName},
		{"external_id", externalID},
		{"delivery_id", deliveryID},
		{"status", status},
		{"verification", verification},
		{"route_id", routeID},
	}
	for _, filter := range filters {
		if trimmed := strings.TrimSpace(filter.raw); trimmed != "" {
			params.Set(filter.key, trimmed)
		}
	}

	lowerBound := strings.TrimSpace(receivedAfter)
	window := strings.TrimSpace(since)
	switch {
	case lowerBound != "" && window != "":
		return "", fmt.Errorf("received-after and since cannot both be set")
	case window != "":
		span, err := time.ParseDuration(window)
		if err != nil || span <= 0 {
			return "", fmt.Errorf("since must be a positive duration")
		}
		lowerBound = time.Now().UTC().Add(-span).Format(time.RFC3339)
	}

	if lowerBound == "" {
		return params.Encode(), nil
	}
	if _, err := time.Parse(time.RFC3339, lowerBound); err != nil {
		return "", fmt.Errorf("received-after must be RFC3339")
	}
	params.Set("received_after", lowerBound)
	return params.Encode(), nil
}
b.WriteString("|---|---|---|---|---|---|\n") + for _, entry := range entries { + fmt.Fprintf(&b, "| %d | `%s` | `%s` | `%s` | `%s` | %s |\n", entry.Sequence, entry.OccurredAt.Format(time.RFC3339), markdownCell(entry.Kind), markdownCell(entry.RefID), markdownCell(entry.State), markdownCell(entry.Detail)) + } + return []byte(b.String()), nil + default: + return nil, fmt.Errorf("timeline format must be json, table, or markdown") + } +} + +func cleanTableCell(value string) string { + value = strings.ReplaceAll(value, "\r", " ") + value = strings.ReplaceAll(value, "\n", " ") + value = strings.ReplaceAll(value, "\t", " ") + return value +} + +func markdownCell(value string) string { + value = cleanTableCell(value) + value = strings.ReplaceAll(value, "|", "\\|") + return value +} + +func runSources(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp sources ") + } + fs := flag.NewFlagSet("sources "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + name := fs.String("name", "", "source name") + providerName := fs.String("provider", "", "provider") + secret := fs.String("secret", "", "verification secret") + sourceID := fs.String("source-id", "", "source id") + state := fs.String("state", "", "source state") + graceHours := fs.Int("grace-hours", 72, "old secret grace period in hours") + reason := fs.String("reason", "", "change reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/sources") + case "get": + if strings.TrimSpace(*sourceID) == "" { + return fmt.Errorf("source-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/sources/"+url.PathEscape(*sourceID)) + case "create": + body := map[string]string{"name": *name, "provider": *providerName, "verification_secret": *secret} + return postJSON(*baseURL, *apiKey, 
"/v1/sources", body) + case "update": + if strings.TrimSpace(*sourceID) == "" { + return fmt.Errorf("source-id is required") + } + body := map[string]string{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/sources/"+url.PathEscape(*sourceID), body) + case "delete": + if strings.TrimSpace(*sourceID) == "" { + return fmt.Errorf("source-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/sources/"+url.PathEscape(*sourceID), map[string]string{"reason": *reason}) + case "rotate-secret": + if strings.TrimSpace(*sourceID) == "" { + return fmt.Errorf("source-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/sources/"+url.PathEscape(*sourceID)+"/secrets:rotate", map[string]any{"new_secret": *secret, "grace_period_hours": *graceHours, "reason": *reason}) + default: + return fmt.Errorf("usage: whcp sources ") + } +} + +func runProviderConnections(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp provider-connections ") + } + fs := flag.NewFlagSet("provider-connections "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + connectionID := fs.String("connection-id", "", "provider connection id") + name := fs.String("name", "", "connection name") + providerName := fs.String("provider", "", "stripe, github, shopify, or slack") + credential := fs.String("credential", "", "provider API credential") + credentialType := fs.String("credential-type", "api_key", "api_key or bearer_token") + config := fs.String("config", "", "comma-separated key=value provider config") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, 
"/v1/provider-connections") + case "get": + if strings.TrimSpace(*connectionID) == "" { + return fmt.Errorf("connection-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/provider-connections/"+url.PathEscape(*connectionID)) + case "create": + return postJSON(*baseURL, *apiKey, "/v1/provider-connections", map[string]any{ + "name": *name, + "provider": *providerName, + "credential": *credential, + "credential_type": *credentialType, + "config": parseKeyValueCSV(*config), + }) + case "verify": + if strings.TrimSpace(*connectionID) == "" { + return fmt.Errorf("connection-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/provider-connections/"+url.PathEscape(*connectionID)+":verify", map[string]string{"reason": *reason}) + case "revoke": + if strings.TrimSpace(*connectionID) == "" { + return fmt.Errorf("connection-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/provider-connections/"+url.PathEscape(*connectionID)+":revoke", map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp provider-connections ") + } +} diff --git a/cmd/whcp/commands_evidence.go b/cmd/whcp/commands_evidence.go new file mode 100644 index 0000000..9ff3ba6 --- /dev/null +++ b/cmd/whcp/commands_evidence.go @@ -0,0 +1,23 @@ +package main + +import ( + "flag" + "fmt" +) + +func runEvidence(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp evidence ") + } + fs := flag.NewFlagSet("evidence "+args[0], flag.ContinueOnError) + filePath := fs.String("file", "", "local evidence bundle path") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "view": + return viewEvidenceBundleFile(*filePath) + default: + return fmt.Errorf("usage: whcp evidence ") + } +} diff --git a/cmd/whcp/commands_identity.go b/cmd/whcp/commands_identity.go new file mode 100644 index 0000000..37fa0e2 --- /dev/null +++ b/cmd/whcp/commands_identity.go @@ -0,0 +1,481 @@ +package main + +import ( + "context" + 
"encoding/json" + "flag" + "fmt" + "net/url" + "os" + "strings" + + apppkg "webhookery/internal/app" + "webhookery/internal/config" +) + +func runAdmin(args []string) error { + if len(args) != 2 || args[0] != "hash-key" { + return fmt.Errorf("usage: whcp admin hash-key ") + } + fmt.Println(apppkg.HashToken(args[1])) + return nil +} + +func runAPIKeys(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp api-keys ") + } + fs := flag.NewFlagSet("api-keys "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + name := fs.String("name", "", "API key name") + userID := fs.String("user-id", "", "user id") + email := fs.String("email", "", "user email") + role := fs.String("role", "operator", "membership role") + scopes := fs.String("scopes", "events:read,deliveries:read", "comma-separated scopes") + keyID := fs.String("key-id", "", "API key id") + reason := fs.String("reason", "", "revocation reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "create": + return postJSON(*baseURL, *apiKey, "/v1/api-keys", map[string]any{"name": *name, "user_id": *userID, "email": *email, "role": *role, "scopes": splitCSV(*scopes)}) + case "list": + return getJSON(*baseURL, *apiKey, "/v1/api-keys") + case "revoke": + return postJSON(*baseURL, *apiKey, "/v1/api-keys/"+url.PathEscape(*keyID)+":revoke", map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp api-keys ") + } +} + +func runProducerClients(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp producer-clients ") + } + fs := flag.NewFlagSet("producer-clients "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + clientID := fs.String("client-id", "", 
"producer client id") + name := fs.String("name", "", "producer client name") + sourceID := fs.String("source-id", "", "optional bound source id") + scopes := fs.String("scopes", "events:write", "comma-separated scopes") + ttl := fs.Int("token-ttl-seconds", 900, "producer access token TTL in seconds") + state := fs.String("state", "", "active or disabled") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/producer-clients") + case "get": + if strings.TrimSpace(*clientID) == "" { + return fmt.Errorf("client-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/producer-clients/"+url.PathEscape(*clientID)) + case "create": + return postJSON(*baseURL, *apiKey, "/v1/producer-clients", map[string]any{ + "name": *name, + "source_id": *sourceID, + "scopes": splitCSV(*scopes), + "token_ttl_seconds": *ttl, + }) + case "update": + if strings.TrimSpace(*clientID) == "" { + return fmt.Errorf("client-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + } + if strings.TrimSpace(*sourceID) != "" { + body["source_id"] = *sourceID + } + if strings.TrimSpace(*scopes) != "" { + body["scopes"] = splitCSV(*scopes) + } + if *ttl != 900 { + body["token_ttl_seconds"] = *ttl + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/producer-clients/"+url.PathEscape(*clientID), body) + case "disable": + if strings.TrimSpace(*clientID) == "" { + return fmt.Errorf("client-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/producer-clients/"+url.PathEscape(*clientID), map[string]string{"reason": *reason}) + case "rotate-secret": + if strings.TrimSpace(*clientID) == "" { + return fmt.Errorf("client-id is required") + } + return postJSON(*baseURL, *apiKey, 
"/v1/producer-clients/"+url.PathEscape(*clientID)+"/secrets:rotate", map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp producer-clients ") + } +} + +func runProducerMTLSIdentities(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp producer-mtls-identities ") + } + fs := flag.NewFlagSet("producer-mtls-identities "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + identityID := fs.String("identity-id", "", "producer mTLS identity id") + name := fs.String("name", "", "identity name") + sourceID := fs.String("source-id", "", "optional bound source id") + certFile := fs.String("cert-file", "", "PEM certificate file") + state := fs.String("state", "", "active or disabled") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + certBody := func() (string, error) { + if strings.TrimSpace(*certFile) == "" { + return "", fmt.Errorf("cert-file is required") + } + body, err := readSmallFile(*certFile, 1<<20) + if err != nil { + return "", err + } + return string(body), nil + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities") + case "get": + if strings.TrimSpace(*identityID) == "" { + return fmt.Errorf("identity-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities/"+url.PathEscape(*identityID)) + case "create": + certPEM, err := certBody() + if err != nil { + return err + } + return postJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities", map[string]any{"name": *name, "source_id": *sourceID, "certificate_pem": certPEM}) + case "update": + if strings.TrimSpace(*identityID) == "" { + return fmt.Errorf("identity-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + 
} + if strings.TrimSpace(*sourceID) != "" { + body["source_id"] = *sourceID + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities/"+url.PathEscape(*identityID), body) + case "disable": + if strings.TrimSpace(*identityID) == "" { + return fmt.Errorf("identity-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities/"+url.PathEscape(*identityID), map[string]string{"reason": *reason}) + case "verify": + if strings.TrimSpace(*identityID) == "" { + return fmt.Errorf("identity-id is required") + } + certPEM, err := certBody() + if err != nil { + return err + } + return postJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities/"+url.PathEscape(*identityID)+":verify", map[string]string{"certificate_pem": certPEM}) + default: + return fmt.Errorf("usage: whcp producer-mtls-identities ") + } +} + +func runKeyCustody(args []string) error { + if len(args) == 0 || args[0] != "test" { + return fmt.Errorf("usage: whcp key-custody test") + } + cfg, err := config.Load() + if err != nil { + return err + } + box, err := secretBoxFromConfig(context.Background(), cfg) + if err != nil { + return err + } + const marker = "webhookery-key-custody-test" + ciphertext, err := box.Encrypt([]byte(marker)) + if err != nil { + return fmt.Errorf("key custody encrypt test failed") + } + plaintext, err := box.Decrypt(ciphertext) + if err != nil { + return fmt.Errorf("key custody decrypt test failed") + } + if string(plaintext) != marker { + return fmt.Errorf("key custody decrypt test returned unexpected plaintext") + } + return json.NewEncoder(os.Stdout).Encode(map[string]any{ + "mode": cfg.SecretBoxMode, + "configured": true, + "ok": true, + "key_ref": keyCustodyKeyRef(cfg), + }) +} + +func runIdentityProviders(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp identity-providers ") + } + fs := flag.NewFlagSet("identity-providers "+args[0], 
flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + providerID := fs.String("provider-id", "", "identity provider id") + name := fs.String("name", "", "identity provider name") + issuerURL := fs.String("issuer-url", "", "OIDC issuer URL") + authURL := fs.String("authorization-url", "", "OIDC authorization endpoint override") + tokenURL := fs.String("token-url", "", "OIDC token endpoint override") + jwksURL := fs.String("jwks-url", "", "OIDC JWKS endpoint override") + clientID := fs.String("client-id", "", "OIDC client id") + clientSecret := fs.String("client-secret", "", "OIDC client secret") + redirectURI := fs.String("redirect-uri", "", "OIDC callback redirect URI") + allowedDomains := fs.String("allowed-email-domains", "", "comma-separated allowed email domains") + state := fs.String("state", "", "active or disabled") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/identity-providers") + case "get": + if strings.TrimSpace(*providerID) == "" { + return fmt.Errorf("provider-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/identity-providers/"+url.PathEscape(*providerID)) + case "create": + return postJSON(*baseURL, *apiKey, "/v1/identity-providers", map[string]any{ + "name": *name, + "provider_type": "oidc", + "issuer_url": *issuerURL, + "authorization_endpoint": *authURL, + "token_endpoint": *tokenURL, + "jwks_uri": *jwksURL, + "client_id": *clientID, + "client_secret": *clientSecret, + "redirect_uri": *redirectURI, + "allowed_email_domains": splitCSV(*allowedDomains), + }) + case "update": + if strings.TrimSpace(*providerID) == "" { + return fmt.Errorf("provider-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name 
+ } + if strings.TrimSpace(*issuerURL) != "" { + body["issuer_url"] = *issuerURL + } + if strings.TrimSpace(*authURL) != "" { + body["authorization_endpoint"] = *authURL + } + if strings.TrimSpace(*tokenURL) != "" { + body["token_endpoint"] = *tokenURL + } + if strings.TrimSpace(*jwksURL) != "" { + body["jwks_uri"] = *jwksURL + } + if strings.TrimSpace(*clientID) != "" { + body["client_id"] = *clientID + } + if strings.TrimSpace(*clientSecret) != "" { + body["client_secret"] = *clientSecret + } + if strings.TrimSpace(*redirectURI) != "" { + body["redirect_uri"] = *redirectURI + } + if strings.TrimSpace(*allowedDomains) != "" { + body["allowed_email_domains"] = splitCSV(*allowedDomains) + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/identity-providers/"+url.PathEscape(*providerID), body) + case "disable": + if strings.TrimSpace(*providerID) == "" { + return fmt.Errorf("provider-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/identity-providers/"+url.PathEscape(*providerID), map[string]string{"reason": *reason}) + case "test": + if strings.TrimSpace(*providerID) == "" { + return fmt.Errorf("provider-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/identity-providers/"+url.PathEscape(*providerID)+":test", map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp identity-providers ") + } +} + +func runSCIMTokens(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp scim-tokens ") + } + fs := flag.NewFlagSet("scim-tokens "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + name := fs.String("name", "", "SCIM token name") + tokenID := fs.String("token-id", "", "SCIM token id") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + 
switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/scim-tokens") + case "create": + return postJSON(*baseURL, *apiKey, "/v1/scim-tokens", map[string]string{"name": *name}) + case "revoke": + if strings.TrimSpace(*tokenID) == "" { + return fmt.Errorf("token-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/scim-tokens/"+url.PathEscape(*tokenID), map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp scim-tokens ") + } +} + +func runRoleBindings(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp role-bindings ") + } + fs := flag.NewFlagSet("role-bindings "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + bindingID := fs.String("binding-id", "", "role binding id") + principalType := fs.String("principal-type", "user", "user or group") + principalID := fs.String("principal-id", "", "principal id") + role := fs.String("role", "support", "role") + resourceFamily := fs.String("resource-family", "*", "resource family") + resourceID := fs.String("resource-id", "*", "resource id") + environment := fs.String("environment", "*", "environment") + state := fs.String("state", "", "active or disabled") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/role-bindings") + case "create": + return postJSON(*baseURL, *apiKey, "/v1/role-bindings", map[string]any{"principal_type": *principalType, "principal_id": *principalID, "role": *role, "resource_family": *resourceFamily, "resource_id": *resourceID, "environment": *environment, "reason": *reason}) + case "update": + if strings.TrimSpace(*bindingID) == "" { + return fmt.Errorf("binding-id is required") + } + body := map[string]any{"reason": *reason} + if 
strings.TrimSpace(*role) != "" { + body["role"] = *role + } + if strings.TrimSpace(*resourceFamily) != "" { + body["resource_family"] = *resourceFamily + } + if strings.TrimSpace(*resourceID) != "" { + body["resource_id"] = *resourceID + } + if strings.TrimSpace(*environment) != "" { + body["environment"] = *environment + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/role-bindings/"+url.PathEscape(*bindingID), body) + case "disable": + if strings.TrimSpace(*bindingID) == "" { + return fmt.Errorf("binding-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/role-bindings/"+url.PathEscape(*bindingID), map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp role-bindings ") + } +} + +func runAccessPolicies(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp access-policies ") + } + fs := flag.NewFlagSet("access-policies "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + policyID := fs.String("policy-id", "", "access policy id") + name := fs.String("name", "", "policy name") + action := fs.String("action", "", "action") + effect := fs.String("effect", "deny", "allow or deny") + resourceFamily := fs.String("resource-family", "*", "resource family") + environment := fs.String("environment", "*", "environment") + conditions := fs.String("conditions", "{}", "JSON policy conditions") + state := fs.String("state", "", "active or disabled") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/access-policies") + case "create": + return postJSON(*baseURL, *apiKey, "/v1/access-policies", map[string]any{"name": *name, "action": *action, "effect": *effect, "resource_family": 
*resourceFamily, "environment": *environment, "conditions": json.RawMessage(*conditions), "reason": *reason}) + case "update": + if strings.TrimSpace(*policyID) == "" { + return fmt.Errorf("policy-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + } + if strings.TrimSpace(*action) != "" { + body["action"] = *action + } + if strings.TrimSpace(*effect) != "" { + body["effect"] = *effect + } + if strings.TrimSpace(*resourceFamily) != "" { + body["resource_family"] = *resourceFamily + } + if strings.TrimSpace(*environment) != "" { + body["environment"] = *environment + } + if strings.TrimSpace(*conditions) != "" { + body["conditions"] = json.RawMessage(*conditions) + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/access-policies/"+url.PathEscape(*policyID), body) + case "disable": + if strings.TrimSpace(*policyID) == "" { + return fmt.Errorf("policy-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/access-policies/"+url.PathEscape(*policyID), map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp access-policies ") + } +} + +func runAuthz(args []string) error { + if len(args) == 0 || args[0] != "explain" { + return fmt.Errorf("usage: whcp authz explain") + } + fs := flag.NewFlagSet("authz explain", flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + actorID := fs.String("actor-id", "", "actor id to explain") + action := fs.String("action", "", "action") + resourceFamily := fs.String("resource-family", "", "resource family") + resourceID := fs.String("resource-id", "", "resource id") + environment := fs.String("environment", "", "environment") + if err := fs.Parse(args[1:]); err != nil { + return err + } + return postJSON(*baseURL, *apiKey, "/v1/authz:explain", 
map[string]any{"actor_id": *actorID, "action": *action, "resource_family": *resourceFamily, "resource_id": *resourceID, "environment": *environment}) +} diff --git a/cmd/whcp/commands_incidents.go b/cmd/whcp/commands_incidents.go new file mode 100644 index 0000000..940d62c --- /dev/null +++ b/cmd/whcp/commands_incidents.go @@ -0,0 +1,65 @@ +package main + +import ( + "flag" + "fmt" + "net/url" + "os" + "strings" +) + +func runIncidents(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp incidents ") + } + fs := flag.NewFlagSet("incidents "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + incidentID := fs.String("incident-id", "", "incident id") + eventID := fs.String("event-id", "", "event id") + title := fs.String("title", "", "incident title") + reason := fs.String("reason", "", "operator reason") + format := fs.String("format", "markdown", "report format: markdown or json") + output := fs.String("output", "-", "output path, or '-' for stdout") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/incidents") + case "get": + if strings.TrimSpace(*incidentID) == "" { + return fmt.Errorf("incident-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/incidents/"+url.PathEscape(*incidentID)) + case "create": + return postJSON(*baseURL, *apiKey, "/v1/incidents", map[string]string{"title": *title, "reason": *reason}) + case "add-event": + if strings.TrimSpace(*incidentID) == "" { + return fmt.Errorf("incident-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/incidents/"+url.PathEscape(*incidentID)+"/events", map[string]string{"event_id": *eventID, "reason": *reason}) + case "remove-event": + if strings.TrimSpace(*incidentID) == "" || strings.TrimSpace(*eventID) == "" { + return fmt.Errorf("incident-id and 
event-id are required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/incidents/"+url.PathEscape(*incidentID)+"/events/"+url.PathEscape(*eventID), map[string]string{"reason": *reason}) + case "generate-report": + if strings.TrimSpace(*incidentID) == "" { + return fmt.Errorf("incident-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/incidents/"+url.PathEscape(*incidentID)+"/generate-report", map[string]string{"reason": *reason}) + case "report": + return downloadIncidentReport(*baseURL, *apiKey, *incidentID, *format, *output) + case "export": + if strings.TrimSpace(*incidentID) == "" { + return fmt.Errorf("incident-id is required") + } + if strings.TrimSpace(*output) == "" || *output == "-" { + return postJSON(*baseURL, *apiKey, "/v1/incidents/"+url.PathEscape(*incidentID)+"/evidence-export", map[string]string{"reason": *reason}) + } + return createAndDownloadIncidentExport(*baseURL, *apiKey, *incidentID, *reason, *output) + default: + return fmt.Errorf("usage: whcp incidents ") + } +} diff --git a/cmd/whcp/commands_ops.go b/cmd/whcp/commands_ops.go new file mode 100644 index 0000000..fc8eddf --- /dev/null +++ b/cmd/whcp/commands_ops.go @@ -0,0 +1,508 @@ +package main + +import ( + "flag" + "fmt" + "net/url" + "os" + "strings" + + "webhookery/internal/domain" +) + +func runOps(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp ops ") + } + fs := flag.NewFlagSet("ops "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + workerID := fs.String("worker-id", "", "worker id") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "metrics": + return getJSON(*baseURL, *apiKey, "/v1/ops/metrics") + case "rollups": + return getJSON(*baseURL, *apiKey, "/v1/ops/metrics/rollups") + case "storage": + return getJSON(*baseURL, *apiKey, "/v1/ops/storage") + case "config": + 
return getJSON(*baseURL, *apiKey, "/v1/ops/config") + case "endpoint-health": + return getJSON(*baseURL, *apiKey, "/v1/endpoint-health") + case "workers": + return getJSON(*baseURL, *apiKey, "/v1/ops/workers") + case "worker": + if strings.TrimSpace(*workerID) == "" { + return fmt.Errorf("worker-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/ops/workers/"+url.PathEscape(*workerID)) + case "queues": + return getJSON(*baseURL, *apiKey, "/v1/ops/queues") + default: + return fmt.Errorf("usage: whcp ops ") + } +} + +func runAlerts(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp alerts ") + } + fs := flag.NewFlagSet("alerts "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + alertID := fs.String("alert-id", "", "alert rule id") + firingID := fs.String("firing-id", "", "alert firing id") + name := fs.String("name", "", "alert name") + ruleType := fs.String("rule-type", "", "alert rule type") + metricName := fs.String("metric-name", "", "optional metric name override") + threshold := fs.Float64("threshold", 0, "threshold") + comparator := fs.String("comparator", ">=", "threshold comparator") + windowSeconds := fs.Int("window-seconds", 300, "evaluation window seconds") + state := fs.String("state", "", "state filter or rule state") + channelIDs := fs.String("channel-ids", "", "comma-separated notification channel ids") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/alerts") + case "create": + return postJSON(*baseURL, *apiKey, "/v1/alerts", map[string]any{ + "name": *name, + "rule_type": *ruleType, + "metric_name": *metricName, + "threshold": *threshold, + "comparator": *comparator, + "window_seconds": *windowSeconds, + "state": *state, + "channel_ids": 
splitCSV(*channelIDs), + }) + case "update": + if strings.TrimSpace(*alertID) == "" { + return fmt.Errorf("alert-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + } + if *threshold != 0 { + body["threshold"] = *threshold + } + if strings.TrimSpace(*comparator) != "" { + body["comparator"] = *comparator + } + if *windowSeconds != 0 { + body["window_seconds"] = *windowSeconds + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + if strings.TrimSpace(*channelIDs) != "" { + body["channel_ids"] = splitCSV(*channelIDs) + } + return patchJSON(*baseURL, *apiKey, "/v1/alerts/"+url.PathEscape(*alertID), body) + case "disable": + if strings.TrimSpace(*alertID) == "" { + return fmt.Errorf("alert-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/alerts/"+url.PathEscape(*alertID), map[string]any{"reason": *reason}) + case "firings": + path := "/v1/alert-firings" + if strings.TrimSpace(*state) != "" { + path += "?state=" + url.QueryEscape(*state) + } + return getJSON(*baseURL, *apiKey, path) + case "ack": + if strings.TrimSpace(*firingID) == "" { + return fmt.Errorf("firing-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/alert-firings/"+url.PathEscape(*firingID)+":acknowledge", map[string]any{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp alerts ") + } +} + +func runNotificationChannels(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp notification-channels ") + } + fs := flag.NewFlagSet("notification-channels "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + channelID := fs.String("channel-id", "", "notification channel id") + name := fs.String("name", "", "channel name") + targetURL := fs.String("url", "", "HTTPS webhook receiver URL") + secret := 
fs.String("signing-secret", "", "HMAC signing secret") + state := fs.String("state", "", "active or disabled") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/notification-channels") + case "create": + return postJSON(*baseURL, *apiKey, "/v1/notification-channels", map[string]any{ + "name": *name, + "channel_type": domain.NotificationChannelWebhook, + "url": *targetURL, + "signing_secret": *secret, + }) + case "update": + if strings.TrimSpace(*channelID) == "" { + return fmt.Errorf("channel-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + } + if strings.TrimSpace(*targetURL) != "" { + body["url"] = *targetURL + } + if strings.TrimSpace(*secret) != "" { + body["signing_secret"] = *secret + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/notification-channels/"+url.PathEscape(*channelID), body) + case "disable": + if strings.TrimSpace(*channelID) == "" { + return fmt.Errorf("channel-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/notification-channels/"+url.PathEscape(*channelID), map[string]any{"reason": *reason}) + case "test": + if strings.TrimSpace(*channelID) == "" { + return fmt.Errorf("channel-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/notification-channels/"+url.PathEscape(*channelID)+":test", map[string]any{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp notification-channels ") + } +} + +func runNotificationDeliveries(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp notification-deliveries ") + } + fs := flag.NewFlagSet("notification-deliveries "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", 
os.Getenv("WEBHOOKERY_API_KEY"), "API key") + deliveryID := fs.String("delivery-id", "", "notification delivery id") + state := fs.String("state", "", "delivery state filter") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + path := "/v1/notification-deliveries" + if strings.TrimSpace(*state) != "" { + path += "?state=" + url.QueryEscape(*state) + } + return getJSON(*baseURL, *apiKey, path) + case "attempts": + if strings.TrimSpace(*deliveryID) == "" { + return fmt.Errorf("delivery-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/notification-deliveries/"+url.PathEscape(*deliveryID)+"/attempts") + case "retry": + if strings.TrimSpace(*deliveryID) == "" { + return fmt.Errorf("delivery-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/notification-deliveries/"+url.PathEscape(*deliveryID)+":retry", map[string]any{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp notification-deliveries ") + } +} + +func runSIEMSinks(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp siem-sinks ") + } + fs := flag.NewFlagSet("siem-sinks "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + sinkID := fs.String("sink-id", "", "SIEM sink id") + name := fs.String("name", "", "sink name") + targetURL := fs.String("url", "", "HTTPS SIEM receiver URL") + secret := fs.String("signing-secret", "", "HMAC signing secret") + state := fs.String("state", "", "active or disabled") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/siem-sinks") + case "create": + return postJSON(*baseURL, *apiKey, "/v1/siem-sinks", map[string]any{ + "name": *name, + "sink_type": 
domain.SIEMSinkWebhook, + "url": *targetURL, + "signing_secret": *secret, + }) + case "update": + if strings.TrimSpace(*sinkID) == "" { + return fmt.Errorf("sink-id is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*name) != "" { + body["name"] = *name + } + if strings.TrimSpace(*targetURL) != "" { + body["url"] = *targetURL + } + if strings.TrimSpace(*secret) != "" { + body["signing_secret"] = *secret + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/siem-sinks/"+url.PathEscape(*sinkID), body) + case "disable": + if strings.TrimSpace(*sinkID) == "" { + return fmt.Errorf("sink-id is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/siem-sinks/"+url.PathEscape(*sinkID), map[string]any{"reason": *reason}) + case "test": + if strings.TrimSpace(*sinkID) == "" { + return fmt.Errorf("sink-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/siem-sinks/"+url.PathEscape(*sinkID)+":test", map[string]any{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp siem-sinks ") + } +} + +func runSIEMDeliveries(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp siem-deliveries ") + } + fs := flag.NewFlagSet("siem-deliveries "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + deliveryID := fs.String("delivery-id", "", "SIEM delivery id") + state := fs.String("state", "", "delivery state filter") + reason := fs.String("reason", "", "operator reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + path := "/v1/siem-deliveries" + if strings.TrimSpace(*state) != "" { + path += "?state=" + url.QueryEscape(*state) + } + return getJSON(*baseURL, *apiKey, path) + case "attempts": + if strings.TrimSpace(*deliveryID) == "" { + return fmt.Errorf("delivery-id 
is required") + } + return getJSON(*baseURL, *apiKey, "/v1/siem-deliveries/"+url.PathEscape(*deliveryID)+"/attempts") + case "retry": + if strings.TrimSpace(*deliveryID) == "" { + return fmt.Errorf("delivery-id is required") + } + return postJSON(*baseURL, *apiKey, "/v1/siem-deliveries/"+url.PathEscape(*deliveryID)+":retry", map[string]any{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp siem-deliveries ") + } +} + +func runAudit(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp audit ") + } + fs := flag.NewFlagSet("audit "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + exportID := fs.String("export-id", "", "audit export id") + fromRaw := fs.String("from", "", "RFC3339 lower bound") + toRaw := fs.String("to", "", "RFC3339 upper bound") + includeRaw := fs.Bool("include-raw", false, "include raw payload bodies when authorized") + includePayloads := fs.Bool("include-payloads", false, "include normalized and delivery payload bodies when authorized") + includeTimelines := fs.Bool("include-timelines", false, "include event, receipt, delivery, and audit timelines") + reason := fs.String("reason", "", "operator reason") + output := fs.String("output", "", "download output path") + filePath := fs.String("file", "", "local evidence bundle path") + anchorID := fs.String("anchor-id", "", "audit chain anchor id") + fromSequence := fs.Int64("from-sequence", 0, "optional audit chain start sequence") + toSequence := fs.Int64("to-sequence", 0, "optional audit chain end sequence") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "export": + from, err := parseOptionalTime(*fromRaw) + if err != nil { + return err + } + to, err := parseOptionalTime(*toRaw) + if err != nil { + return err + } + return postJSON(*baseURL, *apiKey, "/v1/audit-events:export", 
map[string]any{ + "from": nullableCLITime(from), + "to": nullableCLITime(to), + "include_raw_payloads": *includeRaw, + "include_payload_bodies": *includePayloads, + "include_timelines": *includeTimelines, + "reason": *reason, + }) + case "export-status": + if strings.TrimSpace(*exportID) == "" { + return fmt.Errorf("export-id is required") + } + return getJSON(*baseURL, *apiKey, "/v1/audit-exports/"+url.PathEscape(*exportID)) + case "download": + if strings.TrimSpace(*exportID) == "" { + return fmt.Errorf("export-id is required") + } + return downloadAuditExport(*baseURL, *apiKey, *exportID, *output) + case "chain-head": + return getJSON(*baseURL, *apiKey, "/v1/audit-chain/head") + case "verify-chain": + return postJSON(*baseURL, *apiKey, "/v1/audit-chain:verify", map[string]any{"from_sequence": *fromSequence, "to_sequence": *toSequence}) + case "anchor": + return postJSON(*baseURL, *apiKey, "/v1/audit-chain:anchor", map[string]any{"from_sequence": *fromSequence, "to_sequence": *toSequence, "reason": *reason}) + case "anchors": + if strings.TrimSpace(*anchorID) != "" { + return getJSON(*baseURL, *apiKey, "/v1/audit-chain/anchors/"+url.PathEscape(*anchorID)) + } + return getJSON(*baseURL, *apiKey, "/v1/audit-chain/anchors") + case "verify-bundle": + return verifyEvidenceBundleFile(*filePath) + default: + return fmt.Errorf("usage: whcp audit ") + } +} + +func runRetention(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp retention ") + } + fs := flag.NewFlagSet("retention "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + policyID := fs.String("policy-id", "", "retention policy id") + resourceType := fs.String("resource-type", domain.RetentionResourceRawPayload, "raw_payload, normalized_envelope_data, delivery_payload, or audit_event") + sourceID := fs.String("source-id", "", "optional source id for raw 
payload retention") + retentionDays := fs.Int("retention-days", 0, "retention period in days") + state := fs.String("state", "", "active or disabled") + legalHold := fs.Bool("legal-hold", false, "put policy on legal hold") + clearLegalHold := fs.Bool("clear-legal-hold", false, "clear policy legal hold") + holdReason := fs.String("hold-reason", "", "legal hold reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + visited := map[string]bool{} + fs.Visit(func(flag *flag.Flag) { + visited[flag.Name] = true + }) + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/admin/retention-policies") + case "create": + return postJSON(*baseURL, *apiKey, "/v1/admin/retention-policies", map[string]any{ + "resource_type": *resourceType, + "source_id": *sourceID, + "retention_days": *retentionDays, + "state": *state, + "legal_hold": *legalHold, + "hold_reason": *holdReason, + }) + case "update": + if strings.TrimSpace(*policyID) == "" { + return fmt.Errorf("policy-id is required") + } + body := map[string]any{} + if *retentionDays > 0 { + body["retention_days"] = *retentionDays + } + if *state != "" { + body["state"] = *state + } + if *sourceID != "" { + body["source_id"] = *sourceID + } + if visited["legal-hold"] { + body["legal_hold"] = *legalHold + } + if *clearLegalHold { + body["legal_hold"] = false + body["hold_reason"] = "" + } + if *holdReason != "" { + body["hold_reason"] = *holdReason + } + return patchJSON(*baseURL, *apiKey, "/v1/admin/retention-policies/"+url.PathEscape(*policyID), body) + default: + return fmt.Errorf("usage: whcp retention ") + } +} + +func runDeadLetter(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp dead-letter ") + } + fs := flag.NewFlagSet("dead-letter "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + entryID := fs.String("entry-id", "", 
"dead-letter entry id") + entryIDs := fs.String("entry-ids", "", "comma-separated dead-letter entry ids") + reasonCode := fs.String("reason-code", "", "structured replay reason code") + reason := fs.String("reason", "", "release reason") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/dead-letter") + case "release": + if strings.TrimSpace(*reasonCode) == "" { + return fmt.Errorf("reason-code is required") + } + if strings.TrimSpace(*reason) == "" { + return fmt.Errorf("reason is required") + } + return postJSON(*baseURL, *apiKey, "/v1/dead-letter/"+url.PathEscape(*entryID)+":release", map[string]string{"reason_code": *reasonCode, "reason": *reason}) + case "bulk-release": + if strings.TrimSpace(*reasonCode) == "" { + return fmt.Errorf("reason-code is required") + } + if strings.TrimSpace(*reason) == "" { + return fmt.Errorf("reason is required") + } + return postJSON(*baseURL, *apiKey, "/v1/dead-letter:bulk-release", map[string]any{"entry_ids": splitCSV(*entryIDs), "reason_code": *reasonCode, "reason": *reason}) + default: + return fmt.Errorf("usage: whcp dead-letter ") + } +} + +func runQuarantine(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp quarantine ") + } + fs := flag.NewFlagSet("quarantine "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + entryID := fs.String("entry-id", "", "quarantine entry id") + reason := fs.String("reason", "", "decision reason") + routeAfterRelease := fs.Bool("route-after-release", false, "create route work after approval") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "approve": + return postJSON(*baseURL, *apiKey, "/v1/quarantine/"+url.PathEscape(*entryID)+":approve", map[string]any{"reason": *reason, "route_after_release": 
*routeAfterRelease}) + case "reject": + return postJSON(*baseURL, *apiKey, "/v1/quarantine/"+url.PathEscape(*entryID)+":reject", map[string]string{"reason": *reason}) + default: + return fmt.Errorf("usage: whcp quarantine ") + } +} diff --git a/cmd/whcp/commands_schema_adapter_signatures.go b/cmd/whcp/commands_schema_adapter_signatures.go new file mode 100644 index 0000000..ee5a913 --- /dev/null +++ b/cmd/whcp/commands_schema_adapter_signatures.go @@ -0,0 +1,199 @@ +package main + +import ( + "encoding/json" + "flag" + "fmt" + "net/url" + "os" + "strings" + "time" + + "webhookery/internal/domain" + "webhookery/internal/provider" +) + +func runSchemas(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp schemas ") + } + fs := flag.NewFlagSet("schemas "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + name := fs.String("name", "", "event type name") + description := fs.String("description", "", "event type description") + state := fs.String("state", "", "event type or schema state") + reason := fs.String("reason", "", "operator reason") + version := fs.String("version", "", "schema version") + schemaPath := fs.String("schema-file", "", "JSON schema file") + payloadPath := fs.String("payload-file", "", "JSON payload file") + newSchemaPath := fs.String("new-schema-file", "", "candidate JSON schema file") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "event-type-create": + return postJSON(*baseURL, *apiKey, "/v1/event-types", map[string]string{"name": *name, "description": *description}) + case "event-type-list": + return getJSON(*baseURL, *apiKey, "/v1/event-types") + case "event-type-get": + if strings.TrimSpace(*name) == "" { + return fmt.Errorf("name is required") + } + return getJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)) + case 
"event-type-update": + if strings.TrimSpace(*name) == "" { + return fmt.Errorf("name is required") + } + body := map[string]any{"reason": *reason} + if strings.TrimSpace(*description) != "" { + body["description"] = *description + } + if strings.TrimSpace(*state) != "" { + body["state"] = *state + } + return patchJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name), body) + case "event-type-delete": + if strings.TrimSpace(*name) == "" { + return fmt.Errorf("name is required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name), map[string]string{"reason": *reason}) + case "schema-create": + body, err := os.ReadFile(*schemaPath) // #nosec G304,G703 -- CLI reads an operator-selected schema file. + if err != nil { + return err + } + return postJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas", map[string]string{"version": *version, "schema": string(body)}) + case "schema-list": + if strings.TrimSpace(*name) == "" { + return fmt.Errorf("name is required") + } + return getJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas") + case "schema-get": + if strings.TrimSpace(*name) == "" || strings.TrimSpace(*version) == "" { + return fmt.Errorf("name and version are required") + } + return getJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version)) + case "schema-update": + if strings.TrimSpace(*name) == "" || strings.TrimSpace(*version) == "" { + return fmt.Errorf("name and version are required") + } + return patchJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version), map[string]string{"state": *state, "reason": *reason}) + case "schema-delete": + if strings.TrimSpace(*name) == "" || strings.TrimSpace(*version) == "" { + return fmt.Errorf("name and version are required") + } + return deleteJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version), 
map[string]string{"reason": *reason}) + case "validate": + body, err := os.ReadFile(*payloadPath) // #nosec G304,G703 -- CLI reads an operator-selected payload file. + if err != nil { + return err + } + return postJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version)+":validate", map[string]string{"payload": string(body)}) + case "check-compat": + body, err := os.ReadFile(*newSchemaPath) // #nosec G304,G703 -- CLI reads an operator-selected schema file. + if err != nil { + return err + } + return postJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version)+":check-compatibility", map[string]string{"new_schema": string(body)}) + default: + return fmt.Errorf("usage: whcp schemas ") + } +} + +func runAdapters(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp adapters ") + } + fs := flag.NewFlagSet("adapters "+args[0], flag.ContinueOnError) + baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") + apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") + adapterID := fs.String("adapter-id", "", "adapter id") + versionID := fs.String("version-id", "", "adapter version id") + name := fs.String("name", "", "adapter name") + kind := fs.String("kind", domain.AdapterKindDeclarative, "adapter kind") + version := fs.String("version", "", "adapter version") + definitionPath := fs.String("definition-file", "", "declarative adapter definition JSON file") + requestPath := fs.String("request-file", "", "adapter test-vector request JSON file") + expectedPath := fs.String("expected-file", "", "adapter test-vector expected JSON file") + action := fs.String("action", "", "transition action") + reason := fs.String("reason", "", "audit reason") + riskLevel := fs.String("risk-level", "", "risk level") + packageSHA := fs.String("package-sha256", "", "plugin package sha256") + packageSignature := fs.String("package-signature", "", 
"plugin package signature") + sbomSHA := fs.String("sbom-sha256", "", "plugin SBOM sha256") + provenanceURL := fs.String("provenance-url", "", "provenance URL") + description := fs.String("description", "", "description") + if err := fs.Parse(args[1:]); err != nil { + return err + } + switch args[0] { + case "list": + return getJSON(*baseURL, *apiKey, "/v1/adapters") + case "get": + return getJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)) + case "create": + return postJSON(*baseURL, *apiKey, "/v1/adapters", map[string]any{"name": *name, "kind": *kind, "description": *description, "risk_level": *riskLevel, "provenance_url": *provenanceURL}) + case "versions": + return getJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)+"/versions") + case "version-create": + definition, err := readOptionalOperatorFile(*definitionPath) + if err != nil { + return err + } + body := map[string]any{"version": *version, "reason": *reason, "risk_level": *riskLevel, "package_sha256": *packageSHA, "package_signature": *packageSignature, "sbom_sha256": *sbomSHA, "provenance_url": *provenanceURL} + if definition != "" { + body["definition"] = json.RawMessage(definition) + } + return postJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)+"/versions", body) + case "vector-create": + requestBody, err := readRequiredOperatorFile(*requestPath, "request-file") + if err != nil { + return err + } + expectedBody, err := readRequiredOperatorFile(*expectedPath, "expected-file") + if err != nil { + return err + } + return postJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)+"/versions/"+url.PathEscape(*versionID)+"/test-vectors", map[string]any{"name": *name, "request": json.RawMessage(requestBody), "expected": json.RawMessage(expectedBody)}) + case "transition": + return postJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)+"/versions/"+url.PathEscape(*versionID)+":transition", map[string]string{"action": *action, 
"reason": *reason}) + default: + return fmt.Errorf("usage: whcp adapters ") + } +} + +func runSignatures(args []string) error { + if len(args) == 0 || args[0] != "verify" { + return fmt.Errorf("usage: whcp signatures verify --provider PROVIDER --secret SECRET --body FILE --header 'Name: value'") + } + fs := flag.NewFlagSet("signatures verify", flag.ContinueOnError) + providerName := fs.String("provider", "", "provider") + secret := fs.String("secret", "", "secret") + bodyPath := fs.String("body", "", "body file") + header := fs.String("header", "", "header as 'Name: value'") + if err := fs.Parse(args[1:]); err != nil { + return err + } + body, err := os.ReadFile(*bodyPath) + if err != nil { + return err + } + name, value, ok := strings.Cut(*header, ":") + if !ok { + return fmt.Errorf("header must be formatted as 'Name: value'") + } + adapter, ok := provider.BuiltInRegistry().Adapter(*providerName) + if !ok { + return fmt.Errorf("unknown provider %q", *providerName) + } + result := adapter.Verify(provider.VerifyInput{ + RawBody: body, + Headers: map[string][]string{strings.ToLower(strings.TrimSpace(name)): {strings.TrimSpace(value)}}, + Secret: []byte(*secret), + Now: time.Now().UTC(), + }) + out := json.NewEncoder(os.Stdout) + out.SetIndent("", " ") + return out.Encode(result) +} diff --git a/cmd/whcp/doctor.go b/cmd/whcp/doctor.go new file mode 100644 index 0000000..4f2ab5d --- /dev/null +++ b/cmd/whcp/doctor.go @@ -0,0 +1,456 @@ +package main + +import ( + "context" + "encoding/base64" + "encoding/json" + "errors" + "flag" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "time" + + "github.com/jackc/pgx/v5/pgxpool" + + "webhookery/internal/adapters/deliveryhttp" + "webhookery/internal/domain" + "webhookery/internal/ssrf" +) + +func runDoctor(args []string) error { + if len(args) == 0 { + return fmt.Errorf("usage: whcp doctor ") + } + switch args[0] { + case "production": + if len(args) != 1 { + return fmt.Errorf("usage: whcp doctor production") 
+ } + findings := productionDoctorFindings(os.Getenv) + writeDoctorFindings(os.Stdout, findings) + if blockers := countDoctorBlockers(findings); blockers > 0 { + return fmt.Errorf("production doctor found %d blocker(s)", blockers) + } + return nil + case "pilot": + return runPilotDoctor(args[1:]) + default: + return fmt.Errorf("usage: whcp doctor ") + } +} + +func runPilotDoctor(args []string) error { + fs := flag.NewFlagSet("doctor pilot", flag.ContinueOnError) + noNetwork := fs.Bool("no-network", false, "skip safe network connectivity checks") + timeout := fs.Duration("timeout", 3*time.Second, "network check timeout") + if err := fs.Parse(args); err != nil { + return err + } + if fs.NArg() != 0 { + return fmt.Errorf("usage: whcp doctor pilot [--no-network] [--timeout duration]") + } + findings := pilotDoctorFindings(os.Getenv, pilotDoctorOptions{ + Network: !*noNetwork, + Timeout: *timeout, + DBCheck: checkPilotDatabase, + ReceiverCheck: checkPilotReceiver, + }) + writeDoctorFindings(os.Stdout, findings) + if blockers := countDoctorBlockers(findings); blockers > 0 { + return fmt.Errorf("pilot doctor found %d blocker(s)", blockers) + } + return nil +} + +type pilotDoctorOptions struct { + Network bool + Timeout time.Duration + DBCheck func(context.Context, string, time.Duration) (pilotDatabaseStatus, error) + ReceiverCheck func(context.Context, string, time.Duration) error +} + +type pilotDatabaseStatus struct { + AppliedMigrations int + ExpectedMigrations int + PendingOutbox int + InProgressOutbox int + RetentionPolicies int + AuditChainEntries int +} + +func pilotDoctorFindings(getenv func(string) string, opts pilotDoctorOptions) []doctorFinding { + env := func(name string) string { return strings.TrimSpace(getenv(name)) } + timeout := opts.Timeout + if timeout <= 0 { + timeout = 3 * time.Second + } + if opts.DBCheck == nil { + opts.DBCheck = checkPilotDatabase + } + if opts.ReceiverCheck == nil { + opts.ReceiverCheck = checkPilotReceiver + } + var findings 
[]doctorFinding + add := func(severity, check, message string) { + findings = append(findings, doctorFinding{Severity: severity, Check: check, Message: message}) + } + + switch env("WEBHOOKERY_ENVIRONMENT") { + case "production": + add("ok", "environment", "production mode is explicit for pilot") + case "development", "": + add("warning", "environment", "development environment is acceptable only for local pilot drills") + default: + add("warning", "environment", "custom environment name is configured; confirm release evidence labels") + } + + databaseURL := env("WEBHOOKERY_DATABASE_URL") + switch { + case databaseURL == "": + add("blocker", "database", "WEBHOOKERY_DATABASE_URL is required for pilot readiness") + case containsUnsafePlaceholder(databaseURL): + add("blocker", "database", "database URL contains placeholder material") + case strings.Contains(strings.ToLower(databaseURL), "sslmode=disable"): + add("warning", "database", "database TLS appears disabled; use only on a private trusted pilot network") + default: + add("ok", "database", "database URL is configured") + } + + addSecretBoxFindings(add, env, false) + addRawStorageFindings(add, env, false) + addBootstrapFinding(add, env) + addProviderProofFinding(add, env) + + if databaseURL != "" && !containsUnsafePlaceholder(databaseURL) { + if !opts.Network { + add("warning", "database-connectivity", "PostgreSQL connectivity skipped because --no-network is set") + add("warning", "migrations", "migration-state check skipped because --no-network is set") + add("warning", "queue", "outbox health check skipped because --no-network is set") + add("warning", "audit-chain", "audit-chain metadata check skipped because --no-network is set") + add("warning", "retention", "retention policy check skipped because --no-network is set") + } else { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + status, err := opts.DBCheck(ctx, databaseURL, timeout) + cancel() + if err != nil { + add("blocker", 
"database-connectivity", "PostgreSQL connectivity or metadata query failed") + } else { + addPilotDatabaseFindings(add, status) + } + } + } + + addReceiverConnectivityFinding(add, env, opts) + return findings +} + +func productionDoctorFindings(getenv func(string) string) []doctorFinding { + env := func(name string) string { return strings.TrimSpace(getenv(name)) } + var findings []doctorFinding + add := func(severity, check, message string) { + findings = append(findings, doctorFinding{Severity: severity, Check: check, Message: message}) + } + + if env("WEBHOOKERY_ENVIRONMENT") != "production" { + add("blocker", "environment", "WEBHOOKERY_ENVIRONMENT must be production for this doctor") + } else { + add("ok", "environment", "production mode is explicit") + } + + databaseURL := env("WEBHOOKERY_DATABASE_URL") + switch { + case databaseURL == "": + add("blocker", "database", "WEBHOOKERY_DATABASE_URL is required") + case containsUnsafePlaceholder(databaseURL): + add("blocker", "database", "database URL contains placeholder material") + case strings.Contains(strings.ToLower(databaseURL), "sslmode=disable"): + add("warning", "database", "database TLS appears disabled; use only on a private trusted network") + default: + add("ok", "database", "database URL is configured") + } + + tlsCert := env("WEBHOOKERY_TLS_CERT_FILE") + tlsKey := env("WEBHOOKERY_TLS_KEY_FILE") + switch { + case tlsCert == "" && tlsKey == "": + add("blocker", "tls", "production API listener requires WEBHOOKERY_TLS_CERT_FILE and WEBHOOKERY_TLS_KEY_FILE") + case tlsCert == "" || tlsKey == "": + add("blocker", "tls", "WEBHOOKERY_TLS_CERT_FILE and WEBHOOKERY_TLS_KEY_FILE must be configured together") + default: + add("ok", "tls", "API TLS certificate and key paths are configured") + } + if env("WEBHOOKERY_PRODUCER_MTLS_CLIENT_CA_FILE") != "" { + if tlsCert == "" || tlsKey == "" { + add("blocker", "producer-mtls", "producer mTLS client CA requires app-side TLS") + } else { + add("ok", "producer-mtls", 
"producer mTLS client CA is configured") + } + } else { + add("warning", "producer-mtls", "producer mTLS is disabled") + } + + addSecretBoxFindings(add, env, true) + addRawStorageFindings(add, env, true) + addBootstrapFinding(add, env) + + return findings +} + +func addSecretBoxFindings(add func(string, string, string), env func(string) string, production bool) { + secretBoxMode := envDefaultValue(env("WEBHOOKERY_SECRET_BOX_MODE"), "local") + switch secretBoxMode { + case "local": + master := env("WEBHOOKERY_MASTER_KEY_BASE64") + if master == "" { + add("blocker", "secret-box", "local secret box requires WEBHOOKERY_MASTER_KEY_BASE64") + } else if weak, reason := weakLocalMasterKey(master); weak { + add("blocker", "secret-box", reason) + } else if production { + add("warning", "secret-box", "local secret box is configured; prefer Vault Transit or AWS KMS for shared production operations") + } else { + add("warning", "secret-box", "local secret box is configured; document custody before pilot traffic") + } + case "vault-transit": + if env("WEBHOOKERY_VAULT_ADDR") == "" || env("WEBHOOKERY_VAULT_TOKEN") == "" || env("WEBHOOKERY_VAULT_TRANSIT_KEY") == "" { + add("blocker", "secret-box", "Vault Transit mode requires Vault address, token, and transit key") + } else { + add("ok", "secret-box", "Vault Transit secret box is configured") + } + case "aws-kms": + if env("WEBHOOKERY_AWS_REGION") == "" || env("WEBHOOKERY_AWS_KMS_KEY_ID") == "" { + add("blocker", "secret-box", "AWS KMS mode requires AWS region and KMS key id") + } else { + add("ok", "secret-box", "AWS KMS secret box is configured with redacted key custody") + } + if strings.HasPrefix(strings.ToLower(env("WEBHOOKERY_AWS_KMS_ENDPOINT")), "http://") { + add("warning", "secret-box", "AWS KMS endpoint override is non-TLS; use only for local emulators") + } + default: + add("blocker", "secret-box", "WEBHOOKERY_SECRET_BOX_MODE must be local, vault-transit, or aws-kms") + } +} + +func addRawStorageFindings(add 
func(string, string, string), env func(string) string, production bool) { + rawStorageMode := envDefaultValue(env("WEBHOOKERY_RAW_STORAGE_MODE"), domain.RawStoragePostgres) + switch rawStorageMode { + case domain.RawStoragePostgres: + add("ok", "raw-storage", "PostgreSQL raw payload storage is configured") + case domain.RawStorageS3: + if env("WEBHOOKERY_OBJECT_STORAGE_ENDPOINT") == "" || env("WEBHOOKERY_OBJECT_STORAGE_BUCKET") == "" || + env("WEBHOOKERY_OBJECT_STORAGE_ACCESS_KEY") == "" || env("WEBHOOKERY_OBJECT_STORAGE_SECRET_KEY") == "" { + add("blocker", "raw-storage", "S3 raw storage requires endpoint, bucket, access key, and secret key") + } else if containsUnsafePlaceholder(env("WEBHOOKERY_OBJECT_STORAGE_ACCESS_KEY")) || containsUnsafePlaceholder(env("WEBHOOKERY_OBJECT_STORAGE_SECRET_KEY")) { + add("blocker", "raw-storage", "object storage credentials contain placeholder material") + } else if production && strings.EqualFold(envDefaultValue(env("WEBHOOKERY_OBJECT_STORAGE_USE_SSL"), "true"), "false") { + add("blocker", "raw-storage", "S3 raw storage must use TLS in production") + } else if strings.EqualFold(envDefaultValue(env("WEBHOOKERY_OBJECT_STORAGE_USE_SSL"), "true"), "false") { + add("warning", "raw-storage", "S3 raw storage has TLS disabled; use only for controlled local object-store pilots") + } else { + add("ok", "raw-storage", "S3 raw payload storage is configured with TLS") + } + default: + add("blocker", "raw-storage", "WEBHOOKERY_RAW_STORAGE_MODE must be postgres or s3") + } +} + +func addBootstrapFinding(add func(string, string, string), env func(string) string) { + bootstrapHash := env("WEBHOOKERY_BOOTSTRAP_API_KEY_HASH") + bootstrapPrefix := strings.ToLower(env("WEBHOOKERY_BOOTSTRAP_API_KEY_PREFIX")) + switch { + case bootstrapHash == "": + add("ok", "bootstrap", "no bootstrap API key hash is configured") + case containsUnsafePlaceholder(bootstrapHash) || strings.Contains(bootstrapPrefix, "change") || strings.Contains(bootstrapPrefix, "dev"): 
+ add("blocker", "bootstrap", "bootstrap API key appears to use development placeholder material") + default: + add("warning", "bootstrap", "bootstrap API key is configured; rotate or remove it after initial tenant setup") + } +} + +func addProviderProofFinding(add func(string, string, string), env func(string) string) { + manifestPath := envDefaultValue(env("WEBHOOKERY_PROVIDER_PROOF_MANIFEST_PATH"), "docs/provider-proof-manifest.json") + // #nosec G304 -- doctor reads an operator-selected local metadata file and never prints its content. + body, err := os.ReadFile(manifestPath) + if err != nil { + add("warning", "provider-proof", "provider proof manifest was not found; run make provider-proof-check from a repository checkout") + return + } + var manifest struct { + SchemaVersion string `json:"schema_version"` + NoLiveProviderCalls bool `json:"no_live_provider_calls"` + Proofs []struct { + Provider string `json:"provider"` + } `json:"proofs"` + } + if err := json.Unmarshal(body, &manifest); err != nil || manifest.SchemaVersion != "provider-proof-v1" || !manifest.NoLiveProviderCalls { + add("warning", "provider-proof", "provider proof manifest is present but not valid for pilot readiness") + return + } + if len(manifest.Proofs) == 0 { + add("warning", "provider-proof", "provider proof manifest has no provider proof entries") + return + } + add("ok", "provider-proof", "provider proof metadata is present; run make provider-proof-check for freshness") +} + +func addPilotDatabaseFindings(add func(string, string, string), status pilotDatabaseStatus) { + add("ok", "database-connectivity", "PostgreSQL connectivity succeeded") + switch { + case status.ExpectedMigrations == 0: + add("warning", "migrations", "local migration files were not found; run make rc-check from a repository checkout") + case status.AppliedMigrations < status.ExpectedMigrations: + add("blocker", "migrations", fmt.Sprintf("database has %d applied migrations; repository has %d migration files", 
status.AppliedMigrations, status.ExpectedMigrations)) + case status.AppliedMigrations > status.ExpectedMigrations: + add("blocker", "migrations", fmt.Sprintf("database has %d applied migrations; repository has %d migration files", status.AppliedMigrations, status.ExpectedMigrations)) + default: + add("ok", "migrations", fmt.Sprintf("database has %d applied migrations matching repository files", status.AppliedMigrations)) + } + if status.PendingOutbox == 0 && status.InProgressOutbox == 0 { + add("ok", "queue", "durable outbox has no pending or in-progress work") + } else { + add("warning", "queue", fmt.Sprintf("durable outbox has pending=%d in_progress=%d; inspect worker health before pilot", status.PendingOutbox, status.InProgressOutbox)) + } + if status.RetentionPolicies == 0 { + add("warning", "retention", "no retention policies are configured; define pilot retention before real provider data") + } else { + add("ok", "retention", fmt.Sprintf("%d retention policy row(s) found", status.RetentionPolicies)) + } + if status.AuditChainEntries == 0 { + add("warning", "audit-chain", "no audit-chain entries found yet; run an evidence drill before pilot traffic") + } else { + add("ok", "audit-chain", fmt.Sprintf("%d audit-chain entrie(s) found; run whcp audit verify-chain for full verification", status.AuditChainEntries)) + } +} + +func addReceiverConnectivityFinding(add func(string, string, string), env func(string) string, opts pilotDoctorOptions) { + receiverURL := env("WEBHOOKERY_PILOT_RECEIVER_CHECK_URL") + if receiverURL == "" { + add("warning", "receiver-connectivity", "receiver connectivity not configured; set WEBHOOKERY_PILOT_RECEIVER_CHECK_URL only for explicit pilot checks") + return + } + if !strings.EqualFold(env("WEBHOOKERY_PILOT_ALLOW_RECEIVER_CHECK"), "true") { + add("warning", "receiver-connectivity", "receiver URL is configured but WEBHOOKERY_PILOT_ALLOW_RECEIVER_CHECK=true is required") + return + } + if !opts.Network { + add("warning", 
"receiver-connectivity", "receiver connectivity skipped because --no-network is set") + return + } + ctx, cancel := context.WithTimeout(context.Background(), opts.Timeout) + err := opts.ReceiverCheck(ctx, receiverURL, opts.Timeout) + cancel() + if err != nil { + var policyErr ssrf.PolicyError + if errors.As(err, &policyErr) { + add("blocker", "receiver-connectivity", "receiver URL failed SSRF policy validation") + return + } + add("warning", "receiver-connectivity", "receiver connectivity check failed; inspect endpoint test output") + return + } + add("ok", "receiver-connectivity", "receiver connectivity check succeeded") +} + +func writeDoctorFindings(w io.Writer, findings []doctorFinding) { + for _, finding := range findings { + _, _ = fmt.Fprintf(w, "%s: %s - %s\n", finding.Severity, finding.Check, finding.Message) + } +} + +func countDoctorBlockers(findings []doctorFinding) int { + count := 0 + for _, finding := range findings { + if finding.Severity == "blocker" { + count++ + } + } + return count +} + +func checkPilotDatabase(ctx context.Context, databaseURL string, timeout time.Duration) (pilotDatabaseStatus, error) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + pool, err := pgxpool.New(ctx, databaseURL) + if err != nil { + return pilotDatabaseStatus{}, err + } + defer pool.Close() + if err := pool.Ping(ctx); err != nil { + return pilotDatabaseStatus{}, err + } + status := pilotDatabaseStatus{} + files, err := filepath.Glob(filepath.Join("migrations", "*.up.sql")) + if err == nil { + status.ExpectedMigrations = len(files) + } + if err := pool.QueryRow(ctx, "SELECT count(*) FROM schema_migrations").Scan(&status.AppliedMigrations); err != nil { + return status, err + } + if err := pool.QueryRow(ctx, "SELECT count(*) FROM outbox WHERE state='pending'").Scan(&status.PendingOutbox); err != nil { + return status, err + } + if err := pool.QueryRow(ctx, "SELECT count(*) FROM outbox WHERE state='in_progress'").Scan(&status.InProgressOutbox); 
err != nil { + return status, err + } + if err := pool.QueryRow(ctx, "SELECT count(*) FROM retention_policies WHERE state='active'").Scan(&status.RetentionPolicies); err != nil { + return status, err + } + if err := pool.QueryRow(ctx, "SELECT count(*) FROM audit_chain_entries").Scan(&status.AuditChainEntries); err != nil { + return status, err + } + return status, nil +} + +func checkPilotReceiver(ctx context.Context, rawURL string, timeout time.Duration) error { + validator := ssrf.Validator{} + if result := validator.Validate(ctx, rawURL, ssrf.DefaultPolicy()); !result.Allowed { + return ssrf.PolicyError{Reasons: result.BlockedReasons} + } + client := deliveryhttp.HTTPClient(timeout) + req, err := http.NewRequestWithContext(ctx, http.MethodHead, rawURL, nil) + if err != nil { + return err + } + resp, err := client.Do(req) + if err != nil { + return err + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode >= 500 { + return fmt.Errorf("receiver returned server error") + } + return nil +} + +func envDefaultValue(value, fallback string) string { + if strings.TrimSpace(value) == "" { + return fallback + } + return strings.TrimSpace(value) +} + +func containsUnsafePlaceholder(value string) bool { + lower := strings.ToLower(value) + return strings.Contains(lower, "change-me") || strings.Contains(lower, "changeme") || strings.Contains(lower, "example") +} + +func weakLocalMasterKey(value string) (bool, string) { + key, err := base64.StdEncoding.DecodeString(value) + if err != nil || len(key) != 32 { + return true, "WEBHOOKERY_MASTER_KEY_BASE64 must be base64 encoded 32 bytes" + } + allZero := true + for _, b := range key { + if b != 0 { + allZero = false + break + } + } + if allZero { + return true, "WEBHOOKERY_MASTER_KEY_BASE64 uses the documented zero-value example key" + } + return false, "" +} diff --git a/cmd/whcp/main.go b/cmd/whcp/main.go index 82daa3b..3231b9e 100644 --- a/cmd/whcp/main.go +++ b/cmd/whcp/main.go @@ -1,46 +1,13 @@ package main 
import ( - "bytes" "context" - "crypto/sha256" - "crypto/tls" - "crypto/x509" - "encoding/base64" - "encoding/hex" - "encoding/json" - "errors" - "flag" "fmt" - "io" - "log/slog" - "net/http" - "net/url" "os" - "os/signal" - "strings" - "syscall" - "time" - "webhookery/internal/adapters/crypto" "webhookery/internal/adapters/deliveryhttp" - "webhookery/internal/adapters/httpapi" - "webhookery/internal/adapters/objectstore" - "webhookery/internal/adapters/postgres" "webhookery/internal/adapters/signalhttp" - apppkg "webhookery/internal/app" - "webhookery/internal/authz" - "webhookery/internal/config" - "webhookery/internal/domain" - "webhookery/internal/evidence" - "webhookery/internal/provider" - "webhookery/internal/ssrf" - "webhookery/internal/transform" "webhookery/internal/worker" - - "github.com/aws/aws-sdk-go-v2/aws" - awsconfig "github.com/aws/aws-sdk-go-v2/config" - "github.com/aws/aws-sdk-go-v2/service/kms" ) func main() { @@ -105,6 +72,8 @@ func run(args []string) error { return runDeliveries(args[1:]) case "replay-jobs": return runReplayJobs(args[1:]) + case "replay-approval-policies": + return runReplayApprovalPolicies(args[1:]) case "reconciliation-jobs": return runReconciliationJobs(args[1:]) case "ops": @@ -121,6 +90,8 @@ func run(args []string) error { return runSIEMDeliveries(args[1:]) case "audit": return runAudit(args[1:]) + case "evidence": + return runEvidence(args[1:]) case "retention": return runRetention(args[1:]) case "schemas": @@ -129,6 +100,8 @@ func run(args []string) error { return runDeadLetter(args[1:]) case "quarantine": return runQuarantine(args[1:]) + case "incidents": + return runIncidents(args[1:]) case "signatures": return runSignatures(args[1:]) default: @@ -137,137 +110,7 @@ func run(args []string) error { } func usage() error { - return fmt.Errorf("usage: whcp ") -} - -func runAPI() error { - ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) - defer stop() - cfg, err := config.Load() - 
if err != nil { - return err - } - store, err := openStore(ctx, cfg) - if err != nil { - return err - } - defer store.Close() - - openAPI, err := os.ReadFile("openapi.yaml") - if err != nil { - return err - } - server := httpapi.NewServer(httpapi.ServerConfig{ - Control: apppkg.NewControlServiceWithRuntimeConfig(store, ssrf.Validator{}, opsRuntimeConfig(cfg)), - Ingest: apppkg.NewIngestService(store, apppkg.SystemClock{}), - Auth: runtimeAuth(cfg, store), - SessionAuth: apppkg.SessionAuthenticator{Lookup: store}, - ProducerAuth: apppkg.ProducerTokenAuthenticator{Lookup: store}, - ProducerMTLSAuth: apppkg.ProducerMTLSAuthenticator{Lookup: store}, - OpenAPI: openAPI, - EnableUI: cfg.EnableUI, - SessionCookieSecure: cfg.Environment == "production", - Health: store.Health, - }) - tlsConfig, err := serverTLSConfig(cfg) - if err != nil { - return err - } - httpServer := &http.Server{Addr: cfg.HTTPAddr, Handler: server.Routes(), ReadHeaderTimeout: 5 * time.Second, MaxHeaderBytes: 64 << 10, TLSConfig: tlsConfig} - errCh := make(chan error, 1) - go func() { - slog.Info("starting api", "addr", cfg.HTTPAddr) - if cfg.TLSCertFile != "" { - errCh <- httpServer.ListenAndServeTLS(cfg.TLSCertFile, cfg.TLSKeyFile) - return - } - errCh <- httpServer.ListenAndServe() - }() - select { - case <-ctx.Done(): - shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - return httpServer.Shutdown(shutdownCtx) - case err := <-errCh: - if errors.Is(err, http.ErrServerClosed) { - return nil - } - return err - } -} - -func runMigrate(args []string) error { - fs := flag.NewFlagSet("migrate", flag.ContinueOnError) - dir := fs.String("dir", "migrations", "migration directory") - if err := fs.Parse(args); err != nil { - return err - } - if fs.NArg() != 1 || fs.Arg(0) != "up" { - return fmt.Errorf("usage: whcp migrate [--dir migrations] up") - } - cfg, err := config.Load() - if err != nil { - return err - } - return postgres.MigrateUp(context.Background(), 
cfg.DatabaseURL, *dir) -} - -func runWorker(args []string) error { - fs := flag.NewFlagSet("worker", flag.ContinueOnError) - once := fs.Bool("once", false, "run one polling iteration") - interval := fs.Duration("interval", 2*time.Second, "poll interval") - if err := fs.Parse(args); err != nil { - return err - } - ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) - defer stop() - cfg, err := config.Load() - if err != nil { - return err - } - store, err := openStore(ctx, cfg) - if err != nil { - return err - } - defer store.Close() - w := worker.Worker{ - Store: store, - Processor: store, - DeliveryStore: store, - DeliveryClient: deliveryAdapter{client: deliveryhttp.Client{SSRF: ssrf.Validator{}}}, - NotificationDeliveryStore: store, - NotificationClient: signalAdapter{client: signalhttp.Client{SSRF: ssrf.Validator{}}}, - SIEMDeliveryStore: store, - SIEMClient: signalAdapter{client: signalhttp.Client{SSRF: ssrf.Validator{}}}, - RetentionStore: store, - MetricsStore: store, - AlertStore: store, - WorkerID: "worker-" + time.Now().UTC().Format("20060102150405"), - Limit: 10, - } - if *once { - return w.RunOnce(ctx) - } - ticker := time.NewTicker(*interval) - defer ticker.Stop() - for { - if err := w.RunOnce(ctx); err != nil { - slog.Error("worker iteration failed", "error", err) - } - select { - case <-ctx.Done(): - return nil - case <-ticker.C: - } - } -} - -func runAdmin(args []string) error { - if len(args) != 2 || args[0] != "hash-key" { - return fmt.Errorf("usage: whcp admin hash-key ") - } - fmt.Println(apppkg.HashToken(args[1])) - return nil + return fmt.Errorf("usage: whcp ") } type doctorFinding struct { @@ -276,2095 +119,6 @@ type doctorFinding struct { Message string } -func runDoctor(args []string) error { - if len(args) != 1 || args[0] != "production" { - return fmt.Errorf("usage: whcp doctor production") - } - findings := productionDoctorFindings(os.Getenv) - writeDoctorFindings(os.Stdout, findings) - if blockers := 
countDoctorBlockers(findings); blockers > 0 { - return fmt.Errorf("production doctor found %d blocker(s)", blockers) - } - return nil -} - -func productionDoctorFindings(getenv func(string) string) []doctorFinding { - env := func(name string) string { return strings.TrimSpace(getenv(name)) } - var findings []doctorFinding - add := func(severity, check, message string) { - findings = append(findings, doctorFinding{Severity: severity, Check: check, Message: message}) - } - - if env("WEBHOOKERY_ENVIRONMENT") != "production" { - add("blocker", "environment", "WEBHOOKERY_ENVIRONMENT must be production for this doctor") - } else { - add("ok", "environment", "production mode is explicit") - } - - databaseURL := env("WEBHOOKERY_DATABASE_URL") - switch { - case databaseURL == "": - add("blocker", "database", "WEBHOOKERY_DATABASE_URL is required") - case containsUnsafePlaceholder(databaseURL): - add("blocker", "database", "database URL contains placeholder material") - case strings.Contains(strings.ToLower(databaseURL), "sslmode=disable"): - add("warning", "database", "database TLS appears disabled; use only on a private trusted network") - default: - add("ok", "database", "database URL is configured") - } - - tlsCert := env("WEBHOOKERY_TLS_CERT_FILE") - tlsKey := env("WEBHOOKERY_TLS_KEY_FILE") - switch { - case tlsCert == "" && tlsKey == "": - add("blocker", "tls", "production API listener requires WEBHOOKERY_TLS_CERT_FILE and WEBHOOKERY_TLS_KEY_FILE") - case tlsCert == "" || tlsKey == "": - add("blocker", "tls", "WEBHOOKERY_TLS_CERT_FILE and WEBHOOKERY_TLS_KEY_FILE must be configured together") - default: - add("ok", "tls", "API TLS certificate and key paths are configured") - } - if env("WEBHOOKERY_PRODUCER_MTLS_CLIENT_CA_FILE") != "" { - if tlsCert == "" || tlsKey == "" { - add("blocker", "producer-mtls", "producer mTLS client CA requires app-side TLS") - } else { - add("ok", "producer-mtls", "producer mTLS client CA is configured") - } - } else { - add("warning", 
"producer-mtls", "producer mTLS is disabled") - } - - secretBoxMode := envDefaultValue(env("WEBHOOKERY_SECRET_BOX_MODE"), "local") - switch secretBoxMode { - case "local": - master := env("WEBHOOKERY_MASTER_KEY_BASE64") - if master == "" { - add("blocker", "secret-box", "local secret box requires WEBHOOKERY_MASTER_KEY_BASE64") - } else if weak, reason := weakLocalMasterKey(master); weak { - add("blocker", "secret-box", reason) - } else { - add("warning", "secret-box", "local secret box is configured; prefer Vault Transit or AWS KMS for shared production operations") - } - case "vault-transit": - if env("WEBHOOKERY_VAULT_ADDR") == "" || env("WEBHOOKERY_VAULT_TOKEN") == "" || env("WEBHOOKERY_VAULT_TRANSIT_KEY") == "" { - add("blocker", "secret-box", "Vault Transit mode requires Vault address, token, and transit key") - } else { - add("ok", "secret-box", "Vault Transit secret box is configured") - } - case "aws-kms": - if env("WEBHOOKERY_AWS_REGION") == "" || env("WEBHOOKERY_AWS_KMS_KEY_ID") == "" { - add("blocker", "secret-box", "AWS KMS mode requires AWS region and KMS key id") - } else { - add("ok", "secret-box", "AWS KMS secret box is configured with redacted key custody") - } - if strings.HasPrefix(strings.ToLower(env("WEBHOOKERY_AWS_KMS_ENDPOINT")), "http://") { - add("warning", "secret-box", "AWS KMS endpoint override is non-TLS; use only for local emulators") - } - default: - add("blocker", "secret-box", "WEBHOOKERY_SECRET_BOX_MODE must be local, vault-transit, or aws-kms") - } - - rawStorageMode := envDefaultValue(env("WEBHOOKERY_RAW_STORAGE_MODE"), domain.RawStoragePostgres) - switch rawStorageMode { - case domain.RawStoragePostgres: - add("ok", "raw-storage", "PostgreSQL raw payload storage is configured") - case domain.RawStorageS3: - if env("WEBHOOKERY_OBJECT_STORAGE_ENDPOINT") == "" || env("WEBHOOKERY_OBJECT_STORAGE_BUCKET") == "" || - env("WEBHOOKERY_OBJECT_STORAGE_ACCESS_KEY") == "" || env("WEBHOOKERY_OBJECT_STORAGE_SECRET_KEY") == "" { - 
add("blocker", "raw-storage", "S3 raw storage requires endpoint, bucket, access key, and secret key") - } else if containsUnsafePlaceholder(env("WEBHOOKERY_OBJECT_STORAGE_ACCESS_KEY")) || containsUnsafePlaceholder(env("WEBHOOKERY_OBJECT_STORAGE_SECRET_KEY")) { - add("blocker", "raw-storage", "object storage credentials contain placeholder material") - } else if strings.EqualFold(envDefaultValue(env("WEBHOOKERY_OBJECT_STORAGE_USE_SSL"), "true"), "false") { - add("blocker", "raw-storage", "S3 raw storage must use TLS in production") - } else { - add("ok", "raw-storage", "S3 raw payload storage is configured with TLS") - } - default: - add("blocker", "raw-storage", "WEBHOOKERY_RAW_STORAGE_MODE must be postgres or s3") - } - - bootstrapHash := env("WEBHOOKERY_BOOTSTRAP_API_KEY_HASH") - bootstrapPrefix := strings.ToLower(env("WEBHOOKERY_BOOTSTRAP_API_KEY_PREFIX")) - switch { - case bootstrapHash == "": - add("ok", "bootstrap", "no bootstrap API key hash is configured") - case containsUnsafePlaceholder(bootstrapHash) || strings.Contains(bootstrapPrefix, "change") || strings.Contains(bootstrapPrefix, "dev"): - add("blocker", "bootstrap", "bootstrap API key appears to use development placeholder material") - default: - add("warning", "bootstrap", "bootstrap API key is configured; rotate or remove it after initial tenant setup") - } - - return findings -} - -func writeDoctorFindings(w io.Writer, findings []doctorFinding) { - for _, finding := range findings { - _, _ = fmt.Fprintf(w, "%s: %s - %s\n", finding.Severity, finding.Check, finding.Message) - } -} - -func countDoctorBlockers(findings []doctorFinding) int { - count := 0 - for _, finding := range findings { - if finding.Severity == "blocker" { - count++ - } - } - return count -} - -func envDefaultValue(value, fallback string) string { - if strings.TrimSpace(value) == "" { - return fallback - } - return strings.TrimSpace(value) -} - -func containsUnsafePlaceholder(value string) bool { - lower := strings.ToLower(value) 
- return strings.Contains(lower, "change-me") || strings.Contains(lower, "changeme") || strings.Contains(lower, "example") -} - -func weakLocalMasterKey(value string) (bool, string) { - key, err := base64.StdEncoding.DecodeString(value) - if err != nil || len(key) != 32 { - return true, "WEBHOOKERY_MASTER_KEY_BASE64 must be base64 encoded 32 bytes" - } - allZero := true - for _, b := range key { - if b != 0 { - allZero = false - break - } - } - if allZero { - return true, "WEBHOOKERY_MASTER_KEY_BASE64 uses the documented zero-value example key" - } - return false, "" -} - -func runAPIKeys(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp api-keys ") - } - fs := flag.NewFlagSet("api-keys "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - name := fs.String("name", "", "API key name") - userID := fs.String("user-id", "", "user id") - email := fs.String("email", "", "user email") - role := fs.String("role", "operator", "membership role") - scopes := fs.String("scopes", "events:read,deliveries:read", "comma-separated scopes") - keyID := fs.String("key-id", "", "API key id") - reason := fs.String("reason", "", "revocation reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "create": - return postJSON(*baseURL, *apiKey, "/v1/api-keys", map[string]any{"name": *name, "user_id": *userID, "email": *email, "role": *role, "scopes": splitCSV(*scopes)}) - case "list": - return getJSON(*baseURL, *apiKey, "/v1/api-keys") - case "revoke": - return postJSON(*baseURL, *apiKey, "/v1/api-keys/"+url.PathEscape(*keyID)+":revoke", map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp api-keys ") - } -} - -func runProducerClients(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp producer-clients ") - } - fs := 
flag.NewFlagSet("producer-clients "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - clientID := fs.String("client-id", "", "producer client id") - name := fs.String("name", "", "producer client name") - sourceID := fs.String("source-id", "", "optional bound source id") - scopes := fs.String("scopes", "events:write", "comma-separated scopes") - ttl := fs.Int("token-ttl-seconds", 900, "producer access token TTL in seconds") - state := fs.String("state", "", "active or disabled") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/producer-clients") - case "get": - if strings.TrimSpace(*clientID) == "" { - return fmt.Errorf("client-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/producer-clients/"+url.PathEscape(*clientID)) - case "create": - return postJSON(*baseURL, *apiKey, "/v1/producer-clients", map[string]any{ - "name": *name, - "source_id": *sourceID, - "scopes": splitCSV(*scopes), - "token_ttl_seconds": *ttl, - }) - case "update": - if strings.TrimSpace(*clientID) == "" { - return fmt.Errorf("client-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if strings.TrimSpace(*sourceID) != "" { - body["source_id"] = *sourceID - } - if strings.TrimSpace(*scopes) != "" { - body["scopes"] = splitCSV(*scopes) - } - if *ttl != 900 { - body["token_ttl_seconds"] = *ttl - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/producer-clients/"+url.PathEscape(*clientID), body) - case "disable": - if strings.TrimSpace(*clientID) == "" { - return fmt.Errorf("client-id is required") - } - return deleteJSON(*baseURL, *apiKey, 
"/v1/producer-clients/"+url.PathEscape(*clientID), map[string]string{"reason": *reason}) - case "rotate-secret": - if strings.TrimSpace(*clientID) == "" { - return fmt.Errorf("client-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/producer-clients/"+url.PathEscape(*clientID)+"/secrets:rotate", map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp producer-clients ") - } -} - -func runProducerMTLSIdentities(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp producer-mtls-identities ") - } - fs := flag.NewFlagSet("producer-mtls-identities "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - identityID := fs.String("identity-id", "", "producer mTLS identity id") - name := fs.String("name", "", "identity name") - sourceID := fs.String("source-id", "", "optional bound source id") - certFile := fs.String("cert-file", "", "PEM certificate file") - state := fs.String("state", "", "active or disabled") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - certBody := func() (string, error) { - if strings.TrimSpace(*certFile) == "" { - return "", fmt.Errorf("cert-file is required") - } - body, err := readSmallFile(*certFile, 1<<20) - if err != nil { - return "", err - } - return string(body), nil - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities") - case "get": - if strings.TrimSpace(*identityID) == "" { - return fmt.Errorf("identity-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities/"+url.PathEscape(*identityID)) - case "create": - certPEM, err := certBody() - if err != nil { - return err - } - return postJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities", map[string]any{"name": *name, "source_id": *sourceID, 
"certificate_pem": certPEM}) - case "update": - if strings.TrimSpace(*identityID) == "" { - return fmt.Errorf("identity-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if strings.TrimSpace(*sourceID) != "" { - body["source_id"] = *sourceID - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities/"+url.PathEscape(*identityID), body) - case "disable": - if strings.TrimSpace(*identityID) == "" { - return fmt.Errorf("identity-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities/"+url.PathEscape(*identityID), map[string]string{"reason": *reason}) - case "verify": - if strings.TrimSpace(*identityID) == "" { - return fmt.Errorf("identity-id is required") - } - certPEM, err := certBody() - if err != nil { - return err - } - return postJSON(*baseURL, *apiKey, "/v1/producer-mtls-identities/"+url.PathEscape(*identityID)+":verify", map[string]string{"certificate_pem": certPEM}) - default: - return fmt.Errorf("usage: whcp producer-mtls-identities ") - } -} - -func runKeyCustody(args []string) error { - if len(args) == 0 || args[0] != "test" { - return fmt.Errorf("usage: whcp key-custody test") - } - cfg, err := config.Load() - if err != nil { - return err - } - box, err := secretBoxFromConfig(context.Background(), cfg) - if err != nil { - return err - } - const marker = "webhookery-key-custody-test" - ciphertext, err := box.Encrypt([]byte(marker)) - if err != nil { - return fmt.Errorf("key custody encrypt test failed") - } - plaintext, err := box.Decrypt(ciphertext) - if err != nil { - return fmt.Errorf("key custody decrypt test failed") - } - if string(plaintext) != marker { - return fmt.Errorf("key custody decrypt test returned unexpected plaintext") - } - return json.NewEncoder(os.Stdout).Encode(map[string]any{ - "mode": cfg.SecretBoxMode, - "configured": true, - "ok": true, 
- "key_ref": keyCustodyKeyRef(cfg), - }) -} - -func runIdentityProviders(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp identity-providers ") - } - fs := flag.NewFlagSet("identity-providers "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - providerID := fs.String("provider-id", "", "identity provider id") - name := fs.String("name", "", "identity provider name") - issuerURL := fs.String("issuer-url", "", "OIDC issuer URL") - authURL := fs.String("authorization-url", "", "OIDC authorization endpoint override") - tokenURL := fs.String("token-url", "", "OIDC token endpoint override") - jwksURL := fs.String("jwks-url", "", "OIDC JWKS endpoint override") - clientID := fs.String("client-id", "", "OIDC client id") - clientSecret := fs.String("client-secret", "", "OIDC client secret") - redirectURI := fs.String("redirect-uri", "", "OIDC callback redirect URI") - allowedDomains := fs.String("allowed-email-domains", "", "comma-separated allowed email domains") - state := fs.String("state", "", "active or disabled") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/identity-providers") - case "get": - if strings.TrimSpace(*providerID) == "" { - return fmt.Errorf("provider-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/identity-providers/"+url.PathEscape(*providerID)) - case "create": - return postJSON(*baseURL, *apiKey, "/v1/identity-providers", map[string]any{ - "name": *name, - "provider_type": "oidc", - "issuer_url": *issuerURL, - "authorization_endpoint": *authURL, - "token_endpoint": *tokenURL, - "jwks_uri": *jwksURL, - "client_id": *clientID, - "client_secret": *clientSecret, - "redirect_uri": *redirectURI, - "allowed_email_domains": 
splitCSV(*allowedDomains), - }) - case "update": - if strings.TrimSpace(*providerID) == "" { - return fmt.Errorf("provider-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if strings.TrimSpace(*issuerURL) != "" { - body["issuer_url"] = *issuerURL - } - if strings.TrimSpace(*authURL) != "" { - body["authorization_endpoint"] = *authURL - } - if strings.TrimSpace(*tokenURL) != "" { - body["token_endpoint"] = *tokenURL - } - if strings.TrimSpace(*jwksURL) != "" { - body["jwks_uri"] = *jwksURL - } - if strings.TrimSpace(*clientID) != "" { - body["client_id"] = *clientID - } - if strings.TrimSpace(*clientSecret) != "" { - body["client_secret"] = *clientSecret - } - if strings.TrimSpace(*redirectURI) != "" { - body["redirect_uri"] = *redirectURI - } - if strings.TrimSpace(*allowedDomains) != "" { - body["allowed_email_domains"] = splitCSV(*allowedDomains) - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/identity-providers/"+url.PathEscape(*providerID), body) - case "disable": - if strings.TrimSpace(*providerID) == "" { - return fmt.Errorf("provider-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/identity-providers/"+url.PathEscape(*providerID), map[string]string{"reason": *reason}) - case "test": - if strings.TrimSpace(*providerID) == "" { - return fmt.Errorf("provider-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/identity-providers/"+url.PathEscape(*providerID)+":test", map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp identity-providers ") - } -} - -func runSCIMTokens(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp scim-tokens ") - } - fs := flag.NewFlagSet("scim-tokens "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", 
os.Getenv("WEBHOOKERY_API_KEY"), "API key") - name := fs.String("name", "", "SCIM token name") - tokenID := fs.String("token-id", "", "SCIM token id") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/scim-tokens") - case "create": - return postJSON(*baseURL, *apiKey, "/v1/scim-tokens", map[string]string{"name": *name}) - case "revoke": - if strings.TrimSpace(*tokenID) == "" { - return fmt.Errorf("token-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/scim-tokens/"+url.PathEscape(*tokenID), map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp scim-tokens ") - } -} - -func runRoleBindings(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp role-bindings ") - } - fs := flag.NewFlagSet("role-bindings "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - bindingID := fs.String("binding-id", "", "role binding id") - principalType := fs.String("principal-type", "user", "user or group") - principalID := fs.String("principal-id", "", "principal id") - role := fs.String("role", "support", "role") - resourceFamily := fs.String("resource-family", "*", "resource family") - resourceID := fs.String("resource-id", "*", "resource id") - environment := fs.String("environment", "*", "environment") - state := fs.String("state", "", "active or disabled") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/role-bindings") - case "create": - return postJSON(*baseURL, *apiKey, "/v1/role-bindings", map[string]any{"principal_type": *principalType, "principal_id": *principalID, "role": *role, "resource_family": 
*resourceFamily, "resource_id": *resourceID, "environment": *environment, "reason": *reason}) - case "update": - if strings.TrimSpace(*bindingID) == "" { - return fmt.Errorf("binding-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*role) != "" { - body["role"] = *role - } - if strings.TrimSpace(*resourceFamily) != "" { - body["resource_family"] = *resourceFamily - } - if strings.TrimSpace(*resourceID) != "" { - body["resource_id"] = *resourceID - } - if strings.TrimSpace(*environment) != "" { - body["environment"] = *environment - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/role-bindings/"+url.PathEscape(*bindingID), body) - case "disable": - if strings.TrimSpace(*bindingID) == "" { - return fmt.Errorf("binding-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/role-bindings/"+url.PathEscape(*bindingID), map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp role-bindings ") - } -} - -func runAccessPolicies(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp access-policies ") - } - fs := flag.NewFlagSet("access-policies "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - policyID := fs.String("policy-id", "", "access policy id") - name := fs.String("name", "", "policy name") - action := fs.String("action", "", "action") - effect := fs.String("effect", "deny", "allow or deny") - resourceFamily := fs.String("resource-family", "*", "resource family") - environment := fs.String("environment", "*", "environment") - conditions := fs.String("conditions", "{}", "JSON policy conditions") - state := fs.String("state", "", "active or disabled") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch 
args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/access-policies") - case "create": - return postJSON(*baseURL, *apiKey, "/v1/access-policies", map[string]any{"name": *name, "action": *action, "effect": *effect, "resource_family": *resourceFamily, "environment": *environment, "conditions": json.RawMessage(*conditions), "reason": *reason}) - case "update": - if strings.TrimSpace(*policyID) == "" { - return fmt.Errorf("policy-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if strings.TrimSpace(*action) != "" { - body["action"] = *action - } - if strings.TrimSpace(*effect) != "" { - body["effect"] = *effect - } - if strings.TrimSpace(*resourceFamily) != "" { - body["resource_family"] = *resourceFamily - } - if strings.TrimSpace(*environment) != "" { - body["environment"] = *environment - } - if strings.TrimSpace(*conditions) != "" { - body["conditions"] = json.RawMessage(*conditions) - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/access-policies/"+url.PathEscape(*policyID), body) - case "disable": - if strings.TrimSpace(*policyID) == "" { - return fmt.Errorf("policy-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/access-policies/"+url.PathEscape(*policyID), map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp access-policies ") - } -} - -func runAuthz(args []string) error { - if len(args) == 0 || args[0] != "explain" { - return fmt.Errorf("usage: whcp authz explain") - } - fs := flag.NewFlagSet("authz explain", flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - actorID := fs.String("actor-id", "", "actor id to explain") - action := fs.String("action", "", "action") - resourceFamily := fs.String("resource-family", "", "resource 
family") - resourceID := fs.String("resource-id", "", "resource id") - environment := fs.String("environment", "", "environment") - if err := fs.Parse(args[1:]); err != nil { - return err - } - return postJSON(*baseURL, *apiKey, "/v1/authz:explain", map[string]any{"actor_id": *actorID, "action": *action, "resource_family": *resourceFamily, "resource_id": *resourceID, "environment": *environment}) -} - -func runEvents(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp events ") - } - fs := flag.NewFlagSet("events "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - eventID := fs.String("event-id", "", "event id") - output := fs.String("output", "-", "raw output path, or '-' for stdout") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/events") - case "get": - return getJSON(*baseURL, *apiKey, "/v1/events/"+url.PathEscape(*eventID)) - case "timeline": - return getJSON(*baseURL, *apiKey, "/v1/events/"+url.PathEscape(*eventID)+"/timeline") - case "normalized": - return getJSON(*baseURL, *apiKey, "/v1/events/"+url.PathEscape(*eventID)+"/normalized") - case "raw-export": - return exportRawPayload(*baseURL, *apiKey, *eventID, *output) - default: - return fmt.Errorf("usage: whcp events ") - } -} - -func runSources(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp sources ") - } - fs := flag.NewFlagSet("sources "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - name := fs.String("name", "", "source name") - providerName := fs.String("provider", "", "provider") - secret := fs.String("secret", "", "verification secret") - sourceID := fs.String("source-id", "", "source 
id") - state := fs.String("state", "", "source state") - graceHours := fs.Int("grace-hours", 72, "old secret grace period in hours") - reason := fs.String("reason", "", "change reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/sources") - case "get": - if strings.TrimSpace(*sourceID) == "" { - return fmt.Errorf("source-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/sources/"+url.PathEscape(*sourceID)) - case "create": - body := map[string]string{"name": *name, "provider": *providerName, "verification_secret": *secret} - return postJSON(*baseURL, *apiKey, "/v1/sources", body) - case "update": - if strings.TrimSpace(*sourceID) == "" { - return fmt.Errorf("source-id is required") - } - body := map[string]string{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/sources/"+url.PathEscape(*sourceID), body) - case "delete": - if strings.TrimSpace(*sourceID) == "" { - return fmt.Errorf("source-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/sources/"+url.PathEscape(*sourceID), map[string]string{"reason": *reason}) - case "rotate-secret": - if strings.TrimSpace(*sourceID) == "" { - return fmt.Errorf("source-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/sources/"+url.PathEscape(*sourceID)+"/secrets:rotate", map[string]any{"new_secret": *secret, "grace_period_hours": *graceHours, "reason": *reason}) - default: - return fmt.Errorf("usage: whcp sources ") - } -} - -func runProviderConnections(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp provider-connections ") - } - fs := flag.NewFlagSet("provider-connections "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", 
os.Getenv("WEBHOOKERY_API_KEY"), "API key") - connectionID := fs.String("connection-id", "", "provider connection id") - name := fs.String("name", "", "connection name") - providerName := fs.String("provider", "", "stripe, github, shopify, or slack") - credential := fs.String("credential", "", "provider API credential") - credentialType := fs.String("credential-type", "api_key", "api_key or bearer_token") - config := fs.String("config", "", "comma-separated key=value provider config") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/provider-connections") - case "get": - if strings.TrimSpace(*connectionID) == "" { - return fmt.Errorf("connection-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/provider-connections/"+url.PathEscape(*connectionID)) - case "create": - return postJSON(*baseURL, *apiKey, "/v1/provider-connections", map[string]any{ - "name": *name, - "provider": *providerName, - "credential": *credential, - "credential_type": *credentialType, - "config": parseKeyValueCSV(*config), - }) - case "verify": - if strings.TrimSpace(*connectionID) == "" { - return fmt.Errorf("connection-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/provider-connections/"+url.PathEscape(*connectionID)+":verify", map[string]string{"reason": *reason}) - case "revoke": - if strings.TrimSpace(*connectionID) == "" { - return fmt.Errorf("connection-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/provider-connections/"+url.PathEscape(*connectionID)+":revoke", map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp provider-connections ") - } -} - -func runEndpoints(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp endpoints ") - } - fs := flag.NewFlagSet("endpoints "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", 
"http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - rawURL := fs.String("url", "", "endpoint URL") - name := fs.String("name", "", "endpoint name") - endpointID := fs.String("endpoint-id", "", "endpoint id") - state := fs.String("state", "", "endpoint state") - reason := fs.String("reason", "", "operator reason") - retryPolicyID := fs.String("retry-policy-id", "", "retry policy id") - mtlsClientCertFile := fs.String("mtls-client-cert-file", "", "PEM client certificate for endpoint mTLS") - mtlsClientKeyFile := fs.String("mtls-client-key-file", "", "PEM client private key for endpoint mTLS") - graceHours := fs.Int("grace-hours", 72, "old signing secret grace period in hours") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/endpoints") - case "get": - if strings.TrimSpace(*endpointID) == "" { - return fmt.Errorf("endpoint-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/endpoints/"+url.PathEscape(*endpointID)) - case "validate-url": - return postJSON(*baseURL, *apiKey, "/v1/endpoints:validate-url", map[string]string{"url": *rawURL}) - case "create": - body := map[string]string{"name": *name, "url": *rawURL, "retry_policy_id": *retryPolicyID} - if *mtlsClientCertFile != "" || *mtlsClientKeyFile != "" { - cert, key, err := readMTLSFiles(*mtlsClientCertFile, *mtlsClientKeyFile) - if err != nil { - return err - } - body["mtls_client_cert_pem"] = cert - body["mtls_client_key_pem"] = key - } - return postJSON(*baseURL, *apiKey, "/v1/endpoints", body) - case "update": - if strings.TrimSpace(*endpointID) == "" { - return fmt.Errorf("endpoint-id is required") - } - body := map[string]string{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if strings.TrimSpace(*rawURL) != "" { - body["url"] = *rawURL - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - 
if strings.TrimSpace(*retryPolicyID) != "" { - body["retry_policy_id"] = *retryPolicyID - } - return patchJSON(*baseURL, *apiKey, "/v1/endpoints/"+url.PathEscape(*endpointID), body) - case "delete": - if strings.TrimSpace(*endpointID) == "" { - return fmt.Errorf("endpoint-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/endpoints/"+url.PathEscape(*endpointID), map[string]string{"reason": *reason}) - case "test": - if strings.TrimSpace(*endpointID) == "" { - return fmt.Errorf("endpoint-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/endpoints/"+url.PathEscape(*endpointID)+":test", map[string]string{"reason": *reason}) - case "rotate-secret": - if strings.TrimSpace(*endpointID) == "" { - return fmt.Errorf("endpoint-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/endpoints/"+url.PathEscape(*endpointID)+"/secrets:rotate", map[string]any{"grace_period_hours": *graceHours, "reason": *reason}) - default: - return fmt.Errorf("usage: whcp endpoints ") - } -} - -func runSubscriptions(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp subscriptions ") - } - fs := flag.NewFlagSet("subscriptions "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - subscriptionID := fs.String("subscription-id", "", "subscription id") - endpointID := fs.String("endpoint-id", "", "endpoint id") - eventTypes := fs.String("event-types", "", "comma-separated event types") - payloadFormat := fs.String("payload-format", "", "payload format") - transformationID := fs.String("transformation-id", "", "optional transformation id") - state := fs.String("state", "", "active or disabled") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/subscriptions") - case 
"get": - if strings.TrimSpace(*subscriptionID) == "" { - return fmt.Errorf("subscription-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/subscriptions/"+url.PathEscape(*subscriptionID)) - case "create": - body := map[string]any{ - "endpoint_id": *endpointID, - "event_types": splitCSV(*eventTypes), - "transformation_id": *transformationID, - } - if strings.TrimSpace(*payloadFormat) != "" { - body["payload_format"] = *payloadFormat - } - return postJSON(*baseURL, *apiKey, "/v1/subscriptions", body) - case "update": - if strings.TrimSpace(*subscriptionID) == "" { - return fmt.Errorf("subscription-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*endpointID) != "" { - body["endpoint_id"] = *endpointID - } - if strings.TrimSpace(*eventTypes) != "" { - body["event_types"] = splitCSV(*eventTypes) - } - if strings.TrimSpace(*payloadFormat) != "" { - body["payload_format"] = *payloadFormat - } - if strings.TrimSpace(*transformationID) != "" { - body["transformation_id"] = *transformationID - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/subscriptions/"+url.PathEscape(*subscriptionID), body) - case "delete": - if strings.TrimSpace(*subscriptionID) == "" { - return fmt.Errorf("subscription-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/subscriptions/"+url.PathEscape(*subscriptionID), map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp subscriptions ") - } -} - -func runRetryPolicies(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp retry-policies ") - } - fs := flag.NewFlagSet("retry-policies "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - retryPolicyID := fs.String("retry-policy-id", "", "retry policy id") - name := fs.String("name", "", "retry 
policy name") - maxAttempts := fs.Int("max-attempts", -1, "maximum attempts") - maxDurationSeconds := fs.Int("max-duration-seconds", -1, "maximum retry duration in seconds") - initialDelaySeconds := fs.Int("initial-delay-seconds", -1, "initial retry delay in seconds") - maxDelaySeconds := fs.Int("max-delay-seconds", -1, "maximum retry delay in seconds") - rateLimitPerMinute := fs.Int("rate-limit-per-minute", -1, "optional replay/delivery rate hint") - state := fs.String("state", "", "active or disabled") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/retry-policies") - case "get": - if strings.TrimSpace(*retryPolicyID) == "" { - return fmt.Errorf("retry-policy-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/retry-policies/"+url.PathEscape(*retryPolicyID)) - case "create": - body := map[string]any{ - "name": *name, - "max_attempts": valueOrDefault(*maxAttempts, 12), - "max_duration_seconds": valueOrDefault(*maxDurationSeconds, int((72*time.Hour)/time.Second)), - "initial_delay_seconds": valueOrDefault(*initialDelaySeconds, 10), - "max_delay_seconds": valueOrDefault(*maxDelaySeconds, int((6*time.Hour)/time.Second)), - "rate_limit_per_minute": valueOrDefault(*rateLimitPerMinute, 0), - "state": valueOrDefaultString(*state, domain.StateActive), - } - return postJSON(*baseURL, *apiKey, "/v1/retry-policies", body) - case "update": - if strings.TrimSpace(*retryPolicyID) == "" { - return fmt.Errorf("retry-policy-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if *maxAttempts >= 0 { - body["max_attempts"] = *maxAttempts - } - if *maxDurationSeconds >= 0 { - body["max_duration_seconds"] = *maxDurationSeconds - } - if *initialDelaySeconds >= 0 { - body["initial_delay_seconds"] = *initialDelaySeconds - } - if *maxDelaySeconds >= 0 { - 
body["max_delay_seconds"] = *maxDelaySeconds - } - if *rateLimitPerMinute >= 0 { - body["rate_limit_per_minute"] = *rateLimitPerMinute - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/retry-policies/"+url.PathEscape(*retryPolicyID), body) - case "delete": - if strings.TrimSpace(*retryPolicyID) == "" { - return fmt.Errorf("retry-policy-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/retry-policies/"+url.PathEscape(*retryPolicyID), map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp retry-policies ") - } -} - -func runRoutes(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp routes ") - } - fs := flag.NewFlagSet("routes "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - routeID := fs.String("route-id", "", "route id") - sourceID := fs.String("source-id", "", "source id") - endpointID := fs.String("endpoint-id", "", "endpoint id") - eventTypes := fs.String("event-types", "", "comma-separated event types") - eventID := fs.String("event-id", "", "event id") - reason := fs.String("reason", "", "change reason") - name := fs.String("name", "", "route name") - priority := fs.Int("priority", -1, "route priority") - state := fs.String("state", "", "draft, active, or inactive") - retryPolicyID := fs.String("retry-policy-id", "", "retry policy id") - transformationID := fs.String("transformation-id", "", "optional transformation id") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/routes") - case "get": - if strings.TrimSpace(*routeID) == "" { - return fmt.Errorf("route-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID)) - case "create": - body := map[string]any{"name": 
*name, "source_id": *sourceID, "endpoint_id": *endpointID, "event_types": splitCSV(*eventTypes), "retry_policy_id": *retryPolicyID, "transformation_id": *transformationID} - if *priority >= 0 { - body["priority"] = *priority - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return postJSON(*baseURL, *apiKey, "/v1/routes", body) - case "update": - if strings.TrimSpace(*routeID) == "" { - return fmt.Errorf("route-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if strings.TrimSpace(*sourceID) != "" { - body["source_id"] = *sourceID - } - if strings.TrimSpace(*endpointID) != "" { - body["endpoint_id"] = *endpointID - } - if strings.TrimSpace(*eventTypes) != "" { - body["event_types"] = splitCSV(*eventTypes) - } - if *priority >= 0 { - body["priority"] = *priority - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - if strings.TrimSpace(*retryPolicyID) != "" { - body["retry_policy_id"] = *retryPolicyID - } - if strings.TrimSpace(*transformationID) != "" { - body["transformation_id"] = *transformationID - } - return patchJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID), body) - case "delete": - if strings.TrimSpace(*routeID) == "" { - return fmt.Errorf("route-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID), map[string]string{"reason": *reason}) - case "activate": - if strings.TrimSpace(*routeID) == "" { - return fmt.Errorf("route-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID)+":activate", map[string]string{"reason": *reason}) - case "dry-run": - if strings.TrimSpace(*routeID) == "" { - return fmt.Errorf("route-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID)+":dry-run", map[string]string{"event_id": *eventID}) - case "versions": - if strings.TrimSpace(*routeID) == "" { - return 
fmt.Errorf("route-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/routes/"+url.PathEscape(*routeID)+"/versions") - default: - return fmt.Errorf("usage: whcp routes ") - } -} - -func runTransformations(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp transformations ") - } - fs := flag.NewFlagSet("transformations "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - transformationID := fs.String("transformation-id", "", "transformation id") - versionID := fs.String("version-id", "", "transformation version id") - name := fs.String("name", "", "transformation name") - operationsPath := fs.String("operations-file", "", "JSON operations file") - payloadPath := fs.String("payload-file", "", "JSON payload file for local dry-run") - reason := fs.String("reason", "", "activation reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/transformations") - case "create": - operations, err := readOptionalOperatorFile(*operationsPath) - if err != nil { - return err - } - body := map[string]any{"name": *name} - if strings.TrimSpace(operations) != "" { - body["operations"] = json.RawMessage(operations) - } - return postJSON(*baseURL, *apiKey, "/v1/transformations", body) - case "version": - if strings.TrimSpace(*transformationID) == "" { - return fmt.Errorf("transformation-id is required") - } - operations, err := readRequiredOperatorFile(*operationsPath, "operations-file") - if err != nil { - return err - } - return postJSON(*baseURL, *apiKey, "/v1/transformations/"+url.PathEscape(*transformationID)+"/versions", map[string]any{"operations": json.RawMessage(operations)}) - case "activate": - if strings.TrimSpace(*transformationID) == "" || strings.TrimSpace(*versionID) == "" { - return fmt.Errorf("transformation-id 
and version-id are required") - } - return postJSON(*baseURL, *apiKey, "/v1/transformations/"+url.PathEscape(*transformationID)+"/versions/"+url.PathEscape(*versionID)+":activate", map[string]string{"reason": *reason}) - case "dry-run": - payload, err := readRequiredOperatorFile(*payloadPath, "payload-file") - if err != nil { - return err - } - operations, err := readRequiredOperatorFile(*operationsPath, "operations-file") - if err != nil { - return err - } - ops, err := transform.ParseOperations([]byte(operations)) - if err != nil { - return err - } - out, err := transform.Apply([]byte(payload), ops) - if err != nil { - return err - } - _, err = os.Stdout.Write(append(out, '\n')) - return err - default: - return fmt.Errorf("usage: whcp transformations ") - } -} - -func runDeliveries(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp deliveries ") - } - fs := flag.NewFlagSet("deliveries "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - deliveryID := fs.String("delivery-id", "", "delivery id") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/deliveries") - case "attempts": - return getJSON(*baseURL, *apiKey, "/v1/deliveries/"+url.PathEscape(*deliveryID)+"/attempts") - case "retry": - return postJSON(*baseURL, *apiKey, "/v1/deliveries/"+url.PathEscape(*deliveryID)+":retry", map[string]string{"reason": *reason}) - case "cancel": - return postJSON(*baseURL, *apiKey, "/v1/deliveries/"+url.PathEscape(*deliveryID)+":cancel", map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp deliveries ") - } -} - -func runReplayJobs(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp replay-jobs ") - } - fs := 
flag.NewFlagSet("replay-jobs "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - replayJobID := fs.String("replay-job-id", "", "replay job id") - eventID := fs.String("event-id", "", "event id") - deliveryID := fs.String("delivery-id", "", "delivery id") - endpointID := fs.String("endpoint-id", "", "endpoint id") - reason := fs.String("reason", "", "operator reason") - configMode := fs.String("config-mode", apppkg.ReplayConfigCurrent, "current or original") - rateLimitPerMinute := fs.Int("rate-limit-per-minute", 0, "optional replay rate limit") - requireApproval := fs.Bool("require-approval", false, "create job in pending approval state") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/replay-jobs") - case "dry-run": - return postJSON(*baseURL, *apiKey, "/v1/replay-jobs:dry-run", map[string]any{"event_id": *eventID, "delivery_id": *deliveryID, "endpoint_id": *endpointID, "reason": *reason, "config_mode": *configMode, "rate_limit_per_minute": *rateLimitPerMinute}) - case "create": - return postJSON(*baseURL, *apiKey, "/v1/replay-jobs", map[string]any{"event_id": *eventID, "delivery_id": *deliveryID, "endpoint_id": *endpointID, "reason": *reason, "config_mode": *configMode, "rate_limit_per_minute": *rateLimitPerMinute, "require_approval": *requireApproval}) - case "approve": - return postJSON(*baseURL, *apiKey, "/v1/replay-jobs/"+url.PathEscape(*replayJobID)+":approve", map[string]string{"reason": *reason}) - case "pause": - return postJSON(*baseURL, *apiKey, "/v1/replay-jobs/"+url.PathEscape(*replayJobID)+":pause", map[string]string{"reason": *reason}) - case "resume": - return postJSON(*baseURL, *apiKey, "/v1/replay-jobs/"+url.PathEscape(*replayJobID)+":resume", map[string]string{"reason": *reason}) - case "cancel": - return postJSON(*baseURL, 
*apiKey, "/v1/replay-jobs/"+url.PathEscape(*replayJobID)+":cancel", map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp replay-jobs ") - } -} - -func runReconciliationJobs(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp reconciliation-jobs ") - } - fs := flag.NewFlagSet("reconciliation-jobs "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - jobID := fs.String("job-id", "", "reconciliation job id") - connectionID := fs.String("connection-id", "", "provider connection id") - scopeObjectID := fs.String("scope-object-id", "", "provider-specific object or event scope") - fromRaw := fs.String("from", "", "RFC3339 lower bound") - toRaw := fs.String("to", "", "RFC3339 upper bound") - captureMissing := fs.Bool("capture-missing", false, "capture recoverable missing provider events") - routeRecovered := fs.Bool("route-recovered", false, "route recovered events after durable capture") - redeliverFailed := fs.Bool("redeliver-failed", false, "request provider redelivery for failed deliveries when supported") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs") - case "get": - if strings.TrimSpace(*jobID) == "" { - return fmt.Errorf("job-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs/"+url.PathEscape(*jobID)) - case "items": - if strings.TrimSpace(*jobID) == "" { - return fmt.Errorf("job-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs/"+url.PathEscape(*jobID)+"/items") - case "dry-run", "create": - from, err := parseOptionalTime(*fromRaw) - if err != nil { - return err - } - to, err := parseOptionalTime(*toRaw) - if err != nil { - return err - } - body := 
map[string]any{ - "connection_id": *connectionID, - "scope_object_id": *scopeObjectID, - "window_start": nullableCLITime(from), - "window_end": nullableCLITime(to), - "capture_missing": *captureMissing, - "route_recovered": *routeRecovered, - "redeliver_failed": *redeliverFailed, - "reason": *reason, - } - if args[0] == "dry-run" { - return postJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs:dry-run", body) - } - return postJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs", body) - case "cancel": - if strings.TrimSpace(*jobID) == "" { - return fmt.Errorf("job-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/reconciliation-jobs/"+url.PathEscape(*jobID)+":cancel", map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp reconciliation-jobs ") - } -} - -func runOps(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp ops ") - } - fs := flag.NewFlagSet("ops "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - workerID := fs.String("worker-id", "", "worker id") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "metrics": - return getJSON(*baseURL, *apiKey, "/v1/ops/metrics") - case "rollups": - return getJSON(*baseURL, *apiKey, "/v1/ops/metrics/rollups") - case "storage": - return getJSON(*baseURL, *apiKey, "/v1/ops/storage") - case "config": - return getJSON(*baseURL, *apiKey, "/v1/ops/config") - case "endpoint-health": - return getJSON(*baseURL, *apiKey, "/v1/endpoint-health") - case "workers": - return getJSON(*baseURL, *apiKey, "/v1/ops/workers") - case "worker": - if strings.TrimSpace(*workerID) == "" { - return fmt.Errorf("worker-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/ops/workers/"+url.PathEscape(*workerID)) - case "queues": - return getJSON(*baseURL, *apiKey, "/v1/ops/queues") - default: - return 
fmt.Errorf("usage: whcp ops ") - } -} - -func runAlerts(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp alerts ") - } - fs := flag.NewFlagSet("alerts "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - alertID := fs.String("alert-id", "", "alert rule id") - firingID := fs.String("firing-id", "", "alert firing id") - name := fs.String("name", "", "alert name") - ruleType := fs.String("rule-type", "", "alert rule type") - metricName := fs.String("metric-name", "", "optional metric name override") - threshold := fs.Float64("threshold", 0, "threshold") - comparator := fs.String("comparator", ">=", "threshold comparator") - windowSeconds := fs.Int("window-seconds", 300, "evaluation window seconds") - state := fs.String("state", "", "state filter or rule state") - channelIDs := fs.String("channel-ids", "", "comma-separated notification channel ids") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/alerts") - case "create": - return postJSON(*baseURL, *apiKey, "/v1/alerts", map[string]any{ - "name": *name, - "rule_type": *ruleType, - "metric_name": *metricName, - "threshold": *threshold, - "comparator": *comparator, - "window_seconds": *windowSeconds, - "state": *state, - "channel_ids": splitCSV(*channelIDs), - }) - case "update": - if strings.TrimSpace(*alertID) == "" { - return fmt.Errorf("alert-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if *threshold != 0 { - body["threshold"] = *threshold - } - if strings.TrimSpace(*comparator) != "" { - body["comparator"] = *comparator - } - if *windowSeconds != 0 { - body["window_seconds"] = *windowSeconds - } - if strings.TrimSpace(*state) 
!= "" { - body["state"] = *state - } - if strings.TrimSpace(*channelIDs) != "" { - body["channel_ids"] = splitCSV(*channelIDs) - } - return patchJSON(*baseURL, *apiKey, "/v1/alerts/"+url.PathEscape(*alertID), body) - case "disable": - if strings.TrimSpace(*alertID) == "" { - return fmt.Errorf("alert-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/alerts/"+url.PathEscape(*alertID), map[string]any{"reason": *reason}) - case "firings": - path := "/v1/alert-firings" - if strings.TrimSpace(*state) != "" { - path += "?state=" + url.QueryEscape(*state) - } - return getJSON(*baseURL, *apiKey, path) - case "ack": - if strings.TrimSpace(*firingID) == "" { - return fmt.Errorf("firing-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/alert-firings/"+url.PathEscape(*firingID)+":acknowledge", map[string]any{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp alerts ") - } -} - -func runNotificationChannels(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp notification-channels ") - } - fs := flag.NewFlagSet("notification-channels "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - channelID := fs.String("channel-id", "", "notification channel id") - name := fs.String("name", "", "channel name") - targetURL := fs.String("url", "", "HTTPS webhook receiver URL") - secret := fs.String("signing-secret", "", "HMAC signing secret") - state := fs.String("state", "", "active or disabled") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/notification-channels") - case "create": - return postJSON(*baseURL, *apiKey, "/v1/notification-channels", map[string]any{ - "name": *name, - "channel_type": domain.NotificationChannelWebhook, - "url": 
*targetURL, - "signing_secret": *secret, - }) - case "update": - if strings.TrimSpace(*channelID) == "" { - return fmt.Errorf("channel-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if strings.TrimSpace(*targetURL) != "" { - body["url"] = *targetURL - } - if strings.TrimSpace(*secret) != "" { - body["signing_secret"] = *secret - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/notification-channels/"+url.PathEscape(*channelID), body) - case "disable": - if strings.TrimSpace(*channelID) == "" { - return fmt.Errorf("channel-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/notification-channels/"+url.PathEscape(*channelID), map[string]any{"reason": *reason}) - case "test": - if strings.TrimSpace(*channelID) == "" { - return fmt.Errorf("channel-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/notification-channels/"+url.PathEscape(*channelID)+":test", map[string]any{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp notification-channels ") - } -} - -func runNotificationDeliveries(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp notification-deliveries ") - } - fs := flag.NewFlagSet("notification-deliveries "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - deliveryID := fs.String("delivery-id", "", "notification delivery id") - state := fs.String("state", "", "delivery state filter") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - path := "/v1/notification-deliveries" - if strings.TrimSpace(*state) != "" { - path += "?state=" + url.QueryEscape(*state) - } - return getJSON(*baseURL, *apiKey, path) - case 
"attempts": - if strings.TrimSpace(*deliveryID) == "" { - return fmt.Errorf("delivery-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/notification-deliveries/"+url.PathEscape(*deliveryID)+"/attempts") - case "retry": - if strings.TrimSpace(*deliveryID) == "" { - return fmt.Errorf("delivery-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/notification-deliveries/"+url.PathEscape(*deliveryID)+":retry", map[string]any{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp notification-deliveries ") - } -} - -func runSIEMSinks(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp siem-sinks ") - } - fs := flag.NewFlagSet("siem-sinks "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - sinkID := fs.String("sink-id", "", "SIEM sink id") - name := fs.String("name", "", "sink name") - targetURL := fs.String("url", "", "HTTPS SIEM receiver URL") - secret := fs.String("signing-secret", "", "HMAC signing secret") - state := fs.String("state", "", "active or disabled") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/siem-sinks") - case "create": - return postJSON(*baseURL, *apiKey, "/v1/siem-sinks", map[string]any{ - "name": *name, - "sink_type": domain.SIEMSinkWebhook, - "url": *targetURL, - "signing_secret": *secret, - }) - case "update": - if strings.TrimSpace(*sinkID) == "" { - return fmt.Errorf("sink-id is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*name) != "" { - body["name"] = *name - } - if strings.TrimSpace(*targetURL) != "" { - body["url"] = *targetURL - } - if strings.TrimSpace(*secret) != "" { - body["signing_secret"] = *secret - } - if strings.TrimSpace(*state) != "" { - body["state"] = 
*state - } - return patchJSON(*baseURL, *apiKey, "/v1/siem-sinks/"+url.PathEscape(*sinkID), body) - case "disable": - if strings.TrimSpace(*sinkID) == "" { - return fmt.Errorf("sink-id is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/siem-sinks/"+url.PathEscape(*sinkID), map[string]any{"reason": *reason}) - case "test": - if strings.TrimSpace(*sinkID) == "" { - return fmt.Errorf("sink-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/siem-sinks/"+url.PathEscape(*sinkID)+":test", map[string]any{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp siem-sinks ") - } -} - -func runSIEMDeliveries(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp siem-deliveries ") - } - fs := flag.NewFlagSet("siem-deliveries "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - deliveryID := fs.String("delivery-id", "", "SIEM delivery id") - state := fs.String("state", "", "delivery state filter") - reason := fs.String("reason", "", "operator reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - path := "/v1/siem-deliveries" - if strings.TrimSpace(*state) != "" { - path += "?state=" + url.QueryEscape(*state) - } - return getJSON(*baseURL, *apiKey, path) - case "attempts": - if strings.TrimSpace(*deliveryID) == "" { - return fmt.Errorf("delivery-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/siem-deliveries/"+url.PathEscape(*deliveryID)+"/attempts") - case "retry": - if strings.TrimSpace(*deliveryID) == "" { - return fmt.Errorf("delivery-id is required") - } - return postJSON(*baseURL, *apiKey, "/v1/siem-deliveries/"+url.PathEscape(*deliveryID)+":retry", map[string]any{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp siem-deliveries ") - } -} - -func runAudit(args []string) error { - if len(args) == 0 { - 
return fmt.Errorf("usage: whcp audit ") - } - fs := flag.NewFlagSet("audit "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - exportID := fs.String("export-id", "", "audit export id") - fromRaw := fs.String("from", "", "RFC3339 lower bound") - toRaw := fs.String("to", "", "RFC3339 upper bound") - includeRaw := fs.Bool("include-raw", false, "include raw payload bodies when authorized") - includePayloads := fs.Bool("include-payloads", false, "include normalized and delivery payload bodies when authorized") - includeTimelines := fs.Bool("include-timelines", false, "include event, receipt, delivery, and audit timelines") - reason := fs.String("reason", "", "operator reason") - output := fs.String("output", "", "download output path") - filePath := fs.String("file", "", "local evidence bundle path") - anchorID := fs.String("anchor-id", "", "audit chain anchor id") - fromSequence := fs.Int64("from-sequence", 0, "optional audit chain start sequence") - toSequence := fs.Int64("to-sequence", 0, "optional audit chain end sequence") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "export": - from, err := parseOptionalTime(*fromRaw) - if err != nil { - return err - } - to, err := parseOptionalTime(*toRaw) - if err != nil { - return err - } - return postJSON(*baseURL, *apiKey, "/v1/audit-events:export", map[string]any{ - "from": nullableCLITime(from), - "to": nullableCLITime(to), - "include_raw_payloads": *includeRaw, - "include_payload_bodies": *includePayloads, - "include_timelines": *includeTimelines, - "reason": *reason, - }) - case "export-status": - if strings.TrimSpace(*exportID) == "" { - return fmt.Errorf("export-id is required") - } - return getJSON(*baseURL, *apiKey, "/v1/audit-exports/"+url.PathEscape(*exportID)) - case "download": - if strings.TrimSpace(*exportID) == "" { - return 
fmt.Errorf("export-id is required") - } - return downloadAuditExport(*baseURL, *apiKey, *exportID, *output) - case "chain-head": - return getJSON(*baseURL, *apiKey, "/v1/audit-chain/head") - case "verify-chain": - return postJSON(*baseURL, *apiKey, "/v1/audit-chain:verify", map[string]any{"from_sequence": *fromSequence, "to_sequence": *toSequence}) - case "anchor": - return postJSON(*baseURL, *apiKey, "/v1/audit-chain:anchor", map[string]any{"from_sequence": *fromSequence, "to_sequence": *toSequence, "reason": *reason}) - case "anchors": - if strings.TrimSpace(*anchorID) != "" { - return getJSON(*baseURL, *apiKey, "/v1/audit-chain/anchors/"+url.PathEscape(*anchorID)) - } - return getJSON(*baseURL, *apiKey, "/v1/audit-chain/anchors") - case "verify-bundle": - return verifyEvidenceBundleFile(*filePath) - default: - return fmt.Errorf("usage: whcp audit ") - } -} - -func runRetention(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp retention ") - } - fs := flag.NewFlagSet("retention "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - policyID := fs.String("policy-id", "", "retention policy id") - resourceType := fs.String("resource-type", domain.RetentionResourceRawPayload, "raw_payload, normalized_envelope_data, delivery_payload, or audit_event") - sourceID := fs.String("source-id", "", "optional source id for raw payload retention") - retentionDays := fs.Int("retention-days", 0, "retention period in days") - state := fs.String("state", "", "active or disabled") - legalHold := fs.Bool("legal-hold", false, "put policy on legal hold") - clearLegalHold := fs.Bool("clear-legal-hold", false, "clear policy legal hold") - holdReason := fs.String("hold-reason", "", "legal hold reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - visited := map[string]bool{} - fs.Visit(func(flag *flag.Flag) { - 
visited[flag.Name] = true - }) - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/admin/retention-policies") - case "create": - return postJSON(*baseURL, *apiKey, "/v1/admin/retention-policies", map[string]any{ - "resource_type": *resourceType, - "source_id": *sourceID, - "retention_days": *retentionDays, - "state": *state, - "legal_hold": *legalHold, - "hold_reason": *holdReason, - }) - case "update": - if strings.TrimSpace(*policyID) == "" { - return fmt.Errorf("policy-id is required") - } - body := map[string]any{} - if *retentionDays > 0 { - body["retention_days"] = *retentionDays - } - if *state != "" { - body["state"] = *state - } - if *sourceID != "" { - body["source_id"] = *sourceID - } - if visited["legal-hold"] { - body["legal_hold"] = *legalHold - } - if *clearLegalHold { - body["legal_hold"] = false - body["hold_reason"] = "" - } - if *holdReason != "" { - body["hold_reason"] = *holdReason - } - return patchJSON(*baseURL, *apiKey, "/v1/admin/retention-policies/"+url.PathEscape(*policyID), body) - default: - return fmt.Errorf("usage: whcp retention ") - } -} - -func runSchemas(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp schemas ") - } - fs := flag.NewFlagSet("schemas "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - name := fs.String("name", "", "event type name") - description := fs.String("description", "", "event type description") - state := fs.String("state", "", "event type or schema state") - reason := fs.String("reason", "", "operator reason") - version := fs.String("version", "", "schema version") - schemaPath := fs.String("schema-file", "", "JSON schema file") - payloadPath := fs.String("payload-file", "", "JSON payload file") - newSchemaPath := fs.String("new-schema-file", "", "candidate JSON schema file") - if err := fs.Parse(args[1:]); err != nil { 
- return err - } - switch args[0] { - case "event-type-create": - return postJSON(*baseURL, *apiKey, "/v1/event-types", map[string]string{"name": *name, "description": *description}) - case "event-type-list": - return getJSON(*baseURL, *apiKey, "/v1/event-types") - case "event-type-get": - if strings.TrimSpace(*name) == "" { - return fmt.Errorf("name is required") - } - return getJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)) - case "event-type-update": - if strings.TrimSpace(*name) == "" { - return fmt.Errorf("name is required") - } - body := map[string]any{"reason": *reason} - if strings.TrimSpace(*description) != "" { - body["description"] = *description - } - if strings.TrimSpace(*state) != "" { - body["state"] = *state - } - return patchJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name), body) - case "event-type-delete": - if strings.TrimSpace(*name) == "" { - return fmt.Errorf("name is required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name), map[string]string{"reason": *reason}) - case "schema-create": - body, err := os.ReadFile(*schemaPath) // #nosec G304,G703 -- CLI reads an operator-selected schema file. 
- if err != nil { - return err - } - return postJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas", map[string]string{"version": *version, "schema": string(body)}) - case "schema-list": - if strings.TrimSpace(*name) == "" { - return fmt.Errorf("name is required") - } - return getJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas") - case "schema-get": - if strings.TrimSpace(*name) == "" || strings.TrimSpace(*version) == "" { - return fmt.Errorf("name and version are required") - } - return getJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version)) - case "schema-update": - if strings.TrimSpace(*name) == "" || strings.TrimSpace(*version) == "" { - return fmt.Errorf("name and version are required") - } - return patchJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version), map[string]string{"state": *state, "reason": *reason}) - case "schema-delete": - if strings.TrimSpace(*name) == "" || strings.TrimSpace(*version) == "" { - return fmt.Errorf("name and version are required") - } - return deleteJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version), map[string]string{"reason": *reason}) - case "validate": - body, err := os.ReadFile(*payloadPath) // #nosec G304,G703 -- CLI reads an operator-selected payload file. - if err != nil { - return err - } - return postJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version)+":validate", map[string]string{"payload": string(body)}) - case "check-compat": - body, err := os.ReadFile(*newSchemaPath) // #nosec G304,G703 -- CLI reads an operator-selected schema file. 
- if err != nil { - return err - } - return postJSON(*baseURL, *apiKey, "/v1/event-types/"+url.PathEscape(*name)+"/schemas/"+url.PathEscape(*version)+":check-compatibility", map[string]string{"new_schema": string(body)}) - default: - return fmt.Errorf("usage: whcp schemas ") - } -} - -func runDeadLetter(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp dead-letter ") - } - fs := flag.NewFlagSet("dead-letter "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - entryID := fs.String("entry-id", "", "dead-letter entry id") - entryIDs := fs.String("entry-ids", "", "comma-separated dead-letter entry ids") - reason := fs.String("reason", "", "release reason") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/dead-letter") - case "release": - return postJSON(*baseURL, *apiKey, "/v1/dead-letter/"+url.PathEscape(*entryID)+":release", map[string]string{"reason": *reason}) - case "bulk-release": - return postJSON(*baseURL, *apiKey, "/v1/dead-letter:bulk-release", map[string]any{"entry_ids": splitCSV(*entryIDs), "reason": *reason}) - default: - return fmt.Errorf("usage: whcp dead-letter ") - } -} - -func runQuarantine(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp quarantine ") - } - fs := flag.NewFlagSet("quarantine "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - entryID := fs.String("entry-id", "", "quarantine entry id") - reason := fs.String("reason", "", "decision reason") - routeAfterRelease := fs.Bool("route-after-release", false, "create route work after approval") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case 
"approve": - return postJSON(*baseURL, *apiKey, "/v1/quarantine/"+url.PathEscape(*entryID)+":approve", map[string]any{"reason": *reason, "route_after_release": *routeAfterRelease}) - case "reject": - return postJSON(*baseURL, *apiKey, "/v1/quarantine/"+url.PathEscape(*entryID)+":reject", map[string]string{"reason": *reason}) - default: - return fmt.Errorf("usage: whcp quarantine ") - } -} - -func runAdapters(args []string) error { - if len(args) == 0 { - return fmt.Errorf("usage: whcp adapters ") - } - fs := flag.NewFlagSet("adapters "+args[0], flag.ContinueOnError) - baseURL := fs.String("base-url", "http://localhost:8080", "API base URL") - apiKey := fs.String("api-key", os.Getenv("WEBHOOKERY_API_KEY"), "API key") - adapterID := fs.String("adapter-id", "", "adapter id") - versionID := fs.String("version-id", "", "adapter version id") - name := fs.String("name", "", "adapter name") - kind := fs.String("kind", domain.AdapterKindDeclarative, "adapter kind") - version := fs.String("version", "", "adapter version") - definitionPath := fs.String("definition-file", "", "declarative adapter definition JSON file") - requestPath := fs.String("request-file", "", "adapter test-vector request JSON file") - expectedPath := fs.String("expected-file", "", "adapter test-vector expected JSON file") - action := fs.String("action", "", "transition action") - reason := fs.String("reason", "", "audit reason") - riskLevel := fs.String("risk-level", "", "risk level") - packageSHA := fs.String("package-sha256", "", "plugin package sha256") - packageSignature := fs.String("package-signature", "", "plugin package signature") - sbomSHA := fs.String("sbom-sha256", "", "plugin SBOM sha256") - provenanceURL := fs.String("provenance-url", "", "provenance URL") - description := fs.String("description", "", "description") - if err := fs.Parse(args[1:]); err != nil { - return err - } - switch args[0] { - case "list": - return getJSON(*baseURL, *apiKey, "/v1/adapters") - case "get": - return 
getJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)) - case "create": - return postJSON(*baseURL, *apiKey, "/v1/adapters", map[string]any{"name": *name, "kind": *kind, "description": *description, "risk_level": *riskLevel, "provenance_url": *provenanceURL}) - case "versions": - return getJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)+"/versions") - case "version-create": - definition, err := readOptionalOperatorFile(*definitionPath) - if err != nil { - return err - } - body := map[string]any{"version": *version, "reason": *reason, "risk_level": *riskLevel, "package_sha256": *packageSHA, "package_signature": *packageSignature, "sbom_sha256": *sbomSHA, "provenance_url": *provenanceURL} - if definition != "" { - body["definition"] = json.RawMessage(definition) - } - return postJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)+"/versions", body) - case "vector-create": - requestBody, err := readRequiredOperatorFile(*requestPath, "request-file") - if err != nil { - return err - } - expectedBody, err := readRequiredOperatorFile(*expectedPath, "expected-file") - if err != nil { - return err - } - return postJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)+"/versions/"+url.PathEscape(*versionID)+"/test-vectors", map[string]any{"name": *name, "request": json.RawMessage(requestBody), "expected": json.RawMessage(expectedBody)}) - case "transition": - return postJSON(*baseURL, *apiKey, "/v1/adapters/"+url.PathEscape(*adapterID)+"/versions/"+url.PathEscape(*versionID)+":transition", map[string]string{"action": *action, "reason": *reason}) - default: - return fmt.Errorf("usage: whcp adapters ") - } -} - -func runSignatures(args []string) error { - if len(args) == 0 || args[0] != "verify" { - return fmt.Errorf("usage: whcp signatures verify --provider PROVIDER --secret SECRET --body FILE --header 'Name: value'") - } - fs := flag.NewFlagSet("signatures verify", flag.ContinueOnError) - providerName := 
fs.String("provider", "", "provider") - secret := fs.String("secret", "", "secret") - bodyPath := fs.String("body", "", "body file") - header := fs.String("header", "", "header as 'Name: value'") - if err := fs.Parse(args[1:]); err != nil { - return err - } - body, err := os.ReadFile(*bodyPath) - if err != nil { - return err - } - name, value, ok := strings.Cut(*header, ":") - if !ok { - return fmt.Errorf("header must be formatted as 'Name: value'") - } - adapter, ok := provider.BuiltInRegistry().Adapter(*providerName) - if !ok { - return fmt.Errorf("unknown provider %q", *providerName) - } - result := adapter.Verify(provider.VerifyInput{ - RawBody: body, - Headers: map[string][]string{strings.ToLower(strings.TrimSpace(name)): {strings.TrimSpace(value)}}, - Secret: []byte(*secret), - Now: time.Now().UTC(), - }) - out := json.NewEncoder(os.Stdout) - out.SetIndent("", " ") - return out.Encode(result) -} - -func openStore(ctx context.Context, cfg config.Config) (*postgres.Store, error) { - box, err := secretBoxFromConfig(ctx, cfg) - if err != nil { - return nil, err - } - opts := postgres.StoreOptions{RawStorageMode: cfg.RawStorageMode} - if cfg.RawStorageMode == domain.RawStorageS3 { - store, err := objectstore.NewS3Store(objectstore.S3Config{ - Endpoint: cfg.ObjectStorageEndpoint, - AccessKey: cfg.ObjectStorageAccessKey, - SecretKey: cfg.ObjectStorageSecretKey, - Bucket: cfg.ObjectStorageBucket, - Region: cfg.ObjectStorageRegion, - UseSSL: cfg.ObjectStorageUseSSL, - }) - if err != nil { - return nil, err - } - opts.ObjectStore = store - opts.ObjectBucket = store.Bucket() - } - return postgres.NewWithOptions(ctx, cfg.DatabaseURL, box, opts) -} - -func secretBoxFromConfig(ctx context.Context, cfg config.Config) (postgres.SecretBox, error) { - switch cfg.SecretBoxMode { - case "", "local": - return crypto.NewEnvelope(cfg.MasterKeyBase64) - case "vault-transit": - return crypto.NewVaultTransitEnvelope(crypto.VaultTransitConfig{ - Address: cfg.VaultAddr, - Token: 
cfg.VaultToken, - KeyName: cfg.VaultTransitKey, - }) - case "aws-kms": - awsCfg, err := awsconfig.LoadDefaultConfig(ctx, awsconfig.WithRegion(cfg.AWSRegion)) - if err != nil { - return nil, fmt.Errorf("load aws config: %w", err) - } - client := kms.NewFromConfig(awsCfg, func(opts *kms.Options) { - if strings.TrimSpace(cfg.AWSKMSEndpoint) != "" { - opts.BaseEndpoint = aws.String(strings.TrimSpace(cfg.AWSKMSEndpoint)) - } - }) - return crypto.NewAWSKMSEnvelope(crypto.AWSKMSEnvelopeConfig{ - KeyID: cfg.AWSKMSKeyID, - Client: client, - }) - default: - return nil, fmt.Errorf("unsupported secret box mode %q", cfg.SecretBoxMode) - } -} - -func serverTLSConfig(cfg config.Config) (*tls.Config, error) { - if cfg.TLSCertFile == "" && cfg.ProducerMTLSClientCAFile == "" { - return nil, nil - } - tlsConfig := &tls.Config{MinVersion: tls.VersionTLS12} - if cfg.ProducerMTLSClientCAFile != "" { - body, err := readSmallFile(cfg.ProducerMTLSClientCAFile, 1<<20) - if err != nil { - return nil, fmt.Errorf("read producer mTLS client CA file: %w", err) - } - pool := x509.NewCertPool() - if !pool.AppendCertsFromPEM(body) { - return nil, fmt.Errorf("producer mTLS client CA file did not contain certificates") - } - tlsConfig.ClientCAs = pool - tlsConfig.ClientAuth = tls.VerifyClientCertIfGiven - } - return tlsConfig, nil -} - -func opsRuntimeConfig(cfg config.Config) domain.OpsConfig { - return domain.OpsConfig{ - Environment: cfg.Environment, - UIEnabled: cfg.EnableUI, - RawStorageMode: cfg.RawStorageMode, - ObjectStorageConfigured: cfg.RawStorageMode == domain.RawStorageS3, - SecretBoxMode: cfg.SecretBoxMode, - KeyCustodyConfigured: cfg.SecretBoxMode != "", - KeyCustodyKeyRef: keyCustodyKeyRef(cfg), - MaxIngressBodyBytes: 2 << 20, - MaxHeaderBytes: 64 << 10, - MaxHeaderPairs: 128, - MaxHeaderValueBytes: 8 << 10, - } -} - -func keyCustodyKeyRef(cfg config.Config) string { - if cfg.SecretBoxMode != "aws-kms" || strings.TrimSpace(cfg.AWSKMSKeyID) == "" { - return "" - } - sum := 
sha256.Sum256([]byte(strings.TrimSpace(cfg.AWSKMSKeyID))) - return "sha256:" + hex.EncodeToString(sum[:])[:12] -} - -func runtimeAuth(cfg config.Config, lookup apppkg.APIKeyLookup) apppkg.Authenticator { - authenticators := []apppkg.Authenticator{apppkg.APIKeyAuthenticator{Lookup: lookup}} - if cfg.BootstrapAPIKeyHash != "" { - authenticators = append(authenticators, apppkg.StaticAuthenticator{ - Hash: cfg.BootstrapAPIKeyHash, - Actor: authz.Actor{ - ID: "bootstrap", - TenantID: cfg.BootstrapTenantID, - Role: authz.RoleOwner, - Scopes: []string{"*"}, - }, - }) - } - return apppkg.MultiAuthenticator{Authenticators: authenticators} -} - -func readMTLSFiles(certPath, keyPath string) (string, string, error) { - if strings.TrimSpace(certPath) == "" || strings.TrimSpace(keyPath) == "" { - return "", "", fmt.Errorf("mtls-client-cert-file and mtls-client-key-file are required together") - } - cert, err := readSmallFile(certPath, 64<<10) - if err != nil { - return "", "", fmt.Errorf("read mTLS client certificate: %w", err) - } - key, err := readSmallFile(keyPath, 64<<10) - if err != nil { - return "", "", fmt.Errorf("read mTLS client key: %w", err) - } - return string(cert), string(key), nil -} - -func readSmallFile(path string, max int64) ([]byte, error) { - path = strings.TrimSpace(path) - if path == "" || strings.ContainsRune(path, 0) { - return nil, fmt.Errorf("invalid file path") - } - info, err := os.Lstat(path) // #nosec G703 -- explicit local operator PEM path; symlinks, directories, and size are checked before use. - if err != nil { - return nil, err - } - if info.IsDir() { - return nil, fmt.Errorf("path is a directory") - } - if info.Mode()&os.ModeSymlink != 0 { - return nil, fmt.Errorf("path must not be a symlink") - } - if info.Size() > max { - return nil, fmt.Errorf("file exceeds %d bytes", max) - } - body, err := os.ReadFile(path) // #nosec G304,G703 -- explicit local operator PEM path; no shell execution and bounded to small PEM files. 
- if err != nil { - return nil, err - } - if int64(len(body)) > max { - return nil, fmt.Errorf("file exceeds %d bytes", max) - } - return body, nil -} - type deliveryAdapter struct { client deliveryhttp.Client } @@ -2398,295 +152,3 @@ func (s signalAdapter) Deliver(ctx context.Context, rawURL string, body []byte, FailureClass: result.FailureClass, }, err } - -func getJSON(baseURL, apiKey, path string) error { - endpoint, err := apiEndpoint(baseURL, path) - if err != nil { - return err - } - // #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation. - req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, endpoint, nil) - if err != nil { - return err - } - req.Header.Set("Authorization", "Bearer "+apiKey) - // #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input. - resp, err := http.DefaultClient.Do(req) - if err != nil { - return err - } - defer func() { _ = resp.Body.Close() }() - _, err = io.Copy(os.Stdout, resp.Body) - return err -} - -func postJSON(baseURL, apiKey, path string, body any) error { - raw, err := json.Marshal(body) - if err != nil { - return err - } - endpoint, err := apiEndpoint(baseURL, path) - if err != nil { - return err - } - // #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation. - req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, endpoint, bytes.NewReader(raw)) - if err != nil { - return err - } - req.Header.Set("Authorization", "Bearer "+apiKey) - req.Header.Set("Content-Type", "application/json") - // #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input. 
- resp, err := http.DefaultClient.Do(req) - if err != nil { - return err - } - defer func() { _ = resp.Body.Close() }() - _, err = io.Copy(os.Stdout, resp.Body) - return err -} - -func patchJSON(baseURL, apiKey, path string, body any) error { - raw, err := json.Marshal(body) - if err != nil { - return err - } - endpoint, err := apiEndpoint(baseURL, path) - if err != nil { - return err - } - // #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation. - req, err := http.NewRequestWithContext(context.Background(), http.MethodPatch, endpoint, bytes.NewReader(raw)) - if err != nil { - return err - } - req.Header.Set("Authorization", "Bearer "+apiKey) - req.Header.Set("Content-Type", "application/json") - // #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input. - resp, err := http.DefaultClient.Do(req) - if err != nil { - return err - } - defer func() { _ = resp.Body.Close() }() - _, err = io.Copy(os.Stdout, resp.Body) - return err -} - -func deleteJSON(baseURL, apiKey, path string, body any) error { - raw, err := json.Marshal(body) - if err != nil { - return err - } - endpoint, err := apiEndpoint(baseURL, path) - if err != nil { - return err - } - // #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation. - req, err := http.NewRequestWithContext(context.Background(), http.MethodDelete, endpoint, bytes.NewReader(raw)) - if err != nil { - return err - } - req.Header.Set("Authorization", "Bearer "+apiKey) - req.Header.Set("Content-Type", "application/json") - // #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input. 
- resp, err := http.DefaultClient.Do(req) - if err != nil { - return err - } - defer func() { _ = resp.Body.Close() }() - _, err = io.Copy(os.Stdout, resp.Body) - return err -} - -func downloadAuditExport(baseURL, apiKey, exportID, outputPath string) error { - endpoint, err := apiEndpoint(baseURL, "/v1/audit-exports/"+url.PathEscape(exportID)+":download") - if err != nil { - return err - } - // #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation. - req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, endpoint, nil) - if err != nil { - return err - } - req.Header.Set("Authorization", "Bearer "+apiKey) - // #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input. - resp, err := http.DefaultClient.Do(req) - if err != nil { - return err - } - defer func() { _ = resp.Body.Close() }() - if resp.StatusCode < 200 || resp.StatusCode > 299 { - return fmt.Errorf("audit export download failed with status %d", resp.StatusCode) - } - if outputPath == "" { - outputPath = exportID + ".tar.gz" - } - body, err := io.ReadAll(resp.Body) - if err != nil { - return err - } - return writePrivateFile(outputPath, body) -} - -func verifyEvidenceBundleFile(path string) error { - if strings.TrimSpace(path) == "" { - return fmt.Errorf("file is required") - } - body, err := os.ReadFile(path) // #nosec G304,G703 -- CLI verifies an operator-selected local evidence bundle. 
- if err != nil { - return err - } - result, err := evidence.VerifyTarGzipBundle(body) - if err != nil { - return err - } - return json.NewEncoder(os.Stdout).Encode(result) -} - -func exportRawPayload(baseURL, apiKey, eventID, outputPath string) error { - if strings.TrimSpace(eventID) == "" { - return fmt.Errorf("event-id is required") - } - endpoint, err := apiEndpoint(baseURL, "/v1/events/"+url.PathEscape(eventID)+"/raw") - if err != nil { - return err - } - // #nosec G107,G704 -- CLI connects only to the operator-supplied Webhookery API URL after scheme/host validation. - req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, endpoint, nil) - if err != nil { - return err - } - req.Header.Set("Authorization", "Bearer "+apiKey) - // #nosec G704 -- operator-supplied CLI API URL; not reachable from untrusted remote input. - resp, err := http.DefaultClient.Do(req) - if err != nil { - return err - } - defer func() { _ = resp.Body.Close() }() - var payload struct { - BodyBase64 string `json:"body_base64"` - } - if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { - return err - } - if resp.StatusCode < 200 || resp.StatusCode > 299 { - return fmt.Errorf("raw export failed with status %d", resp.StatusCode) - } - raw, err := base64.StdEncoding.DecodeString(payload.BodyBase64) - if err != nil { - return err - } - if outputPath == "" || outputPath == "-" { - _, err = os.Stdout.Write(raw) - return err - } - return writePrivateFile(outputPath, raw) -} - -func readRequiredOperatorFile(path, flagName string) (string, error) { - if strings.TrimSpace(path) == "" { - return "", fmt.Errorf("%s is required", flagName) - } - body, err := os.ReadFile(path) // #nosec G304,G703 -- CLI reads an operator-selected local file. 
- if err != nil { - return "", err - } - return string(body), nil -} - -func readOptionalOperatorFile(path string) (string, error) { - if strings.TrimSpace(path) == "" { - return "", nil - } - return readRequiredOperatorFile(path, "file") -} - -func writePrivateFile(outputPath string, body []byte) error { - if strings.TrimSpace(outputPath) == "" || outputPath == "-" { - return fmt.Errorf("output path is required") - } - if info, err := os.Lstat(outputPath); err == nil { // #nosec G304,G703 -- CLI checks an operator-selected path before writing. - if info.Mode()&os.ModeSymlink != 0 { - return fmt.Errorf("refusing to write through symlink: %s", outputPath) - } - } - return os.WriteFile(outputPath, body, 0o600) // #nosec G304,G306,G703 -- CLI writes operator-selected export files with private permissions. -} - -func apiEndpoint(baseURL, path string) (string, error) { - parsed, err := url.Parse(strings.TrimRight(baseURL, "/")) - if err != nil { - return "", err - } - if parsed.Scheme != "http" && parsed.Scheme != "https" { - return "", fmt.Errorf("base-url must use http or https") - } - if parsed.Host == "" || parsed.User != nil { - return "", fmt.Errorf("base-url must include a host and must not include credentials") - } - return parsed.String() + path, nil -} - -func splitCSV(value string) []string { - if value == "" { - return nil - } - parts := strings.Split(value, ",") - out := make([]string, 0, len(parts)) - for _, part := range parts { - part = strings.TrimSpace(part) - if part != "" { - out = append(out, part) - } - } - return out -} - -func valueOrDefault(value, fallback int) int { - if value < 0 { - return fallback - } - return value -} - -func valueOrDefaultString(value, fallback string) string { - if strings.TrimSpace(value) == "" { - return fallback - } - return value -} - -func parseKeyValueCSV(value string) map[string]string { - out := map[string]string{} - for _, part := range splitCSV(value) { - key, val, ok := strings.Cut(part, "=") - if !ok { - 
continue - } - key = strings.TrimSpace(key) - if key == "" { - continue - } - out[key] = strings.TrimSpace(val) - } - return out -} - -func parseOptionalTime(value string) (time.Time, error) { - value = strings.TrimSpace(value) - if value == "" { - return time.Time{}, nil - } - parsed, err := time.Parse(time.RFC3339, value) - if err != nil { - return time.Time{}, fmt.Errorf("time must be RFC3339: %w", err) - } - return parsed.UTC(), nil -} - -func nullableCLITime(value time.Time) any { - if value.IsZero() { - return nil - } - return value -} diff --git a/cmd/whcp/main_test.go b/cmd/whcp/main_test.go index 07ffb12..ca91405 100644 --- a/cmd/whcp/main_test.go +++ b/cmd/whcp/main_test.go @@ -3,13 +3,19 @@ package main import ( "bytes" "context" + "encoding/base64" "encoding/json" + "errors" "io" + "net/http" + "net/http/httptest" "os" "path/filepath" + "strings" "testing" "time" + apppkg "webhookery/internal/app" "webhookery/internal/config" "webhookery/internal/evidence" ) @@ -29,6 +35,62 @@ func TestWritePrivateFileUsesPrivatePermissions(t *testing.T) { } } +func TestRunDispatchesSubcommandUsage(t *testing.T) { + if err := run(nil); err == nil || !strings.Contains(err.Error(), "usage: whcp") { + t.Fatalf("expected top-level usage, got %v", err) + } + if err := run([]string{"unknown"}); err == nil || !strings.Contains(err.Error(), "usage: whcp") { + t.Fatalf("expected unknown command usage, got %v", err) + } + + for _, command := range []string{ + "admin", + "api-keys", + "producer-clients", + "producer-mtls-identities", + "key-custody", + "identity-providers", + "scim-tokens", + "role-bindings", + "access-policies", + "authz", + "events", + "sources", + "provider-connections", + "adapters", + "endpoints", + "subscriptions", + "retry-policies", + "routes", + "transformations", + "deliveries", + "replay-jobs", + "replay-approval-policies", + "reconciliation-jobs", + "ops", + "alerts", + "notification-channels", + "notification-deliveries", + "siem-sinks", + 
"siem-deliveries", + "audit", + "evidence", + "retention", + "schemas", + "dead-letter", + "quarantine", + "incidents", + "signatures", + } { + t.Run(command, func(t *testing.T) { + err := run([]string{command}) + if err == nil || !strings.Contains(err.Error(), "usage: whcp "+command) { + t.Fatalf("expected %s usage, got %v", command, err) + } + }) + } +} + func TestWritePrivateFileRejectsSymlink(t *testing.T) { dir := t.TempDir() target := filepath.Join(dir, "target") @@ -74,6 +136,654 @@ func TestVerifyEvidenceBundleFileAcceptsValidBundle(t *testing.T) { } } +func TestViewEvidenceBundleFileRequiresExplicitFile(t *testing.T) { + if err := viewEvidenceBundleFile(""); err == nil { + t.Fatal("expected missing file path error") + } +} + +func TestViewEvidenceBundleFileSummarizesWithoutPrintingBodies(t *testing.T) { + bundle, err := evidence.BuildTarGzipBundle(evidence.Manifest{ + ExportID: "exp_1", + TenantID: "ten_1", + CreatedAt: time.Unix(1, 0).UTC(), + IncludedEvents: []string{"evt_1"}, + IncludedIncidents: []string{"inc_1"}, + IncludeRawPayloads: true, + IncludePayloadBodies: true, + IncludeTimelines: true, + }, map[string][]byte{ + "audit_events.jsonl": []byte(`{"action":"incident_report.generated"}` + "\n"), + "incident_report.json": []byte(`{"body":"do-not-print-incident-json"}` + "\n"), + "incident_report.md": []byte("do-not-print-incident-markdown\n"), + "raw_payload.bin": []byte("do-not-print-raw-payload\n"), + "timelines.jsonl": []byte( + `{"kind":"delivery","state":"failed"}` + "\n" + + `{"kind":"replay","state":"succeeded"}` + "\n", + ), + }) + if err != nil { + t.Fatal(err) + } + path := filepath.Join(t.TempDir(), "bundle.tar.gz") + if err := os.WriteFile(path, bundle.Bytes, 0o600); err != nil { + t.Fatal(err) + } + + oldStdout := os.Stdout + reader, writer, err := os.Pipe() + if err != nil { + t.Fatal(err) + } + os.Stdout = writer + defer func() { os.Stdout = oldStdout }() + + err = viewEvidenceBundleFile(path) + _ = writer.Close() + if err != nil { + 
t.Fatal(err) + } + body, err := io.ReadAll(reader) + if err != nil { + t.Fatal(err) + } + for _, forbidden := range []string{"do-not-print-incident-json", "do-not-print-incident-markdown", "do-not-print-raw-payload"} { + if bytes.Contains(body, []byte(forbidden)) { + t.Fatalf("evidence view printed bundled file body %q in %s", forbidden, body) + } + } + var view evidence.BundleView + if err := json.Unmarshal(body, &view); err != nil { + t.Fatalf("invalid view JSON %q: %v", body, err) + } + if view.SchemaVersion != evidence.BundleViewSchemaV1 || !view.Verification.Valid { + t.Fatalf("unexpected view status: %+v", view) + } + if view.Summary.IncludedEventCount != 1 || view.Summary.IncludedIncidentCount != 1 || view.Summary.TimelineEntryCount != 2 || view.Summary.AuditEventCount != 1 { + t.Fatalf("unexpected summary counts: %+v", view.Summary) + } + if !view.Summary.HasIncidentReportJSON || !view.Summary.HasIncidentReportMarkdown || view.Summary.TimelineKinds["delivery"] != 1 || view.Summary.TimelineKinds["replay"] != 1 { + t.Fatalf("unexpected summary details: %+v", view.Summary) + } + if !strings.Contains(strings.Join(view.Warnings, "\n"), "raw payload bodies may be included") { + t.Fatalf("expected raw-payload handling warning, got %+v", view.Warnings) + } +} + +func TestAPIEndpointRejectsUnsafeBaseURLs(t *testing.T) { + tests := []string{ + "ftp://api.example", + "https://user:pass@api.example", + "https:///missing-host", + } + for _, baseURL := range tests { + t.Run(baseURL, func(t *testing.T) { + if _, err := apiEndpoint(baseURL, "/v1/events"); err == nil { + t.Fatal("expected unsafe base URL rejection") + } + }) + } + + endpoint, err := apiEndpoint("https://api.example/", "/v1/events") + if err != nil { + t.Fatal(err) + } + if endpoint != "https://api.example/v1/events" { + t.Fatalf("unexpected endpoint %q", endpoint) + } +} + +func TestCLIParsersTrimAndIgnoreInvalidEntries(t *testing.T) { + if got := splitCSV(" events:read, ,events:write "); strings.Join(got, 
"|") != "events:read|events:write" { + t.Fatalf("unexpected csv split: %#v", got) + } + values := parseKeyValueCSV("provider=stripe,broken, empty = , =ignored,region=eu") + if values["provider"] != "stripe" || values["region"] != "eu" || values["empty"] != "" { + t.Fatalf("unexpected key value parse: %#v", values) + } + if _, ok := values[""]; ok { + t.Fatalf("empty key must be ignored: %#v", values) + } +} + +func TestParseOptionalTimeRequiresRFC3339AndNormalizesUTC(t *testing.T) { + if zero, err := parseOptionalTime(""); err != nil || !zero.IsZero() { + t.Fatalf("empty time should be nil value, got %v err=%v", zero, err) + } + parsed, err := parseOptionalTime("2026-05-28T12:30:00+03:00") + if err != nil { + t.Fatal(err) + } + if parsed.Format(time.RFC3339) != "2026-05-28T09:30:00Z" { + t.Fatalf("time was not normalized to UTC: %s", parsed.Format(time.RFC3339)) + } + if _, err := parseOptionalTime("2026-05-28"); err == nil { + t.Fatal("expected non-RFC3339 time rejection") + } +} + +func TestFormatEventTimelineSupportsTableMarkdownAndJSON(t *testing.T) { + entries := []apppkg.EventTimelineEntry{{ + SchemaVersion: apppkg.EventTimelineSchemaV1, + Sequence: 1, + Kind: "replay", + RefID: "rpl_1", + State: "completed", + Detail: "reason_code=incident_recovery reason=receiver fixed", + OccurredAt: time.Unix(123, 0).UTC(), + }} + + table, err := formatEventTimeline(entries, "table") + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(table), "SEQ\tOCCURRED_AT\tKIND\tREF_ID\tSTATE\tDETAIL") || !strings.Contains(string(table), "1\t1970-01-01T00:02:03Z\treplay\trpl_1\tcompleted\treason_code=incident_recovery") { + t.Fatalf("unexpected table timeline:\n%s", table) + } + + markdown, err := formatEventTimeline(entries, "markdown") + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(markdown), "## Event Timeline") || !strings.Contains(string(markdown), "`webhookery.event_timeline.v1`") || !strings.Contains(string(markdown), "| 1 | 
`1970-01-01T00:02:03Z` | `replay` | `rpl_1` | `completed` |") { + t.Fatalf("unexpected markdown timeline:\n%s", markdown) + } + + jsonBody, err := formatEventTimeline(entries, "json") + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(jsonBody), `"schema_version":"webhookery.event_timeline.v1"`) { + t.Fatalf("unexpected json timeline:\n%s", jsonBody) + } + + if _, err := formatEventTimeline(entries, "xml"); err == nil { + t.Fatal("expected unknown timeline format rejection") + } +} + +func TestReplayCreateRequiresReasonCodeBeforeRequest(t *testing.T) { + err := runReplayJobs([]string{"create", "--event-id", "evt_1", "--reason", "debug", "--base-url", "https://api.example", "--api-key", "whkey_test"}) + if err == nil || !strings.Contains(err.Error(), "reason-code is required") { + t.Fatalf("expected missing reason-code validation, got %v", err) + } +} + +func TestReplayCreateApprovalExpiryRequiresApproval(t *testing.T) { + err := runReplayJobs([]string{"create", "--event-id", "evt_1", "--reason-code", "support_investigation", "--reason", "debug", "--approval-expires-at", "2026-06-05T12:00:00Z", "--base-url", "https://api.example", "--api-key", "whkey_test"}) + if err == nil || !strings.Contains(err.Error(), "approval-expires-at requires require-approval") { + t.Fatalf("expected approval expiry validation, got %v", err) + } +} + +func TestPostJSONSendsBearerAndJSONBody(t *testing.T) { + var gotAuth, gotContentType, gotBody string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotAuth = r.Header.Get("Authorization") + gotContentType = r.Header.Get("Content-Type") + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatal(err) + } + gotBody = string(body) + w.WriteHeader(http.StatusAccepted) + _, _ = w.Write([]byte(`{"ok":true}`)) + })) + defer server.Close() + + if err := postJSON(server.URL, "whkey_test", "/v1/replay-jobs", map[string]string{"event_id": "evt_1"}); err != nil { + t.Fatal(err) + } + 
if gotAuth != "Bearer whkey_test" { + t.Fatalf("unexpected auth header %q", gotAuth) + } + if gotContentType != "application/json" { + t.Fatalf("unexpected content type %q", gotContentType) + } + if gotBody != `{"event_id":"evt_1"}` { + t.Fatalf("unexpected JSON body %q", gotBody) + } +} + +func TestJSONRequestHelpersUseExpectedMethodsAndPaths(t *testing.T) { + var seen []string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatal(err) + } + seen = append(seen, r.Method+" "+r.URL.Path+" "+r.Header.Get("Authorization")+" "+string(body)) + _, _ = w.Write([]byte(`{"ok":true}`)) + })) + defer server.Close() + + if err := getJSON(server.URL, "whkey_test", "/v1/events"); err != nil { + t.Fatal(err) + } + if err := patchJSON(server.URL, "whkey_test", "/v1/sources/src_1", map[string]string{"reason": "rename"}); err != nil { + t.Fatal(err) + } + if err := deleteJSON(server.URL, "whkey_test", "/v1/sources/src_1", map[string]string{"reason": "delete"}); err != nil { + t.Fatal(err) + } + + want := []string{ + `GET /v1/events Bearer whkey_test `, + `PATCH /v1/sources/src_1 Bearer whkey_test {"reason":"rename"}`, + `DELETE /v1/sources/src_1 Bearer whkey_test {"reason":"delete"}`, + } + if strings.Join(seen, "\n") != strings.Join(want, "\n") { + t.Fatalf("unexpected requests:\ngot:\n%s\nwant:\n%s", strings.Join(seen, "\n"), strings.Join(want, "\n")) + } +} + +func TestCLIResourceCommandsSendExpectedRequests(t *testing.T) { + type request struct { + method string + path string + body string + } + var seen []request + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatal(err) + } + path := r.URL.Path + if r.URL.RawQuery != "" { + path += "?" 
+ r.URL.RawQuery + } + seen = append(seen, request{method: r.Method, path: path, body: string(body)}) + _, _ = w.Write([]byte(`{"ok":true}`)) + })) + defer server.Close() + + common := []string{"--base-url", server.URL, "--api-key", "whkey_cli"} + cases := []struct { + name string + args []string + wantMethod string + wantPath string + bodyContains []string + }{ + { + name: "sources create", + args: append([]string{"sources", "create", "--name", "Stripe", "--provider", "stripe", "--secret", "whsec_test"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/sources", + bodyContains: []string{`"name":"Stripe"`, `"provider":"stripe"`, `"verification_secret":"whsec_test"`}, + }, + { + name: "events search", + args: append([]string{"events", "search", "--provider", "stripe", "--external-id", "evt_external", "--verification", "invalid", "--status", "dlq", "--received-after", "2026-06-04T10:00:00Z", "--route-id", "rte_1", "--delivery-id", "del_1", "--limit", "25"}, common...), + wantMethod: http.MethodGet, + wantPath: "/v1/events?delivery_id=del_1&external_id=evt_external&limit=25&provider=stripe&received_after=2026-06-04T10%3A00%3A00Z&route_id=rte_1&status=dlq&verification=invalid", + }, + { + name: "endpoints validate url", + args: append([]string{"endpoints", "validate-url", "--url", "https://receiver.example.com/hook"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/endpoints:validate-url", + bodyContains: []string{`"url":"https://receiver.example.com/hook"`}, + }, + { + name: "subscriptions create", + args: append([]string{"subscriptions", "create", "--endpoint-id", "end_1", "--event-types", "invoice.created,customer.created", "--payload-format", "canonical_json"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/subscriptions", + bodyContains: []string{`"endpoint_id":"end_1"`, `"event_types":["invoice.created","customer.created"]`, `"payload_format":"canonical_json"`}, + }, + { + name: "retry policy create", + args: 
append([]string{"retry-policies", "create", "--name", "standard", "--max-attempts", "4", "--initial-delay-seconds", "2", "--max-delay-seconds", "30"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/retry-policies", + bodyContains: []string{`"name":"standard"`, `"max_attempts":4`, `"initial_delay_seconds":2`, `"max_delay_seconds":30`}, + }, + { + name: "routes activate", + args: append([]string{"routes", "activate", "--route-id", "rte_1", "--reason", "ship"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/routes/rte_1:activate", + bodyContains: []string{`"reason":"ship"`}, + }, + { + name: "replay create", + args: append([]string{"replay-jobs", "create", "--event-id", "evt_1", "--endpoint-id", "end_1", "--reason-code", "support_investigation", "--reason", "debug", "--require-approval", "--approval-expires-at", "2026-06-05T12:00:00Z"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/replay-jobs", + bodyContains: []string{`"event_id":"evt_1"`, `"endpoint_id":"end_1"`, `"reason_code":"support_investigation"`, `"reason":"debug"`, `"require_approval":true`, `"approval_expires_at":"2026-06-05T12:00:00Z"`}, + }, + { + name: "replay preview", + args: append([]string{"replay-jobs", "preview", "--event-id", "evt_1", "--reason-code", "operator_requested", "--reason", "inspect"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/replay-jobs/preview", + bodyContains: []string{`"event_id":"evt_1"`, `"reason_code":"operator_requested"`, `"reason":"inspect"`}, + }, + { + name: "replay approval policy create", + args: append([]string{"replay-approval-policies", "create", "--scope-type", "source", "--scope-id", "src_1", "--default-expiry-seconds", "3600", "--reason", "sensitive source"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/replay-approval-policies", + bodyContains: []string{`"scope_type":"source"`, `"scope_id":"src_1"`, `"require_approval":true`, `"default_expiry_seconds":3600`, `"reason":"sensitive source"`}, + 
}, + { + name: "replay approval policy disable", + args: append([]string{"replay-approval-policies", "disable", "--policy-id", "rap_1", "--reason", "retire policy"}, common...), + wantMethod: http.MethodDelete, + wantPath: "/v1/replay-approval-policies/rap_1", + bodyContains: []string{`"reason":"retire policy"`}, + }, + { + name: "alert firings filter", + args: append([]string{"alerts", "firings", "--state", "open"}, common...), + wantMethod: http.MethodGet, + wantPath: "/v1/alert-firings?state=open", + }, + { + name: "notification channel test", + args: append([]string{"notification-channels", "test", "--channel-id", "nch_1", "--reason", "probe"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/notification-channels/nch_1:test", + bodyContains: []string{`"reason":"probe"`}, + }, + { + name: "schema event type update", + args: append([]string{"schemas", "event-type-update", "--name", "invoice.created", "--description", "updated", "--state", "active", "--reason", "docs"}, common...), + wantMethod: http.MethodPatch, + wantPath: "/v1/event-types/invoice.created", + bodyContains: []string{`"description":"updated"`, `"state":"active"`, `"reason":"docs"`}, + }, + { + name: "adapter transition", + args: append([]string{"adapters", "transition", "--adapter-id", "adp_1", "--version-id", "adv_1", "--action", "approve", "--reason", "reviewed"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/adapters/adp_1/versions/adv_1:transition", + bodyContains: []string{`"action":"approve"`, `"reason":"reviewed"`}, + }, + { + name: "api key create", + args: append([]string{"api-keys", "create", "--name", "operator", "--user-id", "usr_1", "--email", "ops@example.com", "--role", "operator", "--scopes", "events:read,deliveries:read"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/api-keys", + bodyContains: []string{`"email":"ops@example.com"`, `"role":"operator"`, `"scopes":["events:read","deliveries:read"]`}, + }, + { + name: "producer client update", 
+ args: append([]string{"producer-clients", "update", "--client-id", "pcl_1", "--name", "producer", "--source-id", "src_1", "--scopes", "events:write", "--token-ttl-seconds", "120", "--state", "active", "--reason", "rotate"}, common...), + wantMethod: http.MethodPatch, + wantPath: "/v1/producer-clients/pcl_1", + bodyContains: []string{`"name":"producer"`, `"source_id":"src_1"`, `"token_ttl_seconds":120`, `"reason":"rotate"`}, + }, + { + name: "identity provider create", + args: append([]string{"identity-providers", "create", "--name", "OIDC", "--issuer-url", "https://issuer.example.com", "--client-id", "client", "--client-secret", "secret", "--allowed-email-domains", "example.com,ops.example.com"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/identity-providers", + bodyContains: []string{`"name":"OIDC"`, `"issuer_url":"https://issuer.example.com"`, `"client_secret":"secret"`, `"allowed_email_domains":["example.com","ops.example.com"]`}, + }, + { + name: "scim token revoke", + args: append([]string{"scim-tokens", "revoke", "--token-id", "sct_1", "--reason", "compromised"}, common...), + wantMethod: http.MethodDelete, + wantPath: "/v1/scim-tokens/sct_1", + bodyContains: []string{`"reason":"compromised"`}, + }, + { + name: "role binding create", + args: append([]string{"role-bindings", "create", "--principal-type", "group", "--principal-id", "scg_1", "--role", "security", "--resource-family", "events", "--resource-id", "*", "--environment", "prod", "--reason", "least privilege"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/role-bindings", + bodyContains: []string{`"principal_type":"group"`, `"principal_id":"scg_1"`, `"role":"security"`, `"resource_family":"events"`, `"environment":"prod"`}, + }, + { + name: "access policy create", + args: append([]string{"access-policies", "create", "--name", "deny raw", "--action", "events:raw", "--effect", "deny", "--resource-family", "events", "--environment", "prod", "--conditions", 
`{"ip":"outside"}`, "--reason", "policy"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/access-policies", + bodyContains: []string{`"name":"deny raw"`, `"action":"events:raw"`, `"effect":"deny"`, `"conditions":{"ip":"outside"}`}, + }, + { + name: "authz explain", + args: append([]string{"authz", "explain", "--actor-id", "usr_1", "--action", "events:raw", "--resource-family", "events", "--resource-id", "evt_1", "--environment", "prod"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/authz:explain", + bodyContains: []string{`"actor_id":"usr_1"`, `"action":"events:raw"`, `"resource_id":"evt_1"`, `"environment":"prod"`}, + }, + { + name: "siem sink test", + args: append([]string{"siem-sinks", "test", "--sink-id", "snk_1", "--reason", "probe"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/siem-sinks/snk_1:test", + bodyContains: []string{`"reason":"probe"`}, + }, + { + name: "audit export", + args: append([]string{"audit", "export", "--from", "2026-05-28T09:00:00Z", "--to", "2026-05-28T10:00:00Z", "--include-raw", "--include-payloads", "--include-timelines", "--reason", "evidence"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/audit-events:export", + bodyContains: []string{`"from":"2026-05-28T09:00:00Z"`, `"to":"2026-05-28T10:00:00Z"`, `"include_raw_payloads":true`, `"include_payload_bodies":true`, `"include_timelines":true`}, + }, + { + name: "retention update clears legal hold", + args: append([]string{"retention", "update", "--policy-id", "ret_1", "--retention-days", "30", "--clear-legal-hold"}, common...), + wantMethod: http.MethodPatch, + wantPath: "/v1/admin/retention-policies/ret_1", + bodyContains: []string{`"retention_days":30`, `"legal_hold":false`, `"hold_reason":""`}, + }, + { + name: "dead letter bulk release", + args: append([]string{"dead-letter", "bulk-release", "--entry-ids", "dlq_1,dlq_2", "--reason-code", "incident_recovery", "--reason", "recovered"}, common...), + wantMethod: http.MethodPost, + 
wantPath: "/v1/dead-letter:bulk-release", + bodyContains: []string{`"entry_ids":["dlq_1","dlq_2"]`, `"reason_code":"incident_recovery"`, `"reason":"recovered"`}, + }, + { + name: "quarantine approve", + args: append([]string{"quarantine", "approve", "--entry-id", "qua_1", "--route-after-release", "--reason", "verified"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/quarantine/qua_1:approve", + bodyContains: []string{`"reason":"verified"`, `"route_after_release":true`}, + }, + { + name: "incident create", + args: append([]string{"incidents", "create", "--title", "Stripe payment webhook failed", "--reason", "support investigation"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/incidents", + bodyContains: []string{`"title":"Stripe payment webhook failed"`, `"reason":"support investigation"`}, + }, + { + name: "incident add event", + args: append([]string{"incidents", "add-event", "--incident-id", "inc_1", "--event-id", "evt_1", "--reason", "attach failed payment"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/incidents/inc_1/events", + bodyContains: []string{`"event_id":"evt_1"`, `"reason":"attach failed payment"`}, + }, + { + name: "incident generate report", + args: append([]string{"incidents", "generate-report", "--incident-id", "inc_1", "--reason", "handoff"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/incidents/inc_1/generate-report", + bodyContains: []string{`"reason":"handoff"`}, + }, + { + name: "incident evidence export", + args: append([]string{"incidents", "export", "--incident-id", "inc_1", "--reason", "customer evidence"}, common...), + wantMethod: http.MethodPost, + wantPath: "/v1/incidents/inc_1/evidence-export", + bodyContains: []string{`"reason":"customer evidence"`}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + seen = nil + if err := run(tc.args); err != nil { + t.Fatal(err) + } + if len(seen) != 1 { + t.Fatalf("expected one request, got %+v", seen) + } + 
got := seen[0] + if got.method != tc.wantMethod || got.path != tc.wantPath { + t.Fatalf("unexpected request: %+v", got) + } + if got.method != http.MethodGet && got.body == "" { + t.Fatal("expected JSON body") + } + for _, needle := range tc.bodyContains { + if !strings.Contains(got.body, needle) { + t.Fatalf("request body %s did not contain %s", got.body, needle) + } + } + }) + } +} + +func TestDownloadAuditExportWritesPrivateFile(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/audit-exports/exp_1:download" { + t.Fatalf("unexpected path %s", r.URL.Path) + } + if got := r.Header.Get("Authorization"); got != "Bearer whkey_test" { + t.Fatalf("unexpected auth header %q", got) + } + _, _ = w.Write([]byte("bundle")) + })) + defer server.Close() + + output := filepath.Join(t.TempDir(), "exp.tar.gz") + if err := downloadAuditExport(server.URL, "whkey_test", "exp_1", output); err != nil { + t.Fatal(err) + } + body, err := os.ReadFile(output) + if err != nil { + t.Fatal(err) + } + if string(body) != "bundle" { + t.Fatalf("unexpected bundle body %q", string(body)) + } + info, err := os.Stat(output) + if err != nil { + t.Fatal(err) + } + if got := info.Mode().Perm(); got != 0o600 { + t.Fatalf("permissions=%o want 0600", got) + } +} + +func TestProblemResponseErrorIncludesStableCodeAndRequestID(t *testing.T) { + body := []byte(`{"code":"authorization_error","stable_code":"WEBHOOKERY_TENANT_ACCESS_DENIED","request_id":"req_cli","detail":"redacted detail"}`) + err := problemResponseError("request failed", http.StatusForbidden, body) + if err == nil { + t.Fatal("expected problem response error") + } + got := err.Error() + for _, want := range []string{"403", "WEBHOOKERY_TENANT_ACCESS_DENIED", "req_cli"} { + if !strings.Contains(got, want) { + t.Fatalf("error %q did not contain %q", got, want) + } + } + for _, forbidden := range []string{"whkey_test", "redacted detail"} { + if 
strings.Contains(got, forbidden) { + t.Fatalf("error %q leaked %q", got, forbidden) + } + } +} + +func TestExportRawPayloadDecodesBase64ToPrivateFile(t *testing.T) { + rawBody := []byte("raw evidence bytes") + var gotAuthorization string + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/v1/events/evt_1/raw" { + t.Fatalf("unexpected path %s", r.URL.Path) + } + if got := r.URL.Query().Get("reason"); got != "support case" { + t.Fatalf("unexpected raw export reason %q", got) + } + gotAuthorization = r.Header.Get("Authorization") + _ = json.NewEncoder(w).Encode(map[string]string{"body_base64": base64.StdEncoding.EncodeToString(rawBody)}) + })) + defer server.Close() + + output := filepath.Join(t.TempDir(), "raw.bin") + if err := exportRawPayload(server.URL, "whkey_tenant_scoped", "evt_1", "support case", output); err != nil { + t.Fatal(err) + } + if gotAuthorization != "Bearer whkey_tenant_scoped" { + t.Fatalf("authorization header %q did not use the scoped key", gotAuthorization) + } + body, err := os.ReadFile(output) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(body, rawBody) { + t.Fatalf("unexpected raw body %q", string(body)) + } +} + +func TestExportRawPayloadRequiresReasonBeforeRequest(t *testing.T) { + called := false + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + called = true + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + output := filepath.Join(t.TempDir(), "raw.bin") + err := exportRawPayload(server.URL, "whkey_test", "evt_1", " ", output) + if err == nil || !strings.Contains(err.Error(), "reason is required") { + t.Fatalf("expected missing reason error, got %v", err) + } + if called { + t.Fatal("raw payload export request was sent without a reason") + } + if _, statErr := os.Stat(output); !errors.Is(statErr, os.ErrNotExist) { + t.Fatalf("raw output should not be created without a reason, stat err=%v", statErr) + } +} 
+ +func TestOperatorFileHelpers(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "secret.txt") + if err := os.WriteFile(path, []byte("secret\n"), 0o600); err != nil { + t.Fatal(err) + } + got, err := readRequiredOperatorFile(path, "secret-file") + if err != nil { + t.Fatal(err) + } + if got != "secret\n" { + t.Fatalf("unexpected file body %q", got) + } + if _, err := readRequiredOperatorFile("", "secret-file"); err == nil { + t.Fatal("expected required file validation") + } + if got, err := readOptionalOperatorFile(""); err != nil || got != "" { + t.Fatalf("empty optional file got %q err=%v", got, err) + } + if got, err := readOptionalOperatorFile(path); err != nil || got != "secret\n" { + t.Fatalf("optional file got %q err=%v", got, err) + } +} + +func TestSmallCLIValueHelpers(t *testing.T) { + if valueOrDefault(-1, 10) != 10 || valueOrDefault(0, 10) != 0 { + t.Fatal("unexpected integer default behavior") + } + if valueOrDefaultString(" ", "fallback") != "fallback" || valueOrDefaultString("value", "fallback") != "value" { + t.Fatal("unexpected string default behavior") + } + if nullableCLITime(time.Time{}) != nil { + t.Fatal("zero CLI time should encode as null") + } + now := time.Unix(1, 0).UTC() + if nullableCLITime(now) != now { + t.Fatal("non-zero CLI time should be preserved") + } +} + func TestReadMTLSFilesRequiresBothFiles(t *testing.T) { if _, _, err := readMTLSFiles("client.crt", ""); err == nil { t.Fatal("expected mTLS file pair validation") @@ -247,3 +957,91 @@ func TestRunDoctorProductionReturnsNonZeroOnBlockers(t *testing.T) { t.Fatalf("doctor output leaked database password: %s", body) } } + +func TestPilotDoctorNoNetworkSkipsConnectivityAndRedactsValues(t *testing.T) { + env := map[string]string{ + "WEBHOOKERY_ENVIRONMENT": "production", + "WEBHOOKERY_DATABASE_URL": "postgres://webhookery:secret-db-password@db/webhookery?sslmode=require", + "WEBHOOKERY_SECRET_BOX_MODE": "local", + "WEBHOOKERY_MASTER_KEY_BASE64": 
"MTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTI=", + "WEBHOOKERY_RAW_STORAGE_MODE": "postgres", + "WEBHOOKERY_PILOT_RECEIVER_CHECK_URL": "https://receiver.example.test/webhook?token=secret", + "WEBHOOKERY_PILOT_ALLOW_RECEIVER_CHECK": "true", + "WEBHOOKERY_STRIPE_WEBHOOK_SECRET": "stripe-secret-marker", + "WEBHOOKERY_OBJECT_STORAGE_SECRET_KEY": "object-secret-password", + "WEBHOOKERY_BOOTSTRAP_API_KEY_HASH": "sha256:bootstrap-secret-hash", + "WEBHOOKERY_BOOTSTRAP_API_KEY_PREFIX": "live", + "WEBHOOKERY_PROVIDER_PROOF_MANIFEST_PATH": "docs/provider-proof-manifest.json", + "WEBHOOKERY_PROVIDER_CONFORMANCE_MANIFEST": "docs/provider-conformance.manifest.json", + } + calledDB := false + calledReceiver := false + findings := pilotDoctorFindings(func(name string) string { return env[name] }, pilotDoctorOptions{ + Network: false, + DBCheck: func(_ context.Context, _ string, _ time.Duration) (pilotDatabaseStatus, error) { + calledDB = true + return pilotDatabaseStatus{}, nil + }, + ReceiverCheck: func(_ context.Context, _ string, _ time.Duration) error { + calledReceiver = true + return nil + }, + }) + if calledDB || calledReceiver { + t.Fatalf("no-network pilot doctor called network checks: db=%t receiver=%t", calledDB, calledReceiver) + } + var out bytes.Buffer + writeDoctorFindings(&out, findings) + body := out.String() + for _, want := range []string{"warning: database-connectivity", "warning: receiver-connectivity"} { + if !strings.Contains(body, want) { + t.Fatalf("expected %q in pilot doctor output:\n%s", want, body) + } + } + for _, forbidden := range []string{"secret-db-password", "token=secret", "stripe-secret-marker", "object-secret-password", env["WEBHOOKERY_DATABASE_URL"]} { + if strings.Contains(body, forbidden) { + t.Fatalf("pilot doctor output leaked sensitive value %q in %s", forbidden, body) + } + } +} + +func TestPilotDoctorReportsDatabaseReadiness(t *testing.T) { + env := map[string]string{ + "WEBHOOKERY_ENVIRONMENT": "production", + 
"WEBHOOKERY_DATABASE_URL": "postgres://webhookery@db/webhookery?sslmode=require", + "WEBHOOKERY_SECRET_BOX_MODE": "vault-transit", + "WEBHOOKERY_VAULT_ADDR": "https://vault.internal", + "WEBHOOKERY_VAULT_TOKEN": "vault-token", + "WEBHOOKERY_VAULT_TRANSIT_KEY": "webhookery", + "WEBHOOKERY_RAW_STORAGE_MODE": "postgres", + "WEBHOOKERY_BOOTSTRAP_API_KEY_HASH": "", + "WEBHOOKERY_PROVIDER_PROOF_MANIFEST_PATH": "docs/provider-proof-manifest.json", + } + findings := pilotDoctorFindings(func(name string) string { return env[name] }, pilotDoctorOptions{ + Network: true, + DBCheck: func(_ context.Context, databaseURL string, _ time.Duration) (pilotDatabaseStatus, error) { + if databaseURL != env["WEBHOOKERY_DATABASE_URL"] { + t.Fatalf("unexpected database url %q", databaseURL) + } + return pilotDatabaseStatus{ + AppliedMigrations: 3, + ExpectedMigrations: 3, + PendingOutbox: 0, + InProgressOutbox: 0, + RetentionPolicies: 1, + AuditChainEntries: 4, + }, nil + }, + }) + if blockers := countDoctorBlockers(findings); blockers != 0 { + t.Fatalf("expected no pilot doctor blockers, got %d: %+v", blockers, findings) + } + var out bytes.Buffer + writeDoctorFindings(&out, findings) + body := out.String() + for _, want := range []string{"ok: database-connectivity", "ok: migrations", "ok: queue", "ok: retention", "ok: audit-chain"} { + if !strings.Contains(body, want) { + t.Fatalf("expected %q in pilot doctor output:\n%s", want, body) + } + } +} diff --git a/cmd/whcp/runtime.go b/cmd/whcp/runtime.go new file mode 100644 index 0000000..5188a9e --- /dev/null +++ b/cmd/whcp/runtime.go @@ -0,0 +1,340 @@ +package main + +import ( + "context" + "crypto/sha256" + "crypto/tls" + "crypto/x509" + "encoding/hex" + "errors" + "flag" + "fmt" + "log/slog" + "net/http" + "os" + "os/signal" + "strings" + "syscall" + "time" + + "webhookery/internal/adapters/crypto" + "webhookery/internal/adapters/deliveryhttp" + "webhookery/internal/adapters/httpapi" + "webhookery/internal/adapters/objectstore" + 
"webhookery/internal/adapters/postgres" + "webhookery/internal/adapters/signalhttp" + apppkg "webhookery/internal/app" + "webhookery/internal/authz" + "webhookery/internal/config" + "webhookery/internal/domain" + "webhookery/internal/ssrf" + "webhookery/internal/worker" + + "github.com/aws/aws-sdk-go-v2/aws" + awsconfig "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/kms" +) + +func runAPI() error { + ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) + defer stop() + cfg, err := config.Load() + if err != nil { + return err + } + store, err := openStore(ctx, cfg) + if err != nil { + return err + } + defer store.Close() + + openAPI, err := os.ReadFile("openapi.yaml") + if err != nil { + return err + } + server := httpapi.NewServer(httpapi.ServerConfig{ + Control: apppkg.NewControlServiceWithRuntimeConfig(store, ssrf.Validator{}, opsRuntimeConfig(cfg)), + Ingest: apppkg.NewIngestService(store, apppkg.SystemClock{}), + Auth: runtimeAuth(cfg, store), + SessionAuth: apppkg.SessionAuthenticator{Lookup: store}, + ProducerAuth: apppkg.ProducerTokenAuthenticator{Lookup: store}, + ProducerMTLSAuth: apppkg.ProducerMTLSAuthenticator{Lookup: store}, + OpenAPI: openAPI, + EnableUI: cfg.EnableUI, + SessionCookieSecure: cfg.Environment == "production", + TrustedProxyCIDRs: cfg.TrustedProxyCIDRs, + Health: store.Health, + }) + tlsConfig, err := serverTLSConfig(cfg) + if err != nil { + return err + } + httpServer := &http.Server{Addr: cfg.HTTPAddr, Handler: server.Routes(), ReadHeaderTimeout: 5 * time.Second, MaxHeaderBytes: 64 << 10, TLSConfig: tlsConfig} + errCh := make(chan error, 1) + go func() { + slog.Info("starting api", "addr", cfg.HTTPAddr) + if cfg.TLSCertFile != "" { + errCh <- httpServer.ListenAndServeTLS(cfg.TLSCertFile, cfg.TLSKeyFile) + return + } + errCh <- httpServer.ListenAndServe() + }() + select { + case <-ctx.Done(): + shutdownCtx, cancel := context.WithTimeout(context.Background(), 
10*time.Second)
+		defer cancel()
+		return httpServer.Shutdown(shutdownCtx)
+	case err := <-errCh:
+		if errors.Is(err, http.ErrServerClosed) {
+			return nil
+		}
+		return err
+	}
+}
+
+// runMigrate implements the `whcp migrate` subcommand.
+//
+// args holds the arguments after "migrate": optional flags followed by exactly
+// one command. Flags must come first because the flag package stops parsing at
+// the first non-flag argument.
+//
+// Commands:
+//   - up: applies the migrations found in --dir to cfg.DatabaseURL.
+//   - audit-chain-backfill: opens the store and backfills at most --limit
+//     audit-chain events using --worker-id, then prints a one-line summary to
+//     stdout.
+//
+// It returns a usage error when the command is missing, repeated, or unknown,
+// and otherwise propagates flag, configuration, store, and migration errors.
+func runMigrate(args []string) error {
+	// Single source for the usage text so the arity check and the
+	// unknown-command branch cannot drift apart. The trailing placeholder names
+	// the required positional command.
+	const usage = "usage: whcp migrate [--dir migrations] [--limit 100] [--worker-id whcp-migrate] <up|audit-chain-backfill>"
+
+	fs := flag.NewFlagSet("migrate", flag.ContinueOnError)
+	dir := fs.String("dir", "migrations", "migration directory")
+	limit := fs.Int("limit", 100, "maximum audit-chain events to backfill")
+	workerID := fs.String("worker-id", "whcp-migrate", "worker id for operational leases")
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	if fs.NArg() != 1 {
+		return errors.New(usage)
+	}
+	cfg, err := config.Load()
+	if err != nil {
+		return err
+	}
+	switch fs.Arg(0) {
+	case "up":
+		return postgres.MigrateUp(context.Background(), cfg.DatabaseURL, *dir)
+	case "audit-chain-backfill":
+		store, err := openStore(context.Background(), cfg)
+		if err != nil {
+			return err
+		}
+		defer store.Close()
+		result, err := store.BackfillAuditChain(context.Background(), *workerID, *limit)
+		if err != nil {
+			return err
+		}
+		fmt.Fprintf(os.Stdout, "audit_chain_backfill lease_acquired=%t tenants_scanned=%d events_backfilled=%d more=%t\n", result.LeaseAcquired, result.TenantsScanned, result.EventsBackfilled, result.More)
+		return nil
+	default:
+		return errors.New(usage)
+	}
+}
+
+func runWorker(args []string) error {
+	fs := flag.NewFlagSet("worker", flag.ContinueOnError)
+	once := fs.Bool("once", false, "run one polling iteration")
+	interval := fs.Duration("interval", 2*time.Second, "poll interval")
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+	defer stop()
+	cfg, err := config.Load()
+	if err != nil {
+		return err
+	}
+	store, err := openStore(ctx, cfg)
+	if err != nil {
+		return err
+	}
+	defer
store.Close()
+	fanout := apppkg.NewDeliveryFanoutService(store, apppkg.SystemClock{})
+	reconciliation := apppkg.NewReconciliationService(store, nil)
+	processor := apppkg.NewOutboxProcessorService(fanout, reconciliation)
+	// One resolver is shared by the SSRF validator and every outbound HTTP
+	// client built below.
+	egressResolver := ssrf.NetResolver{}
+	egressValidator := ssrf.Validator{Resolver: egressResolver}
+	w := worker.Worker{
+		Store:                     store,
+		Processor:                 processor,
+		DeliveryStore:             store,
+		DeliveryClient:            deliveryAdapter{client: deliveryhttp.Client{HTTP: deliveryhttp.HTTPClient(10*time.Second, egressResolver), SSRF: egressValidator}},
+		NotificationDeliveryStore: store,
+		NotificationClient:        signalAdapter{client: signalhttp.Client{HTTP: signalhttp.HTTPClient(10*time.Second, egressResolver), SSRF: egressValidator}},
+		SIEMDeliveryStore:         store,
+		SIEMClient:                signalAdapter{client: signalhttp.Client{HTTP: signalhttp.HTTPClient(10*time.Second, egressResolver), SSRF: egressValidator}},
+		RetentionStore:            store,
+		MetricsStore:              store,
+		AlertStore:                store,
+		AuditChainBackfillStore:   store,
+		// NOTE(review): the ID has one-second resolution, so two workers started
+		// in the same second get the same value. Confirm whether lease
+		// ownership relies on this being unique per process.
+		WorkerID: "worker-" + time.Now().UTC().Format("20060102150405"),
+		Limit:    10,
+	}
+	if *once {
+		return w.RunOnce(ctx)
+	}
+	// time.NewTicker panics on a non-positive duration; turn a bad --interval
+	// into an ordinary error instead of crashing the process.
+	if *interval <= 0 {
+		return fmt.Errorf("worker: --interval must be greater than zero, got %s", *interval)
+	}
+	ticker := time.NewTicker(*interval)
+	defer ticker.Stop()
+	for {
+		// A failed iteration is logged and retried on the next tick rather than
+		// stopping the worker.
+		if err := w.RunOnce(ctx); err != nil {
+			slog.Error("worker iteration failed", "error", err)
+		}
+		select {
+		case <-ctx.Done():
+			return nil
+		case <-ticker.C:
+		}
+	}
+}
+
+func openStore(ctx context.Context, cfg config.Config) (*postgres.Store, error) {
+	box, err := secretBoxFromConfig(ctx, cfg)
+	if err != nil {
+		return nil, err
+	}
+	opts := postgres.StoreOptions{RawStorageMode: cfg.RawStorageMode}
+	if cfg.RawStorageMode == domain.RawStorageS3 {
+		store, err := objectstore.NewS3Store(objectstore.S3Config{
+			Endpoint:  cfg.ObjectStorageEndpoint,
+			AccessKey: cfg.ObjectStorageAccessKey,
+			SecretKey: cfg.ObjectStorageSecretKey,
+			Bucket:    cfg.ObjectStorageBucket,
+			Region:    cfg.ObjectStorageRegion,
+			UseSSL:    cfg.ObjectStorageUseSSL,
+		})
+		if err != nil {
+			return nil, err
+		}
+ opts.ObjectStore = store + opts.ObjectBucket = store.Bucket() + } + return postgres.NewWithOptions(ctx, cfg.DatabaseURL, box, opts) +} + +func secretBoxFromConfig(ctx context.Context, cfg config.Config) (postgres.SecretBox, error) { + switch cfg.SecretBoxMode { + case "", "local": + return crypto.NewEnvelope(cfg.MasterKeyBase64) + case "vault-transit": + return crypto.NewVaultTransitEnvelope(crypto.VaultTransitConfig{ + Address: cfg.VaultAddr, + Token: cfg.VaultToken, + KeyName: cfg.VaultTransitKey, + }) + case "aws-kms": + awsCfg, err := awsconfig.LoadDefaultConfig(ctx, awsconfig.WithRegion(cfg.AWSRegion)) + if err != nil { + return nil, fmt.Errorf("load aws config: %w", err) + } + client := kms.NewFromConfig(awsCfg, func(opts *kms.Options) { + if strings.TrimSpace(cfg.AWSKMSEndpoint) != "" { + opts.BaseEndpoint = aws.String(strings.TrimSpace(cfg.AWSKMSEndpoint)) + } + }) + return crypto.NewAWSKMSEnvelope(crypto.AWSKMSEnvelopeConfig{ + KeyID: cfg.AWSKMSKeyID, + Client: client, + }) + default: + return nil, fmt.Errorf("unsupported secret box mode %q", cfg.SecretBoxMode) + } +} + +func serverTLSConfig(cfg config.Config) (*tls.Config, error) { + if cfg.TLSCertFile == "" && cfg.ProducerMTLSClientCAFile == "" { + return nil, nil + } + tlsConfig := &tls.Config{MinVersion: tls.VersionTLS12} + if cfg.ProducerMTLSClientCAFile != "" { + body, err := readSmallFile(cfg.ProducerMTLSClientCAFile, 1<<20) + if err != nil { + return nil, fmt.Errorf("read producer mTLS client CA file: %w", err) + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(body) { + return nil, fmt.Errorf("producer mTLS client CA file did not contain certificates") + } + tlsConfig.ClientCAs = pool + tlsConfig.ClientAuth = tls.VerifyClientCertIfGiven + } + return tlsConfig, nil +} + +func opsRuntimeConfig(cfg config.Config) domain.OpsConfig { + return domain.OpsConfig{ + Environment: cfg.Environment, + UIEnabled: cfg.EnableUI, + RawStorageMode: cfg.RawStorageMode, + ObjectStorageConfigured: 
cfg.RawStorageMode == domain.RawStorageS3, + SecretBoxMode: cfg.SecretBoxMode, + KeyCustodyConfigured: cfg.SecretBoxMode != "", + KeyCustodyKeyRef: keyCustodyKeyRef(cfg), + MaxIngressBodyBytes: 2 << 20, + MaxHeaderBytes: 64 << 10, + MaxHeaderPairs: 128, + MaxHeaderValueBytes: 8 << 10, + } +} + +func keyCustodyKeyRef(cfg config.Config) string { + if cfg.SecretBoxMode != "aws-kms" || strings.TrimSpace(cfg.AWSKMSKeyID) == "" { + return "" + } + sum := sha256.Sum256([]byte(strings.TrimSpace(cfg.AWSKMSKeyID))) + return "sha256:" + hex.EncodeToString(sum[:])[:12] +} + +func runtimeAuth(cfg config.Config, lookup apppkg.APIKeyLookup) apppkg.Authenticator { + authenticators := []apppkg.Authenticator{apppkg.APIKeyAuthenticator{Lookup: lookup}} + if cfg.BootstrapAPIKeyHash != "" { + authenticators = append(authenticators, apppkg.StaticAuthenticator{ + Hash: cfg.BootstrapAPIKeyHash, + Actor: authz.Actor{ + ID: "bootstrap", + TenantID: cfg.BootstrapTenantID, + Role: authz.RoleOwner, + Scopes: []string{"*"}, + }, + }) + } + return apppkg.MultiAuthenticator{Authenticators: authenticators} +} + +func readMTLSFiles(certPath, keyPath string) (string, string, error) { + if strings.TrimSpace(certPath) == "" || strings.TrimSpace(keyPath) == "" { + return "", "", fmt.Errorf("mtls-client-cert-file and mtls-client-key-file are required together") + } + cert, err := readSmallFile(certPath, 64<<10) + if err != nil { + return "", "", fmt.Errorf("read mTLS client certificate: %w", err) + } + key, err := readSmallFile(keyPath, 64<<10) + if err != nil { + return "", "", fmt.Errorf("read mTLS client key: %w", err) + } + return string(cert), string(key), nil +} + +func readSmallFile(path string, max int64) ([]byte, error) { + path = strings.TrimSpace(path) + if path == "" || strings.ContainsRune(path, 0) { + return nil, fmt.Errorf("invalid file path") + } + info, err := os.Lstat(path) // #nosec G703 -- explicit local operator PEM path; symlinks, directories, and size are checked before use. 
+ if err != nil { + return nil, err + } + if info.IsDir() { + return nil, fmt.Errorf("path is a directory") + } + if info.Mode()&os.ModeSymlink != 0 { + return nil, fmt.Errorf("path must not be a symlink") + } + if info.Size() > max { + return nil, fmt.Errorf("file exceeds %d bytes", max) + } + body, err := os.ReadFile(path) // #nosec G304,G703 -- explicit local operator PEM path; no shell execution and bounded to small PEM files. + if err != nil { + return nil, err + } + if int64(len(body)) > max { + return nil, fmt.Errorf("file exceeds %d bytes", max) + } + return body, nil +} diff --git a/collections/README.md b/collections/README.md new file mode 100644 index 0000000..08b5d1c --- /dev/null +++ b/collections/README.md @@ -0,0 +1,86 @@ +# Request Collection Smoke Paths + +Webhookery includes small Postman and Bruno collections for local smoke checks. +They are not full API coverage and they do not replace `make docs-check`, +`make rc-check`, or OpenAPI contract tests. + +## Files + +| Collection | Path | +|------------|------| +| Postman | `collections/postman/webhookery.postman_collection.json` | +| Bruno | `collections/bruno/Webhookery/` | + +Run the static collection shape check with: + +```bash +make collections-check +``` + +That check verifies committed files and key routes. It does not send live HTTP +requests. + +## Local Variables + +Set these variables before sending requests: + +| Variable | Example | Notes | +|----------|---------|-------| +| `base_url` | `http://localhost:8080` | Local API URL. | +| `api_key` | `dev-bootstrap-key` | Local bootstrap key only. Replace with a database-backed API key outside local development. | +| `tenant_id` | `ten_dev` | Must match the local tenant. | +| `source_id` | `src_...` | Replace with a real generic HMAC source ID before sending ingest smoke requests. 
| + +The generic ingest request uses this placeholder header: + +```text +Webhook-Signature: sha256=replace-with-hmac-sha256-hex +``` + +Replace it with an HMAC-SHA256 hex digest over the exact raw request body using +the source verification secret. Do not put a real source secret into the +collection file. Keep it in the collection runner's local variable store or a +secret manager. + +## Smoke Requests + +| Request | Expected response | What it proves | +|---------|-------------------|----------------| +| Readiness | `200` from `/readyz` | API process can reach required dependencies. | +| List Events | `200` JSON page from `/v1/events` | Bearer auth works and the tenant can read event metadata. | +| Ingest Generic Event | `200` JSON with `received: true` when `source_id` exists and the HMAC is valid. Placeholder signatures should fail. | Durable provider-style capture path works for a signed generic HMAC source. | +| Audit Chain Head | `200` JSON from `/v1/audit-chain/head` | Audit-chain metadata is readable with the configured API key. | +| Verify Audit Chain | `200` JSON verification result from `/v1/audit-chain:verify` | Audit-chain verification endpoint is reachable and returns current chain status. | + +Inbound success proves durable capture and verification metadata for that +request. It does not prove downstream business processing succeeded. + +## Postman + +1. Import `collections/postman/webhookery.postman_collection.json`. +2. Select or create an environment with the variables above. +3. Start Webhookery locally: + + ```bash + cp .env.example .env + docker compose up --build + ``` + +4. Run `Readiness`, then authenticated read requests. +5. Before running `Ingest Generic Event`, replace `source_id` and + `Webhook-Signature` with values for a local generic HMAC source. + +## Bruno + +1. Open `collections/bruno/Webhookery/` in Bruno. +2. Use the committed `local` environment as a starting point. +3. 
Replace `source_id` and the ingest `Webhook-Signature` header before sending + the generic ingest request. +4. Run requests in sequence: readiness, list events, optional signed ingest, + audit-chain head, audit-chain verify. + +## Safety + +Do not commit modified collections containing real API keys, source secrets, +provider credentials, raw payload bodies, customer data, or generated evidence. +Placeholder signatures are intentional in committed collection files. diff --git a/collections/bruno/Webhookery/ingest-generic.bru b/collections/bruno/Webhookery/ingest-generic.bru index 10289ec..2f1c5a5 100644 --- a/collections/bruno/Webhookery/ingest-generic.bru +++ b/collections/bruno/Webhookery/ingest-generic.bru @@ -12,7 +12,7 @@ post { headers { Content-Type: application/json - X-Webhookery-Signature: t=replace,v1=replace + Webhook-Signature: sha256=replace-with-hmac-sha256-hex } body:json { diff --git a/collections/postman/webhookery.postman_collection.json b/collections/postman/webhookery.postman_collection.json index 66f2260..02aa359 100644 --- a/collections/postman/webhookery.postman_collection.json +++ b/collections/postman/webhookery.postman_collection.json @@ -34,7 +34,7 @@ "method": "POST", "header": [ {"key": "Content-Type", "value": "application/json"}, - {"key": "X-Webhookery-Signature", "value": "t=replace,v1=replace"} + {"key": "Webhook-Signature", "value": "sha256=replace-with-hmac-sha256-hex"} ], "body": { "mode": "raw", diff --git a/deploy/helm/webhookery/README.md b/deploy/helm/webhookery/README.md index fd71214..a075cc6 100644 --- a/deploy/helm/webhookery/README.md +++ b/deploy/helm/webhookery/README.md @@ -1,16 +1,83 @@ # Webhookery Helm Chart -This chart deploys the API, worker, scheduler, and optional migration job. It -does not deploy PostgreSQL or object storage; provide those as managed or -separately operated dependencies. +This chart deploys the API, worker, scheduler, Service, ConfigMap, Secret +reference, and optional migration Job. 
Use `docs/deployment.md` for common +production posture, `docs/configuration.md` for environment variables, and +`docs/operations.md` for readiness, backup, restore, and incident procedures. -By default the chart expects an existing Secret named `webhookery-secrets`. -Create it through your normal cluster secret workflow with -`WEBHOOKERY_DATABASE_URL`, `WEBHOOKERY_MASTER_KEY_BASE64`, and any object-store -or bootstrap variables you use. `secret.create=true` is available for local -testing and should be fed by operator-owned values files, not committed values. +The chart does not deploy PostgreSQL, object storage, ingress, DNS, TLS +certificates, network policies, service monitors, or an external secret manager. + +`values-production.example.yaml` is a hardened starting overlay with multiple +replicas, resource requests, S3-mode placeholders, and non-root security +contexts. It still expects operator-managed secrets and external dependencies. + +## Prerequisites + +- A Kubernetes cluster and Helm 3. +- PostgreSQL provisioned outside the chart. +- Optional S3-compatible object storage when `WEBHOOKERY_RAW_STORAGE_MODE=s3`. +- A real Kubernetes Secret containing database, master-key, object-store, and + bootstrap values required by your deployment. +- A pinned Webhookery image available to the cluster. + +## Secrets Boundary + +By default, the chart expects an existing Secret named `webhookery-secrets`. +Create or sync that Secret through your normal secret-management workflow. + +`secret.create=true` exists for local testing and controlled review +environments. Feed it with operator-owned values files that are not committed. +Do not put real database URLs, master keys, provider credentials, private keys, +raw signatures, raw payloads, or customer data in committed values. + +## Image Pinning + +The default image is the placeholder `webhookery:latest`. 
Override it for every +deployment: + +```bash +helm upgrade --install webhookery deploy/helm/webhookery \ + --namespace webhookery \ + --create-namespace \ + --set fullnameOverride=webhookery \ + --set secret.name=webhookery-secrets \ + --set image.repository=registry.example.com/webhookery \ + --set image.tag=2026.05.25 +``` + +Use an immutable release tag and an operator-owned image signing policy. If your +environment requires digest-only image references, update the chart values and +templates before relying on this chart for that deployment mode. + +## Validate ```bash helm lint deploy/helm/webhookery -helm template webhookery deploy/helm/webhookery --set secret.name=webhookery-secrets +helm template webhookery deploy/helm/webhookery \ + --set fullnameOverride=webhookery \ + --set secret.name=webhookery-secrets +helm upgrade --install webhookery deploy/helm/webhookery \ + --namespace webhookery \ + --create-namespace \ + --set fullnameOverride=webhookery \ + --set secret.name=webhookery-secrets \ + --dry-run ``` + +After install, wait for the migration Job and workload rollouts: + +```bash +kubectl -n webhookery wait --for=condition=complete job/webhookery-migrate --timeout=120s +kubectl -n webhookery rollout status deployment/webhookery-api +kubectl -n webhookery rollout status deployment/webhookery-worker +kubectl -n webhookery rollout status deployment/webhookery-scheduler +``` + +## Migration Job + +`migrate.enabled=true` renders a Job that runs +`migrate -dir migrations up` before the runtime workloads are considered ready. +Keep the migration image pinned to the same release as the API, worker, and +scheduler. Disable the Job only when another controlled process runs the same +migrations and records the release evidence. 
diff --git a/deploy/helm/webhookery/values-production.example.yaml b/deploy/helm/webhookery/values-production.example.yaml new file mode 100644 index 0000000..e83bca4 --- /dev/null +++ b/deploy/helm/webhookery/values-production.example.yaml @@ -0,0 +1,60 @@ +# Hardened example values for a self-hosted Webhookery release candidate. +# This file is an example overlay, not a managed production dependency stack. +# Provide PostgreSQL, object storage, TLS, ingress, secret manager, and backup +# procedures outside this chart. + +replicaCount: + api: 2 + worker: 2 + scheduler: 1 + +image: + repository: ghcr.io/aatuh/webhookery + tag: v0.0.0 + pullPolicy: IfNotPresent + +service: + type: ClusterIP + port: 8080 + +config: + environment: production + httpAddr: ":8080" + logLevel: info + enableUI: "false" + rawStorageMode: s3 + objectStorageEndpoint: "s3.example.internal" + objectStorageBucket: "webhookery-raw" + objectStorageRegion: "us-east-1" + objectStorageUseSSL: "true" + bootstrapTenantID: ten_bootstrap + bootstrapAPIKeyPrefix: "" + +secret: + create: false + name: webhookery-secrets + +migrate: + enabled: true + +resources: + requests: + cpu: 250m + memory: 256Mi + limits: + cpu: "1" + memory: 1Gi + +podSecurityContext: + runAsNonRoot: true + runAsUser: 10001 + runAsGroup: 10001 + fsGroup: 10001 + seccompProfile: + type: RuntimeDefault + +containerSecurityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: ["ALL"] diff --git a/deploy/kubernetes/README.md b/deploy/kubernetes/README.md index 01e14e6..26a1a53 100644 --- a/deploy/kubernetes/README.md +++ b/deploy/kubernetes/README.md @@ -1,32 +1,72 @@ -# Kubernetes Deployment Profile +# Kubernetes Profile -This directory contains a minimal self-hosted Kubernetes profile for Webhookery. -It assumes PostgreSQL and any S3-compatible object storage are provisioned -outside the manifests. +This directory is the minimal raw-manifest deployment profile for Webhookery. 
+Use `docs/deployment.md` for common production posture, `docs/configuration.md` +for environment variables, and `docs/operations.md` for readiness, backup, +restore, and incident procedures. -Before applying, create real secrets from `secret.example.yaml`: +The profile deploys the API, worker, scheduler, migration Job, ConfigMap, and a +placeholder Secret shape. It does not install PostgreSQL, object storage, +ingress, DNS, TLS certificates, network policies, service monitors, or an +external secret manager. + +`networkpolicy.example.yaml` is a starting point for ingress and egress +restriction. Review namespace selectors, CNI behavior, external database/object +storage access, and metadata-address blocking before applying it. + +## Prerequisites + +- A Kubernetes cluster with access to the Webhookery image registry. +- PostgreSQL provisioned outside these manifests. +- Optional S3-compatible object storage when `WEBHOOKERY_RAW_STORAGE_MODE=s3`. +- A real Kubernetes Secret named `webhookery-secrets`. +- TLS or ingress handled by an operator-owned profile layer. + +## Secrets Boundary + +`secret.example.yaml` documents the required key names with placeholders only. +Do not apply it unchanged to a shared or production cluster, and do not commit +real database URLs, master keys, provider credentials, raw signatures, raw +payloads, or customer data. + +Create the Secret through the cluster's normal secret-management workflow. For a +throwaway cluster, the equivalent shape is: ```bash kubectl apply -f deploy/kubernetes/namespace.yaml kubectl -n webhookery create secret generic webhookery-secrets \ - --from-literal=WEBHOOKERY_DATABASE_URL='postgres://...' \ - --from-literal=WEBHOOKERY_MASTER_KEY_BASE64='...' 
\ + --from-literal=WEBHOOKERY_DATABASE_URL='postgres://webhookery:replace-me@postgres.example.internal:5432/webhookery?sslmode=require' \ + --from-literal=WEBHOOKERY_MASTER_KEY_BASE64='replace-with-32-byte-base64-key' \ --from-literal=WEBHOOKERY_BOOTSTRAP_TENANT_ID='ten_prod' \ - --from-literal=WEBHOOKERY_BOOTSTRAP_API_KEY_HASH='sha256:...' \ + --from-literal=WEBHOOKERY_BOOTSTRAP_API_KEY_HASH='sha256:replace-with-bootstrap-key-hash' \ --from-literal=WEBHOOKERY_BOOTSTRAP_API_KEY_PREFIX='prod-bootstrap' ``` -Then apply the profile: +## Image Pinning + +The checked-in manifests use `webhookery:latest` as a placeholder. Before +promotion, replace every workload image with an immutable, signed release image +through a deployment overlay or manifest patch. Keep API, worker, scheduler, and +migration Job images aligned for the same release. + +## Apply And Validate ```bash kubectl apply -k deploy/kubernetes kubectl -n webhookery wait --for=condition=complete job/webhookery-migrate --timeout=120s kubectl -n webhookery rollout status deployment/webhookery-api kubectl -n webhookery rollout status deployment/webhookery-worker +kubectl -n webhookery rollout status deployment/webhookery-scheduler +kubectl -n webhookery get pods,jobs,svc ``` -The checked-in manifests use `webhookery:latest` as a placeholder image. Pin a -specific signed image digest for production and manage secrets with your -cluster's normal secret-management system. The profile does not install -PostgreSQL, ingress, TLS certificates, network policies, service monitors, or -object storage. +The API readiness endpoint is `/readyz`. The profile exposes an internal +ClusterIP Service; publish it through an operator-owned ingress or gateway. + +## Migration Job + +`migrate-job.yaml` runs `migrate up` with the same ConfigMap and Secret as the +runtime workloads. It uses `restartPolicy: OnFailure` and `backoffLimit: 3`. 
+Treat a failed migration Job as a deployment blocker: inspect the Job logs, +preserve the failed database state for analysis, and use `docs/operations.md` +before retrying against important data. diff --git a/deploy/kubernetes/networkpolicy.example.yaml b/deploy/kubernetes/networkpolicy.example.yaml new file mode 100644 index 0000000..baebc10 --- /dev/null +++ b/deploy/kubernetes/networkpolicy.example.yaml @@ -0,0 +1,47 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: webhookery-restrict-default + namespace: webhookery +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: webhookery + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: ingress-nginx + ports: + - protocol: TCP + port: 8080 + egress: + # PostgreSQL. Replace the namespace/pod selector or use a CNI-supported + # external egress policy for managed databases. + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: data + ports: + - protocol: TCP + port: 5432 + # DNS for endpoint and object-store resolution. Use your cluster DNS + # namespace and labels. + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + ports: + - protocol: UDP + port: 53 + - protocol: TCP + port: 53 + # HTTPS egress for object storage, Vault/AWS KMS, notification channels, + # SIEM sinks, and customer endpoints. Layer this with firewall/CNI rules + # that block private, link-local, metadata, multicast, and reserved ranges. 
+ - ports: + - protocol: TCP + port: 443 diff --git a/deploy/observability/prometheus-rules.example.yaml b/deploy/observability/prometheus-rules.example.yaml new file mode 100644 index 0000000..89fe72e --- /dev/null +++ b/deploy/observability/prometheus-rules.example.yaml @@ -0,0 +1,65 @@ +groups: + - name: webhookery-core + rules: + - alert: WebhookeryOutboxOld + expr: webhookery_outbox_oldest_age_seconds > 300 + for: 10m + labels: + severity: warning + annotations: + summary: Webhookery outbox has old pending work + description: Oldest pending outbox row is older than 5 minutes. Check API/worker health and PostgreSQL. + + - alert: WebhookeryOutboxBacklog + expr: webhookery_outbox_pending > 1000 + for: 10m + labels: + severity: warning + annotations: + summary: Webhookery outbox backlog is high + description: Durable outbox backlog is above the local threshold. Check worker capacity and downstream failures. + + - alert: WebhookeryDeadLetterOpen + expr: webhookery_dead_letter_open > 0 + for: 15m + labels: + severity: warning + annotations: + summary: Webhookery has open DLQ entries + description: Open dead-letter entries require operator triage and explicit replay/release decisions. + + - alert: WebhookeryQuarantineOpen + expr: webhookery_quarantine_open > 0 + for: 15m + labels: + severity: warning + annotations: + summary: Webhookery has open quarantine entries + description: Open quarantine entries indicate rejected or unsafe provider/request evidence requiring review. + + - alert: WebhookeryAuditChainFailures + expr: webhookery_audit_chain_verification_failures > 0 or webhookery_audit_chain_unchained_events > 0 + for: 5m + labels: + severity: critical + annotations: + summary: Webhookery audit chain verification has failures + description: Audit-chain continuity is failing. Preserve state and run audit-chain verification before remediation. 
+ + - alert: WebhookeryReconciliationFailures + expr: webhookery_reconciliation_items{outcome=~"failed|unrecoverable"} > 0 + for: 30m + labels: + severity: warning + annotations: + summary: Webhookery reconciliation has failed or unrecoverable items + description: Reconciliation found provider-side gaps or failed recovery work. Review job items and provider limitations. + + - alert: WebhookeryEndpointCircuitOpen + expr: webhookery_endpoint_circuit_open > 0 + for: 10m + labels: + severity: warning + annotations: + summary: Webhookery endpoint circuit is open + description: One or more endpoint delivery circuits are open. Check receiver status and delivery attempts. diff --git a/deploy/terraform/webhookery-helm/README.md b/deploy/terraform/webhookery-helm/README.md index c9efcc2..a77dfac 100644 --- a/deploy/terraform/webhookery-helm/README.md +++ b/deploy/terraform/webhookery-helm/README.md @@ -1,15 +1,40 @@ # Webhookery Helm Terraform Module -This module installs the local Webhookery Helm chart. It is a deployment -wrapper only: it does not create PostgreSQL, object storage, ingress, DNS, TLS -certificates, or Kubernetes Secrets. +This module installs the local Webhookery Helm chart through `helm_release`. +Use `docs/deployment.md` for common production posture, `docs/configuration.md` +for environment variables, and `docs/operations.md` for readiness, backup, +restore, and incident procedures. -Create the Secret through your normal secret-management workflow before -applying this module. At minimum the Secret should contain -`WEBHOOKERY_DATABASE_URL` and `WEBHOOKERY_MASTER_KEY_BASE64`; include object -storage and bootstrap values only when you use those features. Secret values are -intentionally not accepted as module variables because Terraform state is not a -safe place for long-lived database URLs, master keys, or object-store keys. +The module is a deployment wrapper only. 
It does not create PostgreSQL, object +storage, ingress, DNS, TLS certificates, Kubernetes Secrets, network policies, +or external secret-manager resources. + +## Prerequisites + +- Terraform with the Helm provider. +- Cluster credentials for the target Kubernetes cluster. +- PostgreSQL provisioned outside this module. +- Optional S3-compatible object storage when `raw_storage_mode = "s3"`. +- A real Kubernetes Secret already present in the target namespace. +- A pinned Webhookery image available to the cluster. + +## Secrets Boundary + +Create the Secret through your normal secret-management workflow before applying +this module. At minimum, it should contain `WEBHOOKERY_DATABASE_URL` and +`WEBHOOKERY_MASTER_KEY_BASE64`; include object-storage and bootstrap values only +when you use those features. + +Secret values are intentionally not accepted as module variables because +Terraform state is not a safe place for long-lived database URLs, master keys, +provider credentials, object-store keys, raw signatures, raw payloads, or +customer data. + +## Image Pinning + +Do not leave the defaults `image_repository = "webhookery"` and +`image_tag = "latest"` in promoted environments. Set the repository and an +immutable release tag explicitly: ```hcl module "webhookery" { @@ -24,10 +49,25 @@ module "webhookery" { } ``` -Validate locally: +Use an operator-owned image signing policy. If your environment requires +digest-only image references, update the Helm chart and this module before +relying on them for that deployment mode. + +## Validate ```bash terraform fmt -check -recursive deploy/terraform terraform -chdir=deploy/terraform/webhookery-helm init -backend=false terraform -chdir=deploy/terraform/webhookery-helm validate +terraform -chdir=deploy/terraform/webhookery-helm plan ``` + +Run `plan` only with a workspace that is allowed to read the target cluster +state. Do not provide secret values through Terraform variables. 
+ +## Migration Job + +`migrate_enabled = true` is the default. The module forwards that setting to the +Helm chart, which renders a migration Job and waits for the Helm release. Leave +the Job enabled unless another controlled migration process runs the same +migrations, blocks rollout on failure, and records release evidence. diff --git a/docs/articles/exactly-once-webhooks.md b/docs/articles/exactly-once-webhooks.md new file mode 100644 index 0000000..60a030c --- /dev/null +++ b/docs/articles/exactly-once-webhooks.md @@ -0,0 +1,61 @@ +# Exactly-Once Webhooks Are The Wrong Goal + +Webhook systems should not promise exactly-once delivery. Networks retry, +providers send duplicates, receivers time out after doing work, and operators +eventually replay events during incidents. + +The production goal should be evidence, idempotency, and explicit recovery. + +## Better Promise + +Webhookery uses this narrower promise: + +> If Webhookery returns inbound success, the configured durable capture path has +> recorded evidence. Loss boundaries remain explicit, and recovery/replay +> actions are auditable. + +This is stronger than a vague "never lose a webhook" claim because it states +what the system can and cannot prove. 
+ +## Failure Modes That Break Exactly-Once Claims + +- provider retries after network or timeout failures +- receiver completes work but returns a timeout +- duplicate provider event IDs +- manual provider redelivery +- operator replay +- route or transformation changes between original delivery and replay +- retention deleting raw bodies while preserving metadata and hashes +- provider-side gaps that can only be reconciled when provider APIs allow it + +## What To Build Instead + +Use: + +- durable capture before inbound success +- exact raw body preservation +- provider-specific signature verification +- idempotency keys and dedupe evidence +- delivery attempt history +- DLQ and replay with reason capture +- audit-chain verification +- provider reconciliation where possible +- receiver-side idempotency for business effects + +## Webhookery Evaluation + +Run: + +```bash +examples/webhook-evidence-demo/run.sh +make rc-check +``` + +Then review: + +- `docs/security-promise.md` +- `docs/provider-conformance.md` +- `docs/release-evidence-template.md` + +Webhookery does not claim exactly-once delivery. It is designed to make +duplicates, retries, replay, and loss boundaries visible. diff --git a/docs/articles/self-hosted-webhook-gateway-architecture.md b/docs/articles/self-hosted-webhook-gateway-architecture.md new file mode 100644 index 0000000..361adcb --- /dev/null +++ b/docs/articles/self-hosted-webhook-gateway-architecture.md @@ -0,0 +1,85 @@ +# Self-Hosted Webhook Gateway Architecture + +Webhookery is not a generic message queue with HTTP adapters. Its architecture +starts from a narrower promise: if Webhookery returns inbound success, the +event evidence has been durably captured. + +This article explains the core shape for evaluators and reviewers. Exact +behavior remains owned by code, `openapi.yaml`, migrations, and the operations +docs. 
+ +## Architecture Goals + +- preserve exact provider request bytes and headers before trust +- verify provider signatures using provider-specific rules +- write durable receipt, event, dedupe, audit, and outbox evidence before + downstream work +- route and transform events through versioned configuration +- snapshot outbound delivery payload bytes and hashes +- retry and replay without mutating original evidence +- verify audit-chain continuity and export evidence bundles + +## Core Components + +| Component | Responsibility | +| --- | --- | +| API process | Receives provider and product events, handles management APIs, and serves OpenAPI/UI surfaces. | +| Worker process | Claims durable work, delivers payload snapshots, retries failures, updates DLQ, emits signal egress, and processes recovery jobs. | +| Scheduler process | Runs bounded recurring work such as rollups, retention, alerts, and SIEM cursor processing. | +| PostgreSQL | Source of truth for receipts, events, payload metadata, deliveries, attempts, audit chains, config versions, and operational state. | +| Object storage | Optional strict backend for raw payload bodies while PostgreSQL remains metadata authority. | +| Static/operator surfaces | CLI, minimal UI, docs, collections, and release evidence for inspection and control. | + +## Evidence Flow + +1. A provider request reaches an ingest route. +2. Webhookery reads the raw body once and captures raw headers. +3. Provider verification uses exact raw bytes and constant-time comparison. +4. The transaction writes durable receipt, event/quarantine evidence, payload + metadata, audit evidence, and outbox work as appropriate. +5. Routing creates delivery work from versioned route/subscription/config + evidence. +6. Transformation output is snapshotted as exact delivery payload bytes with + a hash. +7. Workers deliver that snapshot, sign those exact bytes, and record attempts. +8. Replay creates new work linked to original evidence. +9. 
Audit-chain verification and evidence exports let operators inspect the + lifecycle later. + +## PostgreSQL-First Reasoning + +PostgreSQL is the MVP authority because the product depends on transactions, +indexes, leases, tenant predicates, and restore drills. Queue or cache systems +can accelerate future deployments, but accepted work must not depend on a +volatile queue as the only source of truth. + +Object storage is optional and strict when enabled. In S3 mode, inbound success +requires both the object write and metadata commit. Raw payload retention may +delete bodies later, but hashes and metadata remain. + +## OpenAPI And SDK Boundary + +`openapi.yaml` is the canonical REST contract. `sdk/openapi.yaml` is a derived +copy, and `make sdk-check` verifies alignment. UI and CLI actions are clients of +the same control plane rather than separate authority paths. + +## Audit-Chain Verification + +Audit events are chained per tenant. The chain covers audit-event metadata and +retention/tombstone continuity. It does not turn Webhookery into a compliance +certificate or external timestamping service. Payload integrity remains covered +by payload hashes, raw-body hashes, and export file hashes. + +## What This Architecture Does Not Claim + +- exactly-once delivery +- provider-side event completeness +- downstream business success +- universal recovery after provider-side loss +- compliance certification +- external timestamping +- multi-region active-active consistency + +Use `docs/evaluator-quickstart.md`, `docs/provider-conformance.md`, and +`docs/release-evidence-template.md` to inspect the architecture through local +fake-provider evidence. diff --git a/docs/articles/webhook-failure-modes.md b/docs/articles/webhook-failure-modes.md new file mode 100644 index 0000000..302f7d2 --- /dev/null +++ b/docs/articles/webhook-failure-modes.md @@ -0,0 +1,58 @@ +# Webhook Failure Modes + +Webhook reliability work starts with admitting where failure can happen. 
+Webhookery is designed to preserve evidence after receipt, not to claim control +over every provider or network boundary. + +## Common Failure Modes + +| Failure mode | Evidence-oriented response | +| --- | --- | +| Provider never sends the event | Reconciliation may detect gaps only where provider APIs allow it. | +| Webhook reaches Webhookery but storage is down | Do not return inbound success. | +| Signature is invalid | Store rejection/quarantine evidence where feasible and do not route by default. | +| Duplicate event arrives | Preserve duplicate evidence and use dedupe to suppress processing where configured. | +| Receiver times out | Record attempt evidence and retry according to policy. | +| Receiver succeeds after timeout | Receiver-side idempotency must handle duplicate business effects. | +| Retries exhaust | Move to DLQ with evidence and operator recovery path. | +| Operator replays event | Create new delivery work with reason and audit evidence. | +| Raw body is deleted by retention | Preserve metadata, hashes, receipts, and audit records. | + +## What Webhookery Controls + +- durable capture before inbound success +- provider verification evidence +- routing and delivery decisions +- retry and DLQ state +- replay authorization and reason capture +- retention metadata +- audit-chain verification +- evidence export contents + +## What Webhookery Does Not Control + +- provider-side event existence +- DNS or network failures before receipt +- downstream business processing +- customer receiver idempotency +- operator-managed backup quality +- external compliance certification + +## Operator Checks + +Use: + +```bash +make provider-conformance-check +make rc-check +make release-acceptance +``` + +For production-style evaluation with a disposable database: + +```bash +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check +``` + +Review `docs/operations.md` and `docs/day-2-operations.md` before relying on a +self-hosted deployment. 
diff --git a/docs/articles/webhook-incident-report.md b/docs/articles/webhook-incident-report.md new file mode 100644 index 0000000..30b1b73 --- /dev/null +++ b/docs/articles/webhook-incident-report.md @@ -0,0 +1,60 @@ +# Building A Webhook Incident Report + +A useful webhook incident report should answer what happened, what was proven, +what could not be proven, and which recovery actions were taken. + +Webhookery is designed to make that report easier to assemble from durable +evidence. + +## Minimum Questions + +- Did Webhookery receive the event? +- Which tenant, source, provider, and event ID were involved? +- Was the provider signature valid? +- What raw payload hash was stored? +- Which route and route version matched? +- Which endpoint was targeted? +- Was the delivery payload snapshotted? +- Which attempts were made? +- What response status and truncated response body were recorded? +- Did the event enter DLQ or quarantine? +- Was replay requested, by whom, and for what reason? +- Did retention delete bodies while preserving metadata and hashes? +- Does audit-chain verification still pass? + +## Evidence Sources + +Use: + +- event detail and timeline APIs +- delivery attempts +- replay receipts +- DLQ and quarantine records +- retention run items +- audit events and audit-chain verification +- provider reconciliation jobs +- evidence exports +- release evidence for the deployed Webhookery version + +## Report Shape + +Recommended sections: + +1. Summary. +2. Timeline. +3. Impacted provider/source/endpoints. +4. Evidence that was durably captured. +5. Delivery and retry outcome. +6. Replay or reconciliation actions. +7. What could not be proven. +8. Follow-up actions. 
+ +## Sensitive Data Boundary + +Incident reports must not include API keys, bearer tokens, webhook secrets, raw +signatures, private keys, provider credentials, raw customer payloads, customer +PII, or database URLs with passwords unless the report is stored in a controlled +private incident system with explicit approval. + +Public reports should use hashes, IDs, redacted metadata, and links to internal +evidence systems. diff --git a/docs/articles/webhook-security-review-checklist.md b/docs/articles/webhook-security-review-checklist.md new file mode 100644 index 0000000..29a380b --- /dev/null +++ b/docs/articles/webhook-security-review-checklist.md @@ -0,0 +1,91 @@ +# Webhook Security Review Checklist + +Use this checklist when reviewing a Webhookery deployment, pilot, or fork. +It is written for SaaS security reviewers and platform teams. It is not legal +advice and does not certify a deployment. + +Do not paste secrets, provider credentials, raw customer payloads, private keys, +bearer tokens, session cookies, raw signatures, or database URLs with passwords +into public issues, docs, or review packages. + +## Inbound Provider Boundary + +- Raw body is preserved before parsing or verification. +- Signature verification uses exact raw bytes. +- Timestamp or replay-window checks are enforced where provider semantics + support them. +- Invalid signatures do not route to side-effecting destinations by default. +- Durable capture succeeds before any inbound success response. +- Storage failure returns non-success and does not leak secrets or payloads. + +## Producer Boundary + +- Product-event producers authenticate with scoped API keys, producer OAuth + credentials, or verified producer mTLS identities. +- Producer credentials are tenant and source scoped where configured. +- Opaque tokens and client secrets are stored hashed or encrypted, not in + plaintext. +- Revoked or expired credentials cannot ingest events. 
+ +## Tenant And Authorization Boundary + +- Every list, read, update, export, replay, retry, and admin path is tenant + scoped. +- Raw payload and transformed payload body reads require elevated permission + and audit evidence. +- Replay requires authorization, reason capture, and rate controls. +- Security-sensitive changes are audited and explainable. + +## Outbound Delivery Boundary + +- Endpoint URLs are validated on create/update and revalidated at delivery. +- Private, loopback, link-local, metadata, multicast, and reserved addresses + are blocked unless an explicit audited local/dev policy allows them. +- Redirects are not followed for delivery or signal egress. +- Delivery requests are signed over the exact payload bytes sent. +- Response bodies are truncated and redacted before storage. +- Receiver idempotency remains the receiver's responsibility. + +## Secrets And Privacy + +- Webhook secrets, endpoint signing secrets, API keys, OAuth client secrets, + SCIM tokens, session tokens, object-store credentials, database passwords, + and KMS details are redacted from logs, docs, UI, CLI output, and errors. +- Secret-box mode is configured deliberately for the environment. +- Key-custody mode and raw-storage mode are visible through redacted ops + status and `whcp doctor production`. +- Demo and release artifacts use synthetic data only. + +## Evidence And Retention + +- Raw body hashes and payload hashes remain after body retention. +- Audit-chain entries are not deleted by normal audit-event retention. +- Evidence exports include manifests and file hashes. +- Body-inclusive exports require explicit intent and elevated permission. +- Retention policies preserve metadata, hashes, receipts, deliveries, attempts, + and audit history. + +## Operations And Release Evidence + +- `make release-acceptance` passes. +- `make rc-check` passes locally, and DB-backed checks run with a disposable + `WEBHOOKERY_TEST_DATABASE_URL` before production use. 
+- `make finalize`, `make gosec`, and `make vuln` pass for the release commit. +- Docker image digest, SBOMs, Trivy results, and release evidence are attached + or linked from the release. +- Backup/restore drills have been rehearsed for the operator's deployment. +- Public metrics and dashboards do not include tenant labels or sensitive + payload data. + +## Review Exit Criteria + +A production-style review should end with one of: + +- approved for controlled self-hosted evaluation +- approved with accepted risks and owner/expiry/mitigation +- blocked until specific findings are fixed +- out of scope for Webhookery's current release-candidate maturity + +Use `docs/security-review-package.md`, +`docs/external-review-package.md`, and `docs/release-evidence-template.md` to +collect the supporting artifacts. diff --git a/docs/cli.md b/docs/cli.md new file mode 100644 index 0000000..1865f24 --- /dev/null +++ b/docs/cli.md @@ -0,0 +1,182 @@ +# Webhookery CLI Reference + +`whcp` is the operator and developer CLI for Webhookery processes, migrations, +control-plane APIs, evidence workflows, and release support. Exact behavior is +owned by `cmd/whcp`; this document is the human reference for current command +groups. + +Most API-backed commands accept: + +- `--base-url`, defaulting to `http://localhost:8080` +- `--api-key`, defaulting to `$WEBHOOKERY_API_KEY` + +When API calls fail, `whcp` surfaces the HTTP status, `stable_code` when the +server returns one, and `request_id` when present. The CLI must not include +bearer tokens, raw request bodies, webhook secrets, or payload bodies in those +errors. Stable codes are listed in `docs/error-codes.md`. + +Do not paste real API keys, provider credentials, webhook secrets, bearer +tokens, private keys, raw signatures, raw payload bodies, customer data, +database URLs with real credentials, or evidence bundles into examples, +recorded terminal sessions, issues, or support artifacts. 
+ +## Root Command Groups + +Running `go run ./cmd/whcp` prints the current root command list and exits +non-zero because a command is required. The groups are: + +| Group | Purpose | Typical scope | Elevated risk | +|-------|---------|---------------|---------------| +| `api`, `worker`, `scheduler`, `migrate` | Run processes and migrations. | Local environment access. | Migration and process lifecycle. | +| `admin`, `api-keys` | Bootstrap and database-backed API key lifecycle. | Owner or security-capable key. | Secret creation/revocation. | +| `producer-clients`, `producer-mtls-identities`, `key-custody` | Producer trust and key custody checks. | `security:write` for mutations. | Token/secret rotation and mTLS trust. | +| `doctor`, `ops` | Production preflight and operational visibility. | Local config or `ops:read`. | May expose operational posture; output must be redacted. | +| `identity-providers`, `scim-tokens`, `role-bindings`, `access-policies`, `authz` | Enterprise identity and authorization controls. | `security:write` for mutations. | High authorization impact. | +| `events`, `sources`, `provider-connections`, `adapters` | Event evidence, source config, provider credentials, and adapter governance. | `events:read`, `events:raw`, `sources:write`, or `security:write`. | Raw payloads, secrets, provider recovery. | +| `incidents` | Incident packets, attached event evidence, report snapshots, and incident evidence exports. | `incidents:read`, `incidents:write`, and `audit:read` for exports. | Evidence disclosure and support artifacts. | +| `endpoints`, `subscriptions`, `retry-policies`, `routes`, `transformations` | Outbound delivery configuration and reproducible payload shaping. | `routes:write` or related read scopes. | Delivery fanout and receiver impact. | +| `deliveries`, `replay-jobs`, `reconciliation-jobs`, `dead-letter`, `quarantine` | Delivery recovery, replay, provider reconciliation, DLQ, and quarantine decisions. 
| `deliveries:retry`, `replay:write`, or `security:write`. | Duplicate side effects and recovery claims. | +| `alerts`, `notification-channels`, `notification-deliveries` | Alert rules and signed notification egress. | `ops:read` or `ops:write`. | External egress and signing secrets. | +| `siem-sinks`, `siem-deliveries` | Signed audit metadata streaming. | `audit:read` for reads, `security:write` for mutations. | External egress and audit disclosure. | +| `audit`, `evidence`, `retention`, `schemas`, `signatures` | Audit evidence, local evidence-bundle inspection, retention, schema checks, and signature helpers. | `audit:read`, `events:raw`, `security:write`, `schemas:write`, or local file access. | Evidence export, retention, raw payload inclusion. | + +## Local And Validation + +| Task | Required scope | Example | Expected outcome | Elevated risk | +|------|----------------|---------|------------------|---------------| +| Run unit tests | Local shell | `make test` | Go tests pass. | No | +| Run docs-adjacent gate | Local shell | `make docs-check` | OpenAPI, vectors, SDK, deployment, collections, and metadata checks pass. | No | +| Run full gate | Local shell | `make finalize` | Formatting, lint, vulnerability, gosec, unit, race, contract, SDK, and metadata checks pass. | No | +| Apply migrations | Database URL | `go run ./cmd/whcp migrate up` | PostgreSQL schema reaches latest migration. | Yes, schema mutation | +| Start API | Runtime env | `go run ./cmd/whcp api` | API listens on configured address. | Process lifecycle | +| Run production doctor | Local config | `go run ./cmd/whcp doctor production` | `blocker`, `warning`, and `ok` findings without secrets. | Config disclosure | +| Run pilot doctor | Local config, optional network | `go run ./cmd/whcp doctor pilot --no-network` | Pilot posture findings without contacting PostgreSQL or receivers. Remove `--no-network` only for explicit safe connectivity checks. 
| Config disclosure | +| Check key custody | Secret custody env | `go run ./cmd/whcp key-custody test` | Encrypt/decrypt smoke succeeds without plaintext or ciphertext output. | Secret custody | + +## Identity And Access + +| Task | Required scope | Example | Expected outcome | Elevated risk | +|------|----------------|---------|------------------|---------------| +| Hash a local API key | Local shell | `go run ./cmd/whcp admin hash-key "$LOCAL_API_KEY"` | Prints a `sha256:` hash for bootstrap config. | Secret handling | +| Create API key | Owner or security-capable key | `go run ./cmd/whcp api-keys create --name local-operator --role owner --scopes '*' --api-key "$WEBHOOKERY_API_KEY"` | Returns a one-time API token and metadata. | Secret creation | +| Revoke API key | Owner or security-capable key | `go run ./cmd/whcp api-keys revoke --key-id key_... --reason "rotation" --api-key "$WEBHOOKERY_API_KEY"` | Key is revoked and cannot authenticate. | Access removal | +| Create identity provider | `security:write` | `go run ./cmd/whcp identity-providers create --name okta --issuer-url https://idp.example.com --client-id "$OIDC_CLIENT_ID" --client-secret "$OIDC_CLIENT_SECRET" --redirect-uri https://webhookery.example/v1/auth/oidc/callback --allowed-email-domains example.com --api-key "$WEBHOOKERY_API_KEY"` | OIDC provider metadata is created; secret is not returned. | Identity trust | +| Create SCIM token | `security:write` | `go run ./cmd/whcp scim-tokens create --name okta-scim --api-key "$WEBHOOKERY_API_KEY"` | Returns one-time token value and token metadata. | Secret creation | +| Bind role | `security:write` | `go run ./cmd/whcp role-bindings create --principal-type user --principal-id usr_... --role auditor --resource-family audit --environment production --reason "audit team access" --api-key "$WEBHOOKERY_API_KEY"` | Role binding is created and audited. 
| Authorization change | +| Add deny policy | `security:write` | `go run ./cmd/whcp access-policies create --name deny-prod-raw --action events:raw --effect deny --resource-family event --environment production --reason "limit raw payload exposure" --api-key "$WEBHOOKERY_API_KEY"` | Access policy is created and audited. | Authorization change | +| Explain authz | Relevant read access | `go run ./cmd/whcp authz explain --actor-id usr_... --action events:raw --resource-family event --resource-id evt_... --environment production --api-key "$WEBHOOKERY_API_KEY"` | Redacted decision with matched role/policy context. | No secrets expected | + +## Events, Sources, And Providers + +| Task | Required scope | Example | Expected outcome | Elevated risk | +|------|----------------|---------|------------------|---------------| +| List events | `events:read` | `go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY"` | Paginated event metadata. | No raw body | +| Search events | `events:read` | `go run ./cmd/whcp events search --provider stripe --external-id evt_... --verification invalid --status dlq --received-after 2026-06-04T10:00:00Z --api-key "$WEBHOOKERY_API_KEY"` | Tenant-scoped event metadata matching forensic filters. | No raw body | +| View timeline | `events:read` | `go run ./cmd/whcp events timeline --event-id evt_... --format table --api-key "$WEBHOOKERY_API_KEY"` | Versioned event timeline in `json`, `table`, or `markdown` format. | No raw body | +| Export raw payload | `events:raw` | `go run ./cmd/whcp events raw-export --event-id evt_... --reason "support case review" --output payload.bin --api-key "$WEBHOOKERY_API_KEY"` | Writes raw bytes to a private local file and records the reason on the raw-payload audit event. | Raw payload | +| Rotate source secret | `security:write` | `go run ./cmd/whcp sources rotate-secret --source-id src_... 
--secret "$NEXT_WEBHOOK_SECRET" --reason "scheduled rotation" --api-key "$WEBHOOKERY_API_KEY"` | New active secret version and bounded grace for prior version. | Secret rotation | +| Disable source | `sources:write` | `go run ./cmd/whcp sources update --source-id src_... --state disabled --reason "retire old webhook" --api-key "$WEBHOOKERY_API_KEY"` | Future ingress is rejected; historical evidence remains. | Ingress interruption | +| Create provider connection | `sources:write` | `go run ./cmd/whcp provider-connections create --name stripe-prod --provider stripe --credential "$PROVIDER_API_TOKEN" --config source_id=src_stripe --api-key "$WEBHOOKERY_API_KEY"` | Provider credential is encrypted and redacted metadata is returned. | Provider credential | +| Request adapter review | `security:write` | `go run ./cmd/whcp adapters transition --adapter-id pad_... --version-id adv_... --action request_review --reason "ready for security review" --api-key "$WEBHOOKERY_API_KEY"` | Adapter version moves through governance state. | Verification behavior | + +Raw payloads and provider API responses may contain PII or customer data. Keep +exports out of commits and public support artifacts. + +## Incidents And Reports + +| Task | Required scope | Example | Expected outcome | Elevated risk | +|------|----------------|---------|------------------|---------------| +| Create incident | `incidents:write` | `go run ./cmd/whcp incidents create --title "Stripe payment webhook failed" --reason "support investigation" --api-key "$WEBHOOKERY_API_KEY"` | Incident metadata is created and audited. | Support artifact | +| Attach event | `incidents:write`, `events:read` | `go run ./cmd/whcp incidents add-event --incident-id inc_... --event-id evt_... --reason "failed downstream delivery" --api-key "$WEBHOOKERY_API_KEY"` | Event is linked to the incident in the same tenant. 
| Evidence grouping | +| Generate report | `incidents:write`, `events:read` | `go run ./cmd/whcp incidents generate-report --incident-id inc_... --reason "support handoff" --api-key "$WEBHOOKERY_API_KEY"` | JSON and Markdown report snapshot is generated and audited. | Evidence disclosure | +| Save report | `incidents:read` | `go run ./cmd/whcp incidents report --incident-id inc_... --format markdown --output incident-report.md --api-key "$WEBHOOKERY_API_KEY"` | Markdown report is written with private file permissions. | Support artifact | +| Export incident evidence | `incidents:write`, `events:read`, `audit:read` | `go run ./cmd/whcp incidents export --incident-id inc_... --reason "customer evidence package" --output incident-evidence.tar.gz --api-key "$WEBHOOKERY_API_KEY"` | Bundle includes `incident_report.json`, `incident_report.md`, timeline evidence, manifest, and hashes. | Evidence disclosure | + +Incident reports use event timelines and hashes. They do not include raw +payload bodies, webhook secrets, signatures, bearer tokens, or private keys by +default. Exported incident bundles should be handled like other evidence +bundles and kept out of commits and public support channels. + +## Routing, Delivery, And Replay + +| Task | Required scope | Example | Expected outcome | Elevated risk | +|------|----------------|---------|------------------|---------------| +| Update endpoint URL | `routes:write` | `go run ./cmd/whcp endpoints update --endpoint-id end_... --url https://receiver.example/webhook --reason "move receiver" --api-key "$WEBHOOKERY_API_KEY"` | URL is SSRF-validated before commit. | Receiver egress | +| Rotate endpoint secret | `security:write` | `go run ./cmd/whcp endpoints rotate-secret --endpoint-id end_... --reason "scheduled rotation" --api-key "$WEBHOOKERY_API_KEY"` | New signing secret version is created; handle any rotation response as sensitive. 
| Secret rotation | +| Update route | `routes:write` | `go run ./cmd/whcp routes update --route-id rte_... --priority 10 --reason "prefer primary receiver" --api-key "$WEBHOOKERY_API_KEY"` | New route version is recorded. | Delivery fanout | +| Create retry policy | `routes:write` | `go run ./cmd/whcp retry-policies create --name standard --max-attempts 12 --max-duration-seconds 259200 --initial-delay-seconds 10 --max-delay-seconds 21600 --api-key "$WEBHOOKERY_API_KEY"` | Retry policy version is created. | Delivery volume | +| Retry delivery | `deliveries:retry` | `go run ./cmd/whcp deliveries retry --delivery-id del_... --reason "operator retry" --api-key "$WEBHOOKERY_API_KEY"` | New delivery attempt is scheduled. | Duplicate side effects | +| Preview replay | `replay:write` | `go run ./cmd/whcp replay-jobs preview --event-id evt_... --config-mode original --reason-code support_investigation --reason "inspect replay impact" --api-key "$WEBHOOKERY_API_KEY"` | Replay impact is returned without creating delivery jobs. | Duplicate side effect review | +| Create replay approval policy | `security:write` | `go run ./cmd/whcp replay-approval-policies create --scope-type source --scope-id src_... --default-expiry-seconds 86400 --reason "payment source replay requires approval" --api-key "$WEBHOOKERY_API_KEY"` | Matching future replays are created in pending approval before delivery work is enqueued. | Replay governance | +| Disable replay approval policy | `security:write` | `go run ./cmd/whcp replay-approval-policies disable --policy-id rap_... --reason "pilot route no longer sensitive" --api-key "$WEBHOOKERY_API_KEY"` | Future matching replays no longer require policy-driven approval. Existing pending jobs keep their recorded approval requirement. | Replay governance | +| Create replay job | `replay:write` | `go run ./cmd/whcp replay-jobs create --event-id evt_... 
--config-mode original --rate-limit-per-minute 60 --require-approval --approval-expires-at 2026-06-05T12:00:00Z --reason-code support_investigation --reason "customer replay request" --api-key "$WEBHOOKERY_API_KEY"` | Replay is scheduled or awaits approval with structured reason evidence. Pending approvals default to 24 hours when no expiry is supplied. | Duplicate side effects | +| Approve replay job | `replay:write` | `go run ./cmd/whcp replay-jobs approve --replay-job-id rpl_... --reason "approved replay window" --api-key "$WEBHOOKERY_API_KEY"` | Replay approval is audited and work can proceed when the approver is not the creator and the approval window is still open. | Duplicate side effects | +| Create reconciliation job | `replay:write` | `go run ./cmd/whcp reconciliation-jobs create --connection-id pcn_... --capture-missing --route-recovered --reason "recover missing provider events" --api-key "$WEBHOOKERY_API_KEY"` | Provider evidence job is created; recovered events route only when requested. | Recovery claims | +| Release DLQ entry | `deliveries:retry` | `go run ./cmd/whcp dead-letter release --entry-id dlq_... --reason-code receiver_fixed --reason "receiver recovered" --api-key "$WEBHOOKERY_API_KEY"` | Replay work is scheduled for the dead-letter entry with structured reason evidence. | Duplicate side effects | + +Replay and retry create new delivery work linked to existing evidence. They do +not mutate original event history. + +## Schemas And Transformations + +| Task | Required scope | Example | Expected outcome | Elevated risk | +|------|----------------|---------|------------------|---------------| +| Create transformation | `routes:write` | `go run ./cmd/whcp transformations create --name redact-email --operations-file operations.json --api-key "$WEBHOOKERY_API_KEY"` | Transformation is created with immutable versions. 
| Payload shaping | +| Dry-run transformation | Local file | `go run ./cmd/whcp transformations dry-run --payload-file payload.json --operations-file operations.json` | Prints transformed result for local review. | PII in local files | +| Update event type | `schemas:write` | `go run ./cmd/whcp schemas event-type-update --name invoice.paid --description "Invoice paid events" --reason "clarify contract" --api-key "$WEBHOOKERY_API_KEY"` | Event type metadata changes and is audited. | Contract change | +| Validate payload | `schemas:read` | `go run ./cmd/whcp schemas validate --name invoice.paid --version 2026-05-01 --payload-file payload.json --api-key "$WEBHOOKERY_API_KEY"` | Validation result is returned. | PII in local files | +| Check compatibility | `schemas:read` | `go run ./cmd/whcp schemas check-compat --name invoice.paid --version 2026-05-01 --new-schema-file schema-next.json --api-key "$WEBHOOKERY_API_KEY"` | Compatibility result is returned. | Contract change | + +Payload and schema files can contain customer data or business-sensitive +contracts. Keep them out of commits unless deliberately sanitized. + +## Audit, Retention, And Evidence + +| Task | Required scope | Example | Expected outcome | Elevated risk | +|------|----------------|---------|------------------|---------------| +| Export evidence | `audit:read`; add `events:raw` for payload bodies | `go run ./cmd/whcp audit export --include-timelines --include-payloads --reason "support case" --api-key "$WEBHOOKERY_API_KEY"` | Evidence bundle is created with manifest and hashes. | Raw payload inclusion | +| Download export | `audit:read`; add `events:raw` when export includes bodies | `go run ./cmd/whcp audit download --export-id exp_... --output evidence.tar.gz --api-key "$WEBHOOKERY_API_KEY"` | Bundle is written locally. 
| Evidence disclosure | +| Verify bundle locally | Local file | `go run ./cmd/whcp audit verify-bundle --file evidence.tar.gz` | Manifest schema, file hashes, and audit-chain proof verify. | Sensitive local file | +| View bundle locally | Local file | `go run ./cmd/whcp evidence view --file evidence.tar.gz` | Prints a JSON summary of manifest metadata, included files, timeline counts, audit-chain status, verification result, and redaction warnings without printing bundled file bodies. | Sensitive local file | +| Verify audit chain | `audit:read` | `go run ./cmd/whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY"` | Chain verification result is returned. | No raw body | +| Anchor audit chain | `security:write` | `go run ./cmd/whcp audit anchor --reason "daily anchor" --api-key "$WEBHOOKERY_API_KEY"` | Anchor is written after verification. | Evidence governance | +| Create retention policy | `security:write` | `go run ./cmd/whcp retention create --resource-type raw_payload --retention-days 30 --api-key "$WEBHOOKERY_API_KEY"` | Retention policy is created and audited. | Destructive retention | +| Place legal hold | `security:write` | `go run ./cmd/whcp retention update --policy-id ret_... --legal-hold --hold-reason "customer legal request" --api-key "$WEBHOOKERY_API_KEY"` | Policy is held and skipped by retention worker. | Legal/retention | + +Evidence exports with payloads are sensitive. Use scoped authorization, private +file permissions, and a recorded reason. +Support artifacts should omit raw payload bodies, webhook signatures, bearer +tokens, private keys, and customer data unless elevated raw-payload access and +a private handling path are explicitly approved. 
+ +## Operations, Alerts, And Signal Egress + +| Task | Required scope | Example | Expected outcome | Elevated risk | +|------|----------------|---------|------------------|---------------| +| Read metrics | `ops:read` | `go run ./cmd/whcp ops metrics --api-key "$WEBHOOKERY_API_KEY"` | Tenant-scoped operational metrics. | No secrets expected | +| Read queues | `ops:read` | `go run ./cmd/whcp ops queues --api-key "$WEBHOOKERY_API_KEY"` | Durable outbox and delivery queue status. | Operational posture | +| Create alert | `ops:write` | `go run ./cmd/whcp alerts create --name dlq-open --rule-type dead_letter_open --threshold 1 --reason "page on DLQ growth" --api-key "$WEBHOOKERY_API_KEY"` | Alert rule is created. | Paging behavior | +| Create notification channel | `ops:write` | `go run ./cmd/whcp notification-channels create --name ops-webhook --url https://ops.example/hooks/webhookery --signing-secret "$SIGNAL_SECRET" --api-key "$WEBHOOKERY_API_KEY"` | Signed alert egress channel is created. | External egress and secret | +| Retry notification | `ops:write` | `go run ./cmd/whcp notification-deliveries retry --delivery-id ndl_... --reason "receiver fixed" --api-key "$WEBHOOKERY_API_KEY"` | Notification delivery is rescheduled. | External egress | +| Create SIEM sink | `security:write` | `go run ./cmd/whcp siem-sinks create --name audit-stream --url https://siem.example/ingest --signing-secret "$SIEM_SIGNAL_SECRET" --api-key "$WEBHOOKERY_API_KEY"` | Signed SIEM sink is created. | Audit egress and secret | +| List SIEM failures | `audit:read` | `go run ./cmd/whcp siem-deliveries list --state failed --api-key "$WEBHOOKERY_API_KEY"` | Failed SIEM deliveries are listed. | Audit egress status | + +Signal and SIEM signing secrets are secrets. Use placeholders in docs and +managed secrets in deployments. 
+ +## Backup, Restore, Release, And Collections + +| Task | Required scope | Example | Expected outcome | Elevated risk | +|------|----------------|---------|------------------|---------------| +| Back up PostgreSQL | Database URL | `scripts/backup_postgres.sh backups` | Timestamped dump is written with restrictive permissions. | Sensitive backup | +| Restore PostgreSQL | Database URL and confirmation | `WEBHOOKERY_RESTORE_CONFIRM=restore scripts/restore_postgres.sh backups/webhookery-20260525T000000Z.dump` | Target DB is restored with `pg_restore --clean --if-exists`. | Destructive restore | +| Start local MinIO profile | Local shell | `docker compose --profile object-storage up --build` | Compose starts object-storage services. | Local credentials only | +| Lint Helm profile | Local Helm | `helm lint deploy/helm/webhookery` | Chart lint passes. | No | +| Check Terraform formatting | Local Terraform | `terraform fmt -check -recursive deploy/terraform` | Terraform files are formatted. | No | +| Release evidence checks | Local shell | `make release-acceptance` | Release evidence metadata checks pass. | No live providers | +| RC checks | Local shell, optional test DB URLs | `make rc-check` | RC acceptance checks pass or skip live DB work when URLs are absent. | May run destructive restore drill when restore DB URL is set | +| Collection checks | Local shell | `make collections-check` | Postman and Bruno smoke files are present and shaped correctly. | No | + +Backups, restore targets, and evidence bundles can contain sensitive operational +data. Do not commit generated backup files, raw payload exports, or release +evidence containing real customer data. diff --git a/docs/commercial-evaluation.md b/docs/commercial-evaluation.md new file mode 100644 index 0000000..a3d5a61 --- /dev/null +++ b/docs/commercial-evaluation.md @@ -0,0 +1,103 @@ +# Commercial Evaluation + +Webhookery is publicly available under `AGPL-3.0-only`. 
Commercial license +exceptions and paid evaluation packages are available for organizations that +need proprietary use rights, deployment review, release evidence, or contracted +support. + +This page is business guidance, not legal advice. Have counsel review AGPL and +any commercial agreement before relying on it. + +## Evaluation Path + +1. Run `docs/evaluator-quickstart.md`. +2. Review `docs/pilot-topology.md`, `docs/security-promise.md`, + `docs/provider-conformance.md`, and `docs/release-evidence-template.md`. +3. Identify provider mix, deployment topology, data sensitivity, evidence + packet needs, and support expectations. +4. Request a commercial evaluation through the contact path in `COMMERCIAL.md`. +5. Agree scope, deliverables, support boundary, license exception, and + non-claims in writing. + +## Webhookery Evidence Pilot + +The recommended paid pilot shape is: + +```text +Webhookery Evidence Pilot -- 14 days +Connect one provider, one downstream receiver, one failure scenario, +one replay workflow, and one evidence export. +``` + +Pilot deliverables: + +- deployment topology review against `docs/pilot-topology.md`; +- provider setup and verification review for the agreed provider; +- failure/replay drill using a synthetic or sanitized event; +- generated incident evidence packet; +- evidence bundle verification result; +- production-readiness gap report; +- accepted-risk and non-claim review; and +- commercial license/support recommendation. + +Out of scope unless agreed in writing: + +- live provider certification; +- compliance or legal evidentiary certification; +- managed-service availability; +- multi-region active-active operation; +- broad marketplace/plugin work; +- provider-side completeness guarantees; and +- exactly-once delivery claims. + +## Starting Ranges + +These are starting ranges for planning. Final pricing depends on scope, +deployment risk, support expectations, and written agreement. 
+ +| Offer | Starting range | Typical outcome | +| --- | ---: | --- | +| Commercial Evaluation | EUR 490-1,000 | Fit review, self-hosting path, license discussion, and next-step recommendation. | +| Webhookery Evidence Pilot | Fixed scope | One provider, one receiver, one failure/replay drill, one incident evidence packet, and a production-readiness gap report. | +| Release Evidence Package | EUR 2,500-5,000 | Release artifact review, SBOM/check evidence, known limits, and accepted-risk summary. | +| Production Readiness Review | EUR 7,500-12,500 | Deployment, backup/restore, security, retention, observability, and incident-readiness review. | +| Commercial License + Support | EUR 9,900-24,900 per year | Written license exception plus agreed support and update channel. | +| Custom Integration / Provider Adapter | Fixed scope or EUR 150-250/hour | Provider adapter, evidence workflow, deployment hardening, or receiver integration work. | + +No SLA, compliance certification, legal evidence certification, hosted service, +or provider-side completeness guarantee is included unless a written agreement +explicitly says so. + +## Required Inputs + +Provide only sanitized information: + +- expected provider list and event volume range +- self-hosting environment summary +- accepted or requested changes to `docs/pilot-topology.md` +- PostgreSQL and object-storage responsibility model +- security review requirements +- failure/replay scenario to test +- required support window +- desired license exception scope +- blocker list from local evaluation + +Do not send API keys, bearer tokens, webhook secrets, raw signatures, private +keys, provider credentials, raw customer payloads, customer data, database URLs +with passwords, or exploit payloads. 
+ +## Evaluation Output + +A commercial evaluation can produce: + +- fit/non-fit recommendation +- deployment-risk notes +- evidence-package recommendation +- completed `docs/pilot-evidence-template.md` for a scoped pilot +- production-readiness review scope +- commercial license exception proposal +- support package proposal +- implementation backlog for agreed custom work + +The evaluation does not change Webhookery's canonical non-claims in +`docs/security-promise.md`. diff --git a/docs/comparisons/build-vs-buy.md b/docs/comparisons/build-vs-buy.md new file mode 100644 index 0000000..4a22d76 --- /dev/null +++ b/docs/comparisons/build-vs-buy.md @@ -0,0 +1,77 @@ +# Build Vs Buy: Webhook Evidence Infrastructure + +Webhookery is for teams that need self-hosted, inspectable webhook evidence. +It is not always the right choice. This guide helps you decide whether to use +Webhookery, buy a hosted webhook platform, or keep a simpler internal tool. + +## Choose Webhookery When + +- You need self-hosted control over webhook evidence. +- You need durable capture before inbound success. +- You need replay, DLQ, retention, audit-chain verification, and release + evidence as first-class workflows. +- You need provider-specific signature evidence and raw payload preservation. +- You need commercial license exceptions or private modifications. +- Your security review prefers source-visible infrastructure and local release + evidence. + +## Choose A Hosted Vendor When + +- You want someone else to operate the control plane. +- You need hosted multi-region availability and vendor-managed scale. +- Your team does not want to own PostgreSQL, object storage, backups, + monitoring, upgrades, or incident response. +- Your primary need is outbound webhook delivery for your own API customers, + rather than inbound evidence and recovery. + +## Keep A Simpler Internal Tool When + +- Webhook volume is low and replay/audit evidence is not a business + requirement. 
+- Provider signatures and raw payload preservation are already covered by a + small internal service. +- Incidents can be resolved from existing logs without customer or auditor + evidence. +- You do not need tenant-scoped APIs, retention, audit exports, or operator + workflows. + +## Operational Ownership + +Self-hosting Webhookery means owning: + +- PostgreSQL durability and restore drills +- object-storage durability when S3 mode is enabled +- network policy and SSRF-safe egress posture +- TLS and mTLS configuration +- secret custody configuration +- monitoring, alerts, and incident response +- upgrade and migration review + +Use `docs/deployment.md`, `docs/operations.md`, and +`docs/day-2-operations.md` before production evaluation. + +## Honest Boundaries + +Webhookery does not claim: + +- exactly-once delivery +- provider-side event completeness +- downstream business success +- compliance certification +- hosted-service availability +- multi-region active-active operation + +Its narrower promise is more useful: if Webhookery returns inbound success, the +configured durable capture path has recorded evidence. Loss boundaries remain +explicit, and recovery/replay actions are auditable. + +## Evaluation Path + +1. Run `docs/evaluator-quickstart.md`. +2. Review `docs/security-promise.md`. +3. Review `docs/provider-conformance.md`. +4. Run `make rc-check` in a disposable environment. +5. Compare your incident and audit requirements against the ownership list + above. +6. Use `docs/commercial-evaluation.md` if commercial rights or support are + required. diff --git a/docs/comparisons/convoy.md b/docs/comparisons/convoy.md new file mode 100644 index 0000000..ac28be5 --- /dev/null +++ b/docs/comparisons/convoy.md @@ -0,0 +1,70 @@ +# Webhookery Vs Convoy + +Verification date: 2026-06-04 + +Official sources reviewed: + +- +- +- + +Current source notes: + +- Convoy's public site positions it as a webhook gateway for sending and + receiving events reliably. 
+- Convoy docs describe an open-source, high-performance, multi-tenant webhook + gateway for sending and receiving webhooks. +- The docs navigation includes endpoint management, signatures, sources, + subscriptions, events/deliveries, portal links, rate limits, filters, + metrics, circuit breaker, retention, and deployment topics. + +This page is a buyer-fit comparison, not a claim that one product is generally +better. Product surfaces change; re-check official sources before publishing or +using this page in sales material. + +## Public Positioning Summary + +Convoy positions itself as an open-source, high-performance webhooks gateway +for managing webhooks end to end. Its documentation describes support for both +sending and receiving webhooks, retries, rate limiting, circuit breaking, +customer-facing dashboards, and source verification for incoming provider +events. + +## Where Webhookery Differs + +Webhookery is audit-first and evidence-first. It focuses on durable capture +before inbound success, raw provider evidence, provider verification metadata, +versioned configuration evidence, payload snapshots, replay reason capture, +reconciliation gap evidence, retention metadata, evidence exports, and +audit-chain verification. + +Webhookery is a fit when the core question is: + +> Can we reconstruct and prove what Webhookery saw, decided, delivered, +> retained, replayed, or could not recover? + +Convoy may be a better fit when the core question is: + +> Can we run a broad open-source webhook gateway with sending, receiving, +> dashboards, rate limiting, and gateway features? + +## Evaluation Matrix + +| Need | Webhookery fit | Convoy fit | +| --- | --- | --- | +| Audit-first webhook evidence and release evidence | Strong fit | Evaluate current Convoy audit/evidence features. | +| Broad open-source webhook gateway | Narrower fit | Stronger fit. 
| +| Incoming provider source verification | Supported with provider-specific evidence | Supported by Convoy sources with provider/config-specific verification. | +| Reproducible route/retry/transformation/payload evidence | Strong fit | Evaluate current Convoy reproducibility model. | +| Commercial license exception for private self-hosted modifications | Available by written agreement | Review Convoy's current community/enterprise terms. | + +## Honest Recommendation + +Evaluate Webhookery if auditability, release evidence, exact raw capture, +configuration reproducibility, and recovery proof are primary requirements. + +Evaluate Convoy if you want a broader open-source webhook gateway surface and +its current sending/receiving feature set fits your needs. + +Webhookery does not claim exactly-once delivery, provider-side event +completeness, compliance certification, or hosted-service availability. diff --git a/docs/comparisons/hookdeck.md b/docs/comparisons/hookdeck.md new file mode 100644 index 0000000..0678b9f --- /dev/null +++ b/docs/comparisons/hookdeck.md @@ -0,0 +1,70 @@ +# Webhookery Vs Hookdeck + +Verification date: 2026-06-04 + +Official sources reviewed: + +- +- +- + +Current source notes: + +- Hookdeck positions Outpost as managed and self-hosted infrastructure for + outbound webhooks and event destinations. +- The Outpost page and repository describe a Go binary/Docker image, managed + or self-hosted operation, event destinations beyond HTTP webhooks, retries, + tenant isolation, observability, and a user portal. +- Hookdeck's public navigation also distinguishes receiving webhooks through + Event Gateway from sending webhooks through Outpost. + +This page is a buyer-fit comparison, not a claim that one product is generally +better. Product surfaces change; re-check official sources before publishing or +using this page in sales material. 
+ +## Public Positioning Summary + +Hookdeck positions itself around reliable webhook infrastructure for working +with webhooks and external events. Its public site describes an Event Gateway +for receiving webhooks, tools for testing/debugging/monitoring, and Outpost +for sending webhooks and event destinations as managed or self-hosted +infrastructure. + +## Where Webhookery Differs + +Webhookery is narrower. It is self-hosted webhook evidence infrastructure: +durable capture, provider verification evidence, delivery evidence, replay, +retention, audit-chain verification, reconciliation evidence, and release +evidence. + +Webhookery is a fit when the core question is: + +> Can we prove what happened to this webhook and replay or recover safely? + +Hookdeck may be a better fit when the core question is: + +> Can we use a mature managed webhook infrastructure platform and avoid +> operating it ourselves? + +## Evaluation Matrix + +| Need | Webhookery fit | Hookdeck fit | +| --- | --- | --- | +| Self-hosted evidence control plane | Strong fit | Check current Hookdeck/Outpost deployment model and feature scope. | +| Managed webhook infrastructure | Not the goal | Stronger fit. | +| Inbound provider evidence and audit-chain review | Strong fit | Evaluate against current Hookdeck event history/audit features. | +| Outbound webhook platform for your API customers | Supported, but evidence-first | Hookdeck Outpost is specifically positioned for sending webhooks/event destinations. | +| Commercial license exception for private self-hosted modifications | Available by written agreement | Review Hookdeck's current commercial terms. | + +## Honest Recommendation + +Evaluate Webhookery if self-hosted durable capture, replay evidence, release +evidence, private modifications, or audit reviewability are central +requirements. 
+ +Evaluate Hookdeck if you prefer managed webhook infrastructure, existing hosted +operations, or Hookdeck's broader workflow around testing, debugging, +monitoring, and Outpost delivery. + +Webhookery does not claim exactly-once delivery, provider-side event +completeness, compliance certification, or hosted-service availability. diff --git a/docs/comparisons/svix.md b/docs/comparisons/svix.md new file mode 100644 index 0000000..3b0a7ab --- /dev/null +++ b/docs/comparisons/svix.md @@ -0,0 +1,71 @@ +# Webhookery Vs Svix + +Verification date: 2026-06-04 + +Official sources reviewed: + +- +- +- +- + +Current source notes: + +- Svix's public site positions the product as webhooks as a service for making + webhook sending simple, secure, and scalable. +- The public site now also surfaces Svix products for sending, receiving, + testing, and portal workflows. +- Svix publishes documentation, API reference material, SDK/library links, and + open-source server/source-code links. + +This page is a buyer-fit comparison, not a claim that one product is generally +better. Product surfaces change; re-check official sources before publishing or +using this page in sales material. + +## Public Positioning Summary + +Svix positions itself as webhooks as a service. Its public site emphasizes +making webhook sending simple, secure, and scalable, with automatic retries, +logs and monitoring, security, developer experience, an application portal, and +API-first webhook sending. + +Svix also publishes docs and source links around webhook service operation, +webhook receiving/testing products, API references, SDKs, and verification. + +## Where Webhookery Differs + +Webhookery is not primarily an outbound webhook SaaS. It is a self-hosted +control plane for inbound and outbound webhook evidence: durable provider +capture, verification metadata, routing decisions, delivery payload snapshots, +replay evidence, reconciliation gaps, retention, exports, and audit-chain +verification. 
+ +Webhookery is a fit when the core question is: + +> Can we self-host the evidence trail for webhook events and prove loss +> boundaries later? + +Svix may be a better fit when the core question is: + +> Can we outsource or standardize webhook sending for our own API customers? + +## Evaluation Matrix + +| Need | Webhookery fit | Svix fit | +| --- | --- | --- | +| Self-hosted inbound provider evidence | Strong fit | Evaluate current Svix self-hosted/open-source scope. | +| Hosted webhooks as a service | Not the goal | Stronger fit. | +| Outbound API-customer webhook sending | Supported, but evidence-first | Core Svix positioning. | +| Audit-chain and release-evidence package | Strong fit | Evaluate against current Svix audit and compliance surfaces. | +| Commercial license exception for private self-hosted modifications | Available by written agreement | Review Svix's current commercial terms. | + +## Honest Recommendation + +Evaluate Webhookery if your primary pain is self-hosted evidence around +provider receipt, raw payloads, route decisions, replay, retention, and audit. + +Evaluate Svix if your primary need is a mature webhook sending service, +developer portal, and delivery platform for events you publish to customers. + +Webhookery does not claim exactly-once delivery, provider-side event +completeness, compliance certification, or hosted-service availability. diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..dd564ec --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,128 @@ +# Configuration Reference + +This is the canonical reference for Webhookery environment variables. It covers +the current Go configuration loader, local examples, deployment profiles, and +test-only variables. + +Do not commit real database URLs with credentials, API keys, provider +credentials, webhook secrets, Vault tokens, AWS credentials, object-store +secrets, private keys, raw signatures, raw payload bodies, or customer data. 
+Values shown here are placeholders or local development values. + +## Source Files + +| File | Use | +|------|-----| +| `.env.example` | Docker Compose local API, worker, scheduler, and migration processes. | +| `.api.env.example` | Local API process without Compose. | +| `.test.env.example` | Optional live integration and RC test variables. | +| `deploy/kubernetes/configmap.yaml` | Non-secret Kubernetes profile defaults. | +| `deploy/kubernetes/secret.example.yaml` | Example Secret shape with placeholders only. | +| `deploy/helm/webhookery/values.yaml` | Helm values for common config and Secret data. | +| `internal/config/config.go` | Current runtime loader and validation behavior. | + +The example files are not production-safe. Production operators must replace +placeholder passwords, bootstrap hashes, object-store credentials, and local +master keys through their own secret manager. + +## Runtime Variables + +| Variable | Applies to | Default | Secret | Production guidance | +|----------|------------|---------|--------|---------------------| +| `WEBHOOKERY_DATABASE_URL` | API, worker, scheduler, migrate, backup, restore | none, required | Yes, when it contains credentials | Use a managed secret. Require TLS to PostgreSQL where available. Do not reuse test databases. | +| `WEBHOOKERY_HTTP_ADDR` | API | `:8080` | No | Bind behind a trusted ingress or load balancer. | +| `WEBHOOKERY_TLS_CERT_FILE` | API | empty | No | Required by `doctor production` unless TLS terminates elsewhere and the deployment accepts that boundary. Configure with `WEBHOOKERY_TLS_KEY_FILE`. | +| `WEBHOOKERY_TLS_KEY_FILE` | API | empty | Yes | Store as a mounted secret. Configure with `WEBHOOKERY_TLS_CERT_FILE`. | +| `WEBHOOKERY_PRODUCER_MTLS_CLIENT_CA_FILE` | API | empty | No | Requires API TLS cert and key. Use only when producer mTLS is part of the trust model. | +| `WEBHOOKERY_ENABLE_UI` | API | `false` | No | Keep disabled unless the operator UI is required. 
The UI uses the same API authorization model. | +| `WEBHOOKERY_LOG_LEVEL` | API, worker, scheduler | `info` | No | Use `info` in production unless debugging a contained incident. Logs must not include secrets or raw payloads. | +| `WEBHOOKERY_ENVIRONMENT` | API, worker, scheduler, doctor | `development` | No | Set `production` before running `go run ./cmd/whcp doctor production`. | +| `WEBHOOKERY_TRUSTED_PROXY_CIDRS` | API | empty | No | Set only to CIDRs for reverse proxies that Webhookery owns or explicitly trusts. Leave empty for direct API exposure. | +| `WEBHOOKERY_SECRET_BOX_MODE` | API, worker, scheduler | `local` | No | Allowed values are `local`, `vault-transit`, and `aws-kms`. Choose one custody mode before writing production secrets. | +| `WEBHOOKERY_MASTER_KEY_BASE64` | API, worker, scheduler | empty | Yes | Required for `local` secret box mode. Must be base64-encoded 32 bytes. Replace the all-zero local example. | +| `WEBHOOKERY_VAULT_ADDR` | API, worker, scheduler | empty | No | Required for `vault-transit` mode. Use HTTPS in production. | +| `WEBHOOKERY_VAULT_TOKEN` | API, worker, scheduler | empty | Yes | Required for `vault-transit` mode. Store only in a secret manager. | +| `WEBHOOKERY_VAULT_TRANSIT_KEY` | API, worker, scheduler | empty | Usually no | Required for `vault-transit` mode. Treat key names as operational metadata. | +| `WEBHOOKERY_AWS_REGION` | API, worker, scheduler | empty | No | Required for `aws-kms` mode. | +| `WEBHOOKERY_AWS_KMS_KEY_ID` | API, worker, scheduler | empty | Sensitive metadata | Required for `aws-kms` mode. Avoid printing full key IDs in logs or support artifacts. | +| `WEBHOOKERY_AWS_KMS_ENDPOINT` | API, worker, scheduler | empty | No | Optional. Use only for LocalStack or controlled test endpoints; `doctor production` warns on HTTP endpoints. | +| `WEBHOOKERY_RAW_STORAGE_MODE` | API, worker | `postgres` | No | Allowed values are `postgres` and `s3`. PostgreSQL remains the metadata authority. 
| +| `WEBHOOKERY_OBJECT_STORAGE_ENDPOINT` | API, worker | empty | No | Required when raw storage mode is `s3`. Use an internal endpoint where possible. | +| `WEBHOOKERY_OBJECT_STORAGE_BUCKET` | API, worker | empty | No | Required when raw storage mode is `s3`. Use a dedicated bucket with backup and retention policy. | +| `WEBHOOKERY_OBJECT_STORAGE_ACCESS_KEY` | API, worker | empty | Yes | Required when raw storage mode is `s3`. Use a scoped credential. | +| `WEBHOOKERY_OBJECT_STORAGE_SECRET_KEY` | API, worker | empty | Yes | Required when raw storage mode is `s3`. Store only in a secret manager. | +| `WEBHOOKERY_OBJECT_STORAGE_REGION` | API, worker | empty | No | Set according to the object store. | +| `WEBHOOKERY_OBJECT_STORAGE_USE_SSL` | API, worker | `true` | No | Keep `true` in production unless the object store is reached over a controlled private channel with separate transport protection. | +| `WEBHOOKERY_BOOTSTRAP_TENANT_ID` | API | `ten_bootstrap` | No | Use a stable tenant ID only for controlled bootstrap. | +| `WEBHOOKERY_BOOTSTRAP_API_KEY_HASH` | API | empty | Sensitive | Use only for initial bootstrap. Remove or rotate after creating database-backed API keys. | +| `WEBHOOKERY_BOOTSTRAP_API_KEY_PREFIX` | API | empty | No | Display prefix only. Do not use a real API key as the prefix. | +| `WEBHOOKERY_PROVIDER_PROOF_MANIFEST_PATH` | `doctor pilot` | `docs/provider-proof-manifest.json` | No | Optional override for local provider-proof metadata. The doctor reports only presence and schema shape. | +| `WEBHOOKERY_PILOT_RECEIVER_CHECK_URL` | `doctor pilot` | empty | Sensitive if it contains tenant names or URL tokens | Optional receiver URL for explicit pilot connectivity checks. Do not include URL credentials or tokens. | +| `WEBHOOKERY_PILOT_ALLOW_RECEIVER_CHECK` | `doctor pilot` | empty | No | Must be exactly `true` before `doctor pilot` attempts receiver connectivity. SSRF validation still applies. 
| + +## Test And Release Variables + +| Variable | Applies to | Required when | Secret | Guidance | +|----------|------------|---------------|--------|----------| +| `WEBHOOKERY_TEST_DATABASE_URL` | `make live-postgres-check`, DB-backed `make rc-check`, integration tests | Running live PostgreSQL checks | Yes, when it contains credentials | Use a disposable database. Never point it at production. | +| `WEBHOOKERY_RC_RESTORE_DATABASE_URL` | `make rc-check` restore drill | Running the destructive restore drill | Yes, when it contains credentials | Must point to a separate disposable restore database. | +| `WEBHOOKERY_TEST_REDIS_ADDR` | `make redis-integration-test` | Running Redis integration tests | No | Redis is not an audit authority. | +| `WEBHOOKERY_TEST_MASTER_KEY_BASE64` | Test fixtures | Tests that need an explicit test key | Yes | Use only local test values. | +| `WEBHOOKERY_RESTORE_CONFIRM` | `scripts/restore_postgres.sh` | Restoring a PostgreSQL dump | No | Must be exactly `restore`; this is a destructive-action guard. | + +## Secret Custody + +Secret-bearing variables must be provided by the operator's secret manager or +orchestrator secret facility: + +- `WEBHOOKERY_DATABASE_URL` +- `WEBHOOKERY_TLS_KEY_FILE` +- `WEBHOOKERY_MASTER_KEY_BASE64` +- `WEBHOOKERY_VAULT_TOKEN` +- `WEBHOOKERY_OBJECT_STORAGE_ACCESS_KEY` +- `WEBHOOKERY_OBJECT_STORAGE_SECRET_KEY` +- `WEBHOOKERY_BOOTSTRAP_API_KEY_HASH` +- live test database URLs when they include credentials + +`WEBHOOKERY_AWS_KMS_KEY_ID` and `WEBHOOKERY_VAULT_TRANSIT_KEY` are usually +identifiers rather than secret values, but they can reveal infrastructure shape. +Avoid full values in public logs, issues, screenshots, and support requests. + +## Profile Notes + +Docker Compose reads `.env` (copied from `.env.example`) and starts PostgreSQL, +the migration job, API, and worker. The optional object-storage profile starts +MinIO and uses the local object-storage placeholders from `.env.example`. 
+ +Kubernetes and Helm profiles expect an externally managed PostgreSQL database +and a separately managed Secret. The checked-in Secret examples use placeholders +only. They are shape examples, not credentials. + +Terraform wraps the Helm chart and intentionally does not accept secret values +as module variables. Create or rotate Kubernetes Secrets outside Terraform so +credentials do not enter Terraform state. + +## Production Review + +Before promoting a deployment, run: + +```bash +go run ./cmd/whcp doctor production +``` + +Fix all blockers. The doctor reads configuration and environment values, but it +must not print database passwords, API keys, webhook secrets, Vault tokens, AWS +credentials, raw KMS key IDs, object-store credentials, raw payloads, or raw +signatures. + +For bounded pilot readiness, run: + +```bash +go run ./cmd/whcp doctor pilot --no-network +``` + +Remove `--no-network` only when `WEBHOOKERY_DATABASE_URL` points at a +disposable or intended pilot database. Receiver checks require +`WEBHOOKERY_PILOT_RECEIVER_CHECK_URL` and +`WEBHOOKERY_PILOT_ALLOW_RECEIVER_CHECK=true`; the URL is treated as hostile +input and is not printed. diff --git a/docs/customer-discovery-notes-template.md b/docs/customer-discovery-notes-template.md new file mode 100644 index 0000000..7075da6 --- /dev/null +++ b/docs/customer-discovery-notes-template.md @@ -0,0 +1,77 @@ +# Customer Discovery Notes Template + +Use this template for early Webhookery discovery calls before a formal pilot. +For pilot execution feedback, use `docs/pilot-feedback-template.md`. + +Do not record secrets, raw payloads, provider credentials, customer data, +private keys, bearer tokens, raw signatures, session cookies, exploit payloads, +or database URLs with passwords. 
+ +## Call Metadata + +- Date: +- Interviewer: +- Organization / team: +- Contact: +- Segment: +- Permission to follow up: `yes | no` +- Public reference permission: `yes | no | later` + +## Current Webhook Surface + +- Providers used: +- Internal producers: +- Approximate monthly event volume: +- Critical event types: +- Existing webhook tooling: +- Existing self-hosting requirements: + +## Pain And Incidents + +- Recent webhook incident: +- Hardest question to answer during incidents: +- Current replay process: +- Current audit/evidence process: +- Current provider reconciliation process: +- Cost of a missed, duplicated, or late event: + +## Security And Procurement + +- Required deployment model: +- Data residency requirements: +- Security review requirements: +- License constraints: +- Support or SLA expectations: +- Budget owner: + +## Fit Assessment + +| Signal | Notes | +| --- | --- | +| Needs durable capture evidence | | +| Needs replay/DLQ/retry control | | +| Needs provider-aware verification | | +| Needs self-hosting | | +| Needs commercial license exception | | +| Needs production-readiness review | | +| Hosted vendor is a better fit | | +| Simpler internal tool is enough | | + +## Next Step + +- Suggested next action: +- Owner: +- Due date: +- Discovery classification: `docs gap | bug | evaluator friction | pilot candidate | paid custom integration | roadmap candidate | out of scope` +- Evidence required before engineering work: + +## Sanitization Check + +Before storing or sharing these notes, confirm: + +- no secrets +- no raw payloads +- no customer data +- no provider credentials +- no exploit payloads +- no database credentials diff --git a/docs/day-2-operations.md b/docs/day-2-operations.md new file mode 100644 index 0000000..1b4ac6d --- /dev/null +++ b/docs/day-2-operations.md @@ -0,0 +1,197 @@ +# Day-2 Operations Guide + +This guide is for controlled, single-region, self-hosted Webhookery +deployments after the first successful install. 
It links to canonical command +references instead of duplicating every CLI option. + +Use: + +- `docs/configuration.md` for environment variables. +- `docs/deployment.md` for deployment posture. +- `docs/stability.md` for compatibility and rollback boundaries. +- `docs/release-evidence-template.md` for release evidence. +- `docs/cli.md` for full `whcp` command syntax. + +Do not include real API keys, bearer tokens, session cookies, webhook secrets, +private keys, provider credentials, raw payload bodies, raw signatures, +database URLs with real credentials, or customer data in tickets, public logs, +or release artifacts. + +## Daily Checks + +Run these checks from a trusted operator workstation or CI job with a scoped +operator key: + +```bash +curl -fsS https://webhookery.example.com/readyz +whcp ops storage --base-url https://webhookery.example.com --api-key "$WEBHOOKERY_API_KEY" +whcp ops queues --base-url https://webhookery.example.com --api-key "$WEBHOOKERY_API_KEY" +whcp ops metrics --base-url https://webhookery.example.com --api-key "$WEBHOOKERY_API_KEY" +whcp alerts firings --base-url https://webhookery.example.com --api-key "$WEBHOOKERY_API_KEY" +whcp audit chain-head --base-url https://webhookery.example.com --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected result: readiness exits zero, storage reports configured backends +without secrets, queue age is within the local objective, no unacknowledged +critical firings exist, and the audit chain has a current head. + +## Backups And Restore Cadence + +At minimum: + +- back up PostgreSQL daily and before upgrades; +- back up object storage whenever `WEBHOOKERY_RAW_STORAGE_MODE=s3`; +- run a restore drill before each release candidate and after backup tooling + changes. + +PostgreSQL backup: + +```bash +WEBHOOKERY_DATABASE_URL=postgres://... scripts/backup_postgres.sh backups +``` + +Expected result: the script prints a `backups/webhookery-<timestamp>.dump` +path with restrictive permissions. 
+ +Disposable restore drill: + +```bash +WEBHOOKERY_DATABASE_URL=postgres://... \ +WEBHOOKERY_RESTORE_CONFIRM=restore \ +scripts/restore_postgres.sh backups/webhookery-20260525T000000Z.dump + +WEBHOOKERY_DATABASE_URL=postgres://... go run ./cmd/whcp migrate up +curl -fsS http://localhost:8080/readyz +whcp audit verify-chain --base-url http://localhost:8080 --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected result: restore and migrations exit zero, readiness succeeds, and +audit-chain verification returns `valid=true`. + +Object bodies are not inside PostgreSQL dumps. If S3-compatible raw storage is +enabled, restore and verify the bucket separately before declaring the drill +complete. + +## Upgrade Flow + +1. Review `docs/stability.md` for migration and rollback boundaries. +2. Run `go run ./cmd/whcp doctor production` against the target configuration. +3. Run `make finalize` on the release commit. +4. Run `WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check` against a + disposable database. +5. Back up PostgreSQL and object storage. +6. Run the migration job once. +7. Deploy API, worker, and scheduler images from the same release. +8. Verify readiness, queue status, audit-chain verification, and alert state. + +Expected result: the migration job exits zero, all process readiness checks +pass, and accepted events continue to produce delivery or explicit failure +evidence. + +Rollback is not only an image rollback. If migrations are not backward +compatible for the previous binary, restore from verified backup into a +controlled target instead of downgrading over live state. + +## Incident Triage + +Use this order during an incident: + +1. Check `/readyz` and process logs for API, worker, scheduler, and migration + jobs. +2. Check `whcp ops queues` for oldest pending outbox age, expired leases, + delivery queue depth, DLQ, and quarantine counts. +3. 
Check alert firings and acknowledge only after assigning an owner: + + ```bash + whcp alerts firings --api-key "$WEBHOOKERY_API_KEY" + whcp alerts ack --reason "owner: on-call; investigating queue age" --api-key "$WEBHOOKERY_API_KEY" + ``` + +4. Check storage posture: + + ```bash + whcp ops storage --api-key "$WEBHOOKERY_API_KEY" + ``` + +5. For provider gaps, create or inspect reconciliation jobs with fake/local + evidence first; do not call live providers from local acceptance gates. +6. For audit concerns, run: + + ```bash + whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" + whcp audit anchors --api-key "$WEBHOOKERY_API_KEY" + ``` + +7. Preserve logs and database state before attempting destructive restore, + bulk replay, retention changes, or credential rotation. + +## Alert Handling + +Alert rules and firings are operational state, not evidence authority. The +underlying evidence remains in events, receipts, deliveries, attempts, +quarantine, DLQ, reconciliation items, audit events, and audit-chain entries. + +For every alert: + +- assign an owner and incident link; +- acknowledge with a reason; +- resolve only after the underlying queue, storage, audit, or egress condition + is clear; +- export or record relevant audit evidence when the incident affects trust + boundaries. + +Notification channels and SIEM sinks send signed HTTPS operational signals. +They must not contain raw payload bodies, secrets, provider credentials, API +keys, bearer tokens, or URL credentials. + +## Key Rotation + +Rotate keys through their dedicated surfaces: + +- API keys: create replacement, update automation, revoke old key. +- Source verification secrets: rotate with grace period, then revoke old + version. +- Endpoint signing secrets: rotate and verify receiver compatibility. +- Producer OAuth client secrets: rotate and update producers; tokens expire. +- Secret custody: follow `docs/configuration.md`; cross-mode re-encryption is + not automatic. 
+ +After rotation: + +```bash +go run ./cmd/whcp doctor production +whcp audit events --api-key "$WEBHOOKERY_API_KEY" +whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected result: doctor has no blockers, rotation audit events exist, and the +audit chain verifies. + +## Retention Review + +Review retention policies before enabling or changing them: + +```bash +whcp retention list --api-key "$WEBHOOKERY_API_KEY" +whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" +``` + +Retention may delete body/data material while preserving hashes and metadata. +Expected post-retention behavior is `410 Gone` for deleted bodies and retained +metadata for evidence review. + +## Audit Exports And Chain Verification + +Before handing evidence to another party: + +```bash +whcp audit export --reason "release evidence" --api-key "$WEBHOOKERY_API_KEY" +whcp audit export-status --api-key "$WEBHOOKERY_API_KEY" +whcp audit download --output release-evidence/audit-export.tar.gz --api-key "$WEBHOOKERY_API_KEY" +whcp audit verify-bundle --file release-evidence/audit-export.tar.gz +whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected result: export reaches ready state, bundle verification passes, and +audit-chain verification reports continuity. Body-inclusive exports require +explicit elevated permission and should remain out of public release evidence. diff --git a/docs/demo-media-checklist.md b/docs/demo-media-checklist.md new file mode 100644 index 0000000..88238aa --- /dev/null +++ b/docs/demo-media-checklist.md @@ -0,0 +1,86 @@ +# Demo Media Checklist + +Use this checklist before publishing screenshots, GIFs, short videos, or +slides made from Webhookery demos. + +The approved source demo is `examples/webhook-evidence-demo/`. It uses +synthetic provider payloads and fake local evidence paths. Do not record real +providers, customer receivers, or production databases. 
+ +Prepare recording material with: + +```bash +scripts/demo_media.sh plan --output tmp/demo-media +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make demo-media +``` + +`plan` writes a sanitized script outline without running Webhookery. `make +demo-media` regenerates the local evidence demo under `tmp/demo-media/output` +and requires a disposable PostgreSQL URL. + +## Before Recording + +- [ ] Use a clean checkout or disposable branch. +- [ ] Run `make docs-check`. +- [ ] Run `WEBHOOKERY_TEST_DATABASE_URL=postgres://... make demo-media` + against a disposable local PostgreSQL database. +- [ ] Use only fixture data from `examples/webhook-evidence-demo/fixtures/`. +- [ ] Set terminal scrollback low enough that old secrets cannot appear. +- [ ] Disable shell history capture if commands contain local connection URLs. +- [ ] Use a terminal profile without private hostnames, usernames, or cloud + account names in the prompt. + +## Allowed To Show + +- Local fake event IDs such as `evt_demo_invoice_paid`. +- Local fake source, route, delivery, replay, DLQ, retention, and audit-chain + evidence. +- `make rc-check`, `make release-acceptance`, and demo command output. +- `docs/security-promise.md`, `docs/provider-conformance.md`, and + `docs/release-evidence-template.md`. +- Local placeholder URLs such as `localhost`. + +## Do Not Show + +- API keys. +- bearer tokens. +- session cookies. +- OAuth or OIDC tokens. +- webhook signing secrets. +- raw provider signature headers. +- private keys or client certificates. +- provider API credentials. +- database URLs with passwords. +- AWS, Vault, object-store, or cloud account credentials. +- raw customer payload bodies. +- customer PII. +- private hostnames, VPN names, or internal IP addresses. +- exploit payloads or vulnerability proof-of-concept details. 
+ +## Required On-Screen Boundaries + +At least one screen or narration segment must make these boundaries clear: + +- Webhookery is self-hosted software, not a hosted managed service. +- Inbound success means durable capture, not downstream business success. +- Delivery is at-least-once, not exactly once. +- Provider reconciliation cannot prove provider-side event completeness. +- Release evidence is not compliance certification. + +## Suggested Recording Flow + +1. Show README or the static landing page headline. +2. Run the evaluator quickstart command sequence. +3. Show the demo passing. +4. Show the release evidence and provider conformance docs. +5. End on the commercial evaluation or support path if the asset is + buyer-facing. + +## Final Review + +- [ ] No secrets, credentials, raw signatures, or private payloads are visible. +- [ ] No production hostnames, internal IPs, or customer names are visible. +- [ ] The asset does not claim exactly-once delivery. +- [ ] The asset does not claim provider-side completeness. +- [ ] The asset does not claim compliance certification. +- [ ] The asset links to the current release notes and release evidence. diff --git a/docs/deployment.md b/docs/deployment.md new file mode 100644 index 0000000..f7cbe0a --- /dev/null +++ b/docs/deployment.md @@ -0,0 +1,157 @@ +# Deployment Posture + +This is the common deployment guide for self-hosted Webhookery. Profile-specific +instructions live under `deploy/`, but production expectations belong here. + +Webhookery's release-candidate deployment posture is single-region, +PostgreSQL-first, and operator-managed. It does not claim managed-service +availability, multi-region active-active operation, exactly-once delivery, or +provider-side event completeness. 
+ +## External Dependencies + +Production-like deployments must provide: + +- PostgreSQL for events, receipts, raw payload metadata, dedupe records, + deliveries, audit rows, retention state, evidence export metadata, and + durable outbox work. +- TLS-capable API ingress or direct API TLS configuration. +- Secret custody for database URLs, API bootstrap hashes, master keys, provider + credentials, endpoint signing secrets, object-store credentials, OIDC + secrets, SIEM secrets, and notification signing secrets. +- Optional S3-compatible object storage when `WEBHOOKERY_RAW_STORAGE_MODE=s3`. +- Backup and restore procedures for PostgreSQL and object storage when used. +- Network controls for API ingress, worker egress, object storage, Vault/AWS + KMS, and customer-controlled outbound delivery URLs. + +Deployment profiles do not install production PostgreSQL, object storage, +ingress, DNS, TLS certificates, network policies, service monitors, or external +secret managers for you. + +Hardened examples are included for adaptation: + +- `deploy/helm/webhookery/values-production.example.yaml` +- `deploy/kubernetes/networkpolicy.example.yaml` +- `deploy/observability/prometheus-rules.example.yaml` + +They are examples only. Review selectors, namespaces, resource requests, +egress rules, and alert thresholds against your environment before use. + +## TLS And Ingress + +Choose one API TLS boundary and document it: + +- terminate TLS in Webhookery with `WEBHOOKERY_TLS_CERT_FILE` and + `WEBHOOKERY_TLS_KEY_FILE`; or +- terminate TLS at a trusted ingress and route only trusted internal traffic to + the API. + +If producer mTLS is required at the app process, configure +`WEBHOOKERY_PRODUCER_MTLS_CLIENT_CA_FILE` with API TLS certificate and key +files. Webhookery does not trust proxy-supplied mTLS identity headers in this +slice. 
+ +If the API sits behind a reverse proxy and session IP metadata should use +`X-Forwarded-For`, set `WEBHOOKERY_TRUSTED_PROXY_CIDRS` only to immediate +proxy CIDRs that the operator controls. + +## Secret Custody + +Use `docs/configuration.md` as the canonical variable reference. + +Minimum secret-bearing values usually include: + +- `WEBHOOKERY_DATABASE_URL` +- `WEBHOOKERY_MASTER_KEY_BASE64` for local secret-box mode +- `WEBHOOKERY_VAULT_TOKEN` for Vault Transit mode +- object-store access and secret keys for S3-compatible raw storage +- bootstrap API key hash during controlled bootstrap + +Terraform module inputs intentionally do not accept secret values. Create or +rotate Kubernetes Secrets outside Terraform so credentials do not enter +Terraform state. + +Do not commit real secrets, provider credentials, private keys, database URLs +with real credentials, raw signatures, raw payloads, or customer data. + +## Object Storage + +PostgreSQL is always the metadata and evidence authority. S3-compatible object +storage can hold raw bodies when `WEBHOOKERY_RAW_STORAGE_MODE=s3`. + +In S3 mode: + +- inbound success requires the object write and PostgreSQL metadata commit to + both succeed; +- backup and restore must cover both PostgreSQL and the bucket; +- object-store TLS should remain enabled in production; +- bucket retention and lifecycle rules must match the retention posture in + Webhookery. + +PostgreSQL dumps do not include S3 object bodies. + +## Network Policy And Egress + +Workers deliver to customer-controlled URLs. Treat those URLs as hostile input: + +- allow HTTPS egress only where possible; +- block private, loopback, link-local, multicast, reserved, and metadata + addresses at the network layer in addition to application SSRF checks; +- re-resolve and revalidate destinations at delivery time; +- keep redirects disabled unless an audited policy says otherwise. 
+ +Also restrict egress to PostgreSQL, object storage, Vault/AWS KMS, notification +receivers, SIEM sinks, and customer endpoints according to the deployment's +network model. + +## Readiness And Promotion + +Before promotion: + +```bash +go run ./cmd/whcp doctor production +make finalize +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make live-postgres-check +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check +``` + +Use a disposable database for live checks. Do not point test gates at +production databases or live provider accounts. + +After deployment: + +- `/readyz` succeeds; +- API, worker, scheduler, and migration job status is healthy; +- `whcp ops storage`, `whcp ops queues`, and `whcp ops metrics` return + redacted operational state; +- audit-chain verification succeeds; +- bootstrap access has been removed, rotated, or restricted. + +## Backup, Restore, Upgrade, And Rollback + +Before upgrades that touch migrations, storage, retention, audit chain, +exports, or secret custody: + +1. Back up PostgreSQL with `scripts/backup_postgres.sh`. +2. Back up object storage separately when S3 mode is enabled. +3. Restore into a disposable database with `scripts/restore_postgres.sh`. +4. Run migrations on the restored database. +5. Verify `/readyz`, event timelines, audit-chain verification, evidence bundle + verification, storage status, and queue status. + +Rollback is not only an image rollback. Check migration compatibility first. +If a migration is not safe to roll back automatically, restore from a verified +backup into a controlled target and preserve the failed state for analysis. + +## Deployment Profiles + +| Profile | Path | Boundary | +|---------|------|----------| +| Docker Compose | `docker-compose.yml` | Local development and evaluation. Starts PostgreSQL, migration, API, worker, and optional MinIO profile. | +| Kubernetes | `deploy/kubernetes/` | Minimal manifests for API, worker, scheduler, migration job, config, and placeholder Secret shape. 
| +| Helm | `deploy/helm/webhookery/` | Chart for the same workload shape with existing Secret support. | +| Terraform | `deploy/terraform/webhookery-helm/` | Wrapper around the Helm chart. Does not manage secrets or external dependencies. | +| Observability examples | `deploy/observability/` | Prometheus starter rules for aggregate metrics. Does not install Prometheus or Alertmanager. | + +Use the profile README for exact commands, and use this document for shared +production posture. diff --git a/docs/documentation-maintenance.md b/docs/documentation-maintenance.md new file mode 100644 index 0000000..4465dc4 --- /dev/null +++ b/docs/documentation-maintenance.md @@ -0,0 +1,93 @@ +# Documentation Maintenance + +This document defines how Webhookery docs stay current without turning every +page into a duplicate source of truth. + +## Provider Claim Freshness + +Provider behavior changes over time. Any documentation or code review that +changes provider-specific semantics must verify current official upstream docs +before changing claims about signatures, retry windows, timeout behavior, +redelivery, reconciliation, event ordering, payload shape, CloudEvents support, +or SSRF guidance. + +The author of the change owns the freshness record. The reviewer owns checking +that the record is present before merge. + +For each dated provider-specific claim, record: + +- owner or reviewer; +- review date in `YYYY-MM-DD` format; +- official source URL; +- scope checked, such as signature verification, redelivery, retries, + timestamp window, or SSRF guidance; +- follow-up date or release milestone for the next review. + +Dated claims older than 90 days must be rechecked before they are used in +release evidence, security review material, provider adapter changes, or +operator-facing runbooks. 
If an official source no longer supports the claim, +update the claim in the owning canonical doc, adjust tests or behavior when +needed, and record the old claim as stale in the change description. + +Historical design claims in `.initial_design.md` are not implementation proof. +Several provider behavior claims there were originally captured during planning +and include May 25, 2026 examples. Treat them as design context until current +official docs are checked and the maintained docs or implementation are updated. + +## Freshness Records + +| Area | Owner | Review date | Official sources | Scope checked | Follow-up | +|------|-------|-------------|------------------|---------------|-----------| +| Stripe operator and live-proof docs | Codex | 2026-06-04 | , | Signature verification with exact raw body, timestamp replay context, Stripe CLI test-mode forwarding, retry and manual resend boundaries. | Before v0.2 pilot release evidence or by 2026-09-02. | +| GitHub operator and live-proof docs | Codex | 2026-06-04 | , , , | `X-Hub-Signature-256` verification with exact raw body, `X-GitHub-Delivery` identity, repository ping/push proof flow, manual redelivery boundaries. | Before v0.2 pilot release evidence or by 2026-09-02. | +| Shopify operator and live-proof docs | Codex | 2026-06-04 | , , , | `X-Shopify-Hmac-SHA256` verification with exact raw body, `X-Shopify-Webhook-Id` identity, `X-Shopify-Topic` metadata, development-store proof flow, retry and topic-specific recovery boundaries. | Before v0.2 pilot release evidence or by 2026-09-02. | + +## Official Source Registry + +These are the current official source locations to start from. URL availability +was checked on 2026-05-27; that check does not certify every behavior claim as +current. 
+ +| Area | Official source | +|------|-----------------| +| Stripe webhooks | | +| GitHub webhooks | | +| GitHub redelivery | | +| Shopify webhooks | | +| Shopify webhook troubleshooting | | +| Slack request signing | | +| Slack Events API | | +| CloudEvents | | +| OWASP SSRF guidance | | + +Prefer official provider documentation over blog posts, memory, generated +answers, SDK behavior, or third-party examples. When official docs conflict +with implementation behavior, describe the gap as a current limitation rather +than rewriting the docs to imply support. + +## Documentation Review Checklist + +Use this checklist for documentation-only changes and for code changes that +alter docs, examples, contracts, deployment profiles, or release evidence. + +- Audience: name the reader and task the changed doc serves. +- Doc type: classify the change as entry point, reference, runbook, policy, + contract, deployment profile, generated artifact, or archive. +- Source of truth: update the owning canonical file first and link from + secondary docs instead of duplicating long tables or command catalogs. +- Implemented vs intended: do not describe planned behavior as current unless + code, OpenAPI, migrations, deployment profiles, or scripts prove it. +- Examples: use placeholders only; do not include real credentials, raw + signatures, raw payload bodies, private keys, customer data, or production + database URLs. +- Commands: run the narrow relevant command, usually `make docs-check`, and + record skipped checks with the reason. +- Provider and security claims: refresh provider-specific claims through the + freshness rule above and link to official sources when behavior changed. +- Non-claims: keep durable capture, at-least-once delivery, no exactly-once, + no provider-side completeness, no compliance certification, and no + live-provider acceptance language aligned with `docs/security-promise.md`. 
+- Generated artifacts: keep `openapi.yaml`, `sdk/openapi.yaml`, SDK docs, and + collections aligned when contracts or examples change. +- Evidence: do not edit audit artifacts except explicit remediation checkmarks + or requested evidence updates. diff --git a/docs/error-codes.md b/docs/error-codes.md new file mode 100644 index 0000000..d392e4f --- /dev/null +++ b/docs/error-codes.md @@ -0,0 +1,46 @@ +# Error Codes + +Webhookery API problem responses include two code fields: + +- `code`: a short legacy problem code kept for compatibility. +- `stable_code`: a namespaced code intended for CLI output, SDK handling, + support triage, and incident reports. + +Problem responses also include `request_id`. Do not include bearer tokens, +webhook secrets, raw signatures, raw payload bodies, database URLs, private +keys, provider credentials, or unnecessary PII in problem details. + +## Major Stable Codes + +| Stable code | Typical status | Meaning | Operator action | +|-------------|----------------|---------|-----------------| +| `WEBHOOKERY_AUTHENTICATION_REQUIRED` | 401 | Bearer token, session, or client credential is missing or invalid. | Retry with a valid credential; rotate if exposure is suspected. | +| `WEBHOOKERY_TENANT_ACCESS_DENIED` | 403 | Actor lacks tenant membership, scope, role, or raw-payload permission. | Check API key scopes, role bindings, access policies, and tenant context. | +| `WEBHOOKERY_VALIDATION_FAILED` | 400 | Request body, query, path, or form input is malformed or unsupported. | Fix the request shape using `openapi.yaml` and preserve `request_id` for support. | +| `WEBHOOKERY_PROVIDER_SIGNATURE_INVALID` | 401 | Provider webhook evidence was captured, but signature verification failed. | Check provider secret, exact raw body handling, timestamp policy, and source configuration. | +| `WEBHOOKERY_DURABLE_CAPTURE_UNAVAILABLE` | 503 | Required durable capture dependency is unavailable before acknowledgement. 
| Do not force success; restore PostgreSQL/object-storage health and retry. | +| `WEBHOOKERY_RAW_PAYLOAD_RETAINED_METADATA_ONLY` | 410 | Raw body was removed or expired by retention while metadata and hashes remain. | Use metadata, hashes, timeline, and audit evidence; do not treat body absence as silent loss. | +| `WEBHOOKERY_SSRF_BLOCKED_DESTINATION` | 400 | Customer-controlled endpoint, notification, or SIEM URL failed SSRF policy. | Use an allowed HTTPS destination and revalidate redirects/DNS behavior. | +| `WEBHOOKERY_PAYLOAD_TOO_LARGE` | 413 | Request body exceeds configured capture limit. | Adjust provider/source configuration or size limits only after risk review. | +| `WEBHOOKERY_HEADERS_TOO_LARGE` | 431 | Header count or total header bytes exceed ingress limits. | Reduce headers or review configured limits. | +| `WEBHOOKERY_RESOURCE_NOT_FOUND` | 404 | Resource does not exist or is not visible to the actor's tenant. | Confirm ID and tenant scope. | +| `WEBHOOKERY_INTERNAL_ERROR` | 500 | Unexpected server-side failure. | Preserve `request_id`, check logs/metrics, and avoid exposing internal detail. | +| `WEBHOOKERY_UNKNOWN_ERROR` | varies | Fallback for a future or unmapped problem code. | Preserve response body and `request_id`; update client handling if this recurs. | + +## CLI Behavior + +For API calls that decode a response internally, `whcp` returns errors that +include the HTTP status, `stable_code` when present, and `request_id` when +present. The CLI must not include bearer tokens or raw request bodies in those +errors. + +For commands that stream raw JSON responses, the API problem body is written as +returned by the server and the command exits non-zero on non-2xx status. + +## Incident Reports + +Incident reports may reference stable error codes from delivery attempts, +replay previews, retention reads, or support notes. 
A stable code is evidence +for local Webhookery behavior; it does not prove downstream business success, +provider-side completeness, exactly-once delivery, or legal/compliance +certification. diff --git a/docs/evaluator-quickstart.md b/docs/evaluator-quickstart.md new file mode 100644 index 0000000..8740f61 --- /dev/null +++ b/docs/evaluator-quickstart.md @@ -0,0 +1,236 @@ +# Evaluator Quickstart + +This guide takes a new evaluator from a clean checkout to a local incident +packet that shows the Webhookery evidence loop: + +> A Stripe-style payment webhook is captured, downstream delivery fails, the +> event reaches DLQ, replay succeeds after receiver recovery, and Webhookery +> writes a verifiable incident evidence packet. + +The flow is local-only. It uses synthetic provider payloads, a fake receiver, +and a disposable PostgreSQL database. Do not use live provider credentials, +customer endpoints, real webhook secrets, raw customer payloads, or production +databases. + +## Prerequisites + +- Go matching `go.mod` +- Docker and Docker Compose +- `make` +- a clean checkout of this repository + +## 1. Start PostgreSQL + +```bash +docker compose up -d postgres +export WEBHOOKERY_TEST_DATABASE_URL='postgres://webhookery:change-me@localhost:5432/webhookery?sslmode=disable' +``` + +Expected result: + +```text +Container webhookery-postgres-1 Running +``` + +The exact container name and status text may differ by Compose version. The URL +above matches the default `docker-compose.yml` values. + +## 2. 
Run The Evidence Demo + +```bash +examples/webhook-evidence-demo/run.sh +``` + +Expected result: + +```text +demo: running local webhook evidence demo +demo: failed payment webhook incident packet +ok webhookery/internal/e2e +demo: provider ingest to signed delivery +ok webhookery/internal/e2e +demo: invalid signature quarantine +ok webhookery/internal/e2e +demo: retry, DLQ release, and replay modes +ok webhookery/internal/e2e +demo: retention, export, and audit-chain permission gates +ok webhookery/internal/e2e +demo: scenario result: downstream failure recorded before replay +demo: scenario result: replay delivery succeeded after receiver recovery +demo: output: .../examples/webhook-evidence-demo/output +demo: completed +``` + +Durations after the `ok` lines vary by machine. + +## 3. Inspect The Incident Packet + +The demo writes sanitized output to +`examples/webhook-evidence-demo/output/`: + +```text +incident-report.md +incident-report.json +evidence-manifest.json +verify-output.json +README.md +evidence.tar.gz +``` + +Read the Markdown report first: + +```bash +sed -n '1,180p' examples/webhook-evidence-demo/output/incident-report.md +``` + +Expected result: the report includes summary, event identity, provider +verification, raw capture evidence, route/configuration snapshot, delivery +attempt timeline, retry/DLQ state, replay history, retention state, +audit-chain references, and known gaps/non-claims. + +Verify the generated bundle: + +```bash +go run ./cmd/whcp audit verify-bundle --file examples/webhook-evidence-demo/output/evidence.tar.gz +``` + +Expected result: + +```json +{"valid":true,"manifest_sha256":"sha256:...","checked_files":4,"checked_chain_entries":0,"failures":null} +``` + +`verify-output.json` records the same local verification result from the demo +run. A successful run has `result.valid: true`. + +## 4. What This Proves + +- Webhookery accepts the synthetic Stripe-style event only after durable local + evidence writes. 
+- Raw capture evidence is represented by IDs and hashes instead of raw payload + bodies in the incident packet. +- Invalid signatures are persisted as evidence and not routed. +- Delivery failure, DLQ transition, endpoint recovery, replay work, and + successful replay delivery are visible in the incident report. +- The local evidence bundle verifies by manifest and file hashes. +- Retention and export checks preserve metadata and permission boundaries. + +## 5. What This Does Not Prove + +- It does not prove downstream business processing succeeded. +- It does not claim exactly-once delivery or global ordering. +- It does not prove provider-side event completeness. +- It does not certify live Stripe, GitHub, Shopify, Slack, AWS, Vault, or + customer receiver behavior. +- It is not compliance certification, legal evidentiary certification, a + restore drill, or a production deployment review. + +See `docs/security-promise.md` for the canonical promise and non-claims. + +## 6. Optional Live-Provider Proof Guides + +The local evaluator path above uses synthetic provider vectors. For manual +sanitized proof against real provider test flows, use: + +- `docs/live-provider-proof/stripe.md` +- `docs/live-provider-proof/github.md` +- `docs/live-provider-proof/shopify.md` +- `docs/providers/stripe.md` +- `docs/providers/github.md` +- `docs/providers/shopify.md` + +These guides are external/manual proof procedures. They are not provider +certification, do not require committed secrets, and do not replace the local +release gates. + +## 7. Run Release-Candidate Acceptance + +```bash +make rc-check +``` + +Expected result: + +```text +rc-check: release-candidate acceptance checks passed +``` + +When `WEBHOOKERY_TEST_DATABASE_URL` is set, `make rc-check` includes the +DB-backed release-candidate E2E checks. If the variable is not set, the script +prints that those DB-backed checks were skipped. + +## 8. 
Optional Local API Smoke + +Start the local API stack: + +```bash +cp .env.example .env +docker compose up --build +``` + +In another shell: + +```bash +curl -fsS http://localhost:8080/readyz +export WEBHOOKERY_API_KEY=dev-bootstrap-key +go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY" +go run ./cmd/whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected result: + +- `/readyz` returns success. +- `whcp events list` returns JSON. +- `whcp audit verify-chain` returns a JSON verification result. + +The bootstrap key is for local development only. Do not use it for production +or production-like evaluation. + +## 9. Review Before Production Decisions + +- `docs/security-promise.md` +- `docs/why-webhookery.md` +- `docs/provider-conformance.md` +- `docs/provider-proof-manifest.json` +- `docs/evidence-bundle-profiles.md` +- `docs/release-evidence-template.md` +- `docs/stability.md` +- `docs/operations.md` +- `docs/day-2-operations.md` +- `COMMERCIAL.md` + +## Troubleshooting + +If the demo says `WEBHOOKERY_TEST_DATABASE_URL is required`, start PostgreSQL +with Docker Compose and export the variable exactly as shown above. + +If the demo cannot connect to `localhost:5432`, confirm that your Compose +PostgreSQL service publishes port `5432`: + +```bash +docker compose ps postgres +``` + +If PostgreSQL is already running on another port, update the URL before running +the demo: + +```bash +export WEBHOOKERY_TEST_DATABASE_URL='postgres://USER:PASSWORD@HOST:PORT/DATABASE?sslmode=disable' +``` + +If the output directory check fails, make sure `WEBHOOKERY_DEMO_OUTPUT_DIR` +points inside the repository and does not resolve through a symlink outside the +repository. + +Do not paste production database URLs, provider secrets, raw payload bodies, or +generated evidence bundles into issues, support requests, screenshots, or demo +recordings. 
+ +## Cleanup + +```bash +docker compose down --remove-orphans +``` + +If you created a disposable database outside Docker Compose, drop that database +using your normal local PostgreSQL tooling after the evaluation. diff --git a/docs/evidence-bundle-profiles.md b/docs/evidence-bundle-profiles.md new file mode 100644 index 0000000..a8099a9 --- /dev/null +++ b/docs/evidence-bundle-profiles.md @@ -0,0 +1,64 @@ +# Evidence Bundle Profiles + +This reference defines approved export profiles for common review audiences. +The current implementation exposes export inclusion flags, not a named +`--profile` CLI option. Use these profiles as policy labels when choosing +`whcp audit export` flags, reviewing incident exports, and deciding whether a +bundle is safe to share. + +Raw payload bodies are never part of a default profile. Including raw payloads +or normalized/delivery payload bodies requires elevated permission, a reason, +and a separate review before sharing. + +## Profile Matrix + +| Profile | Audience | Include | Exclude by default | Review rule | +|---------|----------|---------|--------------------|-------------| +| `minimal-hash-proof` | External reviewer who only needs bundle integrity proof. | `manifest.json`, file hashes, audit-chain proof when present, non-claims. | Timelines, raw payload bodies, normalized payload bodies, provider response bodies. | Safe starting point for public examples after path and identifier review. | +| `customer-support` | Customer support or customer-facing incident handoff. | Incident report, event identity, verification status, delivery/replay timeline, hashes, redacted errors, non-claims. | Raw payload bodies, secrets, signatures, bearer tokens, provider credentials, private endpoint URLs. | Share only after support owner confirms identifiers and messages are sanitized. | +| `commercial-evaluation` | Paid evaluator or production-readiness reviewer. 
| Incident report, manifest, timelines, audit proof, provider conformance references, pilot evidence checklist, accepted risks. | Raw bodies unless explicitly approved in private scope. | Attach to `docs/pilot-evidence-template.md` and keep completed evidence outside public source. | +| `security-review` | Security reviewer under private review scope. | Manifest, audit events, audit-chain proof, config evidence, timelines, redaction policy, relevant incident report. | Secrets, bearer tokens, private keys, plaintext webhook secrets, unnecessary PII. | May include sensitive metadata only under the private review scope in `docs/security-review-package.md`. | +| `internal-forensics` | Internal SRE/security investigation. | Full manifest, timelines, audit events, chain proof, incident report, reconciliation evidence, hashes. | Raw bodies unless the investigator has `events:raw` and a recorded reason. | Keep in restricted storage; do not forward as a support artifact without downscoping. | + +## CLI Flag Mapping + +Use the current flags to approximate a profile: + +| Profile | Example flags | +|---------|---------------| +| `minimal-hash-proof` | `whcp audit export --reason "hash proof for review"` | +| `customer-support` | `whcp audit export --include-timelines --reason "customer support incident handoff"` | +| `commercial-evaluation` | `whcp audit export --include-timelines --reason "commercial evaluation evidence"` | +| `security-review` | `whcp audit export --include-timelines --reason "security review evidence"` | +| `internal-forensics` | `whcp audit export --include-timelines --reason "internal incident investigation"` | + +Only add `--include-raw` or `--include-payloads` when the actor has the +required raw-payload permission, the reason is specific, and the destination is +private. Those flags can expose sensitive payload data and should not be used +for customer-support or public examples by default. 
+ +## Incident Exports + +Incident evidence exports should default to the `customer-support` shape: + +- include `incident_report.json` and `incident_report.md`; +- include timelines, manifest, hashes, and audit references; +- include non-claims from `docs/security-promise.md`; and +- omit raw payload bodies, webhook secrets, signatures, bearer tokens, + private keys, provider credentials, and endpoint secrets. + +If a reviewer needs more than the customer-support shape, create a private +review scope first and record the reason in the export request. + +## Verification + +Every shared bundle should be verified locally before handoff: + +```bash +go run ./cmd/whcp audit verify-bundle --file evidence.tar.gz +``` + +Expected result: verification returns `valid: true`. A valid bundle proves the +local manifest and file hashes are consistent; it does not prove provider-side +event completeness, downstream business success, legal admissibility, or +exactly-once delivery. diff --git a/docs/external-review-accepted-risks.md b/docs/external-review-accepted-risks.md new file mode 100644 index 0000000..0f46eaf --- /dev/null +++ b/docs/external-review-accepted-risks.md @@ -0,0 +1,30 @@ +# External Review Accepted Risks + +This file tracks accepted risks that affect production-maturity claims. It is a +template and current registry. Do not include exploit payloads, raw payload +bodies, secrets, private keys, bearer/session tokens, provider credentials, raw +signatures, customer data, or database URLs with real credentials. + +Release evidence must copy relevant rows into the release-specific evidence +package. A risk with missing owner, expiry, mitigation, or release decision is +not accepted. + +## Current Registry + +| ID | Source | Severity | Risk | Owner | Expiry | Mitigation | Release decision | +|----|--------|----------|------|-------|--------|------------|------------------| +| _none_ | | | | | | | | + +## Status Values + +- `pass`: reviewed and closed. 
+- `fail`: unresolved and release-blocking. +- `blocked`: review cannot complete because evidence or access is missing. +- `skipped`: only allowed when copied into release evidence as accepted risk. +- `accepted_risk`: owner, expiry, mitigation, and decision are recorded. + +## Non-Certification Boundary + +Accepted risk tracking does not make Webhookery compliance-certified, legally +evidentiary-certified, or externally timestamped. It only records release +decisions for controlled self-hosted adoption. diff --git a/docs/external-review-findings-template.md b/docs/external-review-findings-template.md new file mode 100644 index 0000000..d4073ad --- /dev/null +++ b/docs/external-review-findings-template.md @@ -0,0 +1,44 @@ +# External Review Findings Template + +Use this template to track findings from an external security or +production-maturity review. Keep sensitive proof material outside public source control. +Do not include secrets, raw payload bodies, customer data, exploit payloads, +private keys, bearer/session tokens, provider credentials, raw signatures, or +database URLs with real credentials.
+ +## Review + +- Review name: +- Reviewer: +- Commit or tag: +- Report date: +- Evidence package: +- Overall status: `pass|fail|blocked|accepted_risk` + +## Findings + +| ID | Severity | Title | Affected area | Owner | Status | Release blocking | Target date | +|----|----------|-------|---------------|-------|--------|------------------|-------------| +| EXT-001 | | | | | `open\|fixed\|accepted_risk\|wontfix` | `yes\|no` | | + +## Finding Detail + +### EXT-001 + +- Severity: +- Affected files or endpoints: +- Reproduction summary: +- Evidence location: +- Risk: +- Fix or mitigation: +- Tests or checks: +- Release decision: + +## Closure + +- Fixed findings: +- Accepted risks: +- Deferred non-blockers: +- Release-blocking open findings: +- Reviewer sign-off: +- Maintainer sign-off: diff --git a/docs/external-review-package.md b/docs/external-review-package.md new file mode 100644 index 0000000..18cc8ec --- /dev/null +++ b/docs/external-review-package.md @@ -0,0 +1,54 @@ +# External Review Package + +This document is the public index for external review material. It does not +replace a paid or private security review, and it must not contain secrets, +raw payloads, exploit payloads, customer data, provider credentials, private +keys, bearer tokens, session cookies, or database URLs with passwords. + +## Review Inputs + +| Artifact | Purpose | +| --- | --- | +| `README.md` | Product summary and evaluator routing. | +| `docs/security-promise.md` | Core promise, security invariants, and non-claims. | +| `docs/feature-behavior.md` | Implemented behavior summary. | +| `docs/provider-conformance.md` | Dated provider support and limitations. | +| `docs/release-evidence-template.md` | Required release evidence fields and gates. | +| `docs/release-evidence-sample.md` | Public example of a completed release packet. | +| `docs/external-review-scope.md` | Scope template for independent review. | +| `docs/external-review-findings-template.md` | Finding tracker template.
| +| `docs/external-review-accepted-risks.md` | Public sanitized accepted-risk registry. | +| `docs/articles/webhook-security-review-checklist.md` | Security-review checklist for SaaS reviewers. | + +## Review Questions + +- Does Webhookery preserve raw evidence before trust? +- Can the reviewer reproduce durable capture and invalid-signature rejection + with local fakes? +- Are tenant boundaries explicit in code, API, docs, and tests? +- Are secrets redacted from logs, errors, CLI output, UI, docs, and release + artifacts? +- Are replay, retention, audit export, audit-chain verification, reconciliation, + and signal egress claims supported by repository evidence? +- Are non-claims preserved in public docs and release notes? + +## Review Outputs + +External review output should be tracked in a sanitized package: + +- review scope +- reviewed commit/tag +- findings +- fixed findings +- accepted risks with owner, expiry, and mitigation +- release-blocking decision +- production-maturity language review + +Private exploit details, credentials, customer evidence, and raw payloads should +remain outside public source control. + +## Release Impact + +Broad production-maturity language is blocked unless critical/high findings are +fixed or explicitly accepted with owner, expiry, mitigation, and release +decision. diff --git a/docs/external-review-scope.md b/docs/external-review-scope.md new file mode 100644 index 0000000..4181c3e --- /dev/null +++ b/docs/external-review-scope.md @@ -0,0 +1,80 @@ +# External Review Scope Template + +Use one completed copy per external security or production-maturity review. +Do not include real API keys, webhook secrets, bearer tokens, session cookies, +private keys, provider credentials, raw payload bodies, raw signatures, +database URLs with real credentials, customer data, exploit payloads, or +unredacted logs in public review packages. 
+ +## Review Identity + +- Review name: +- Reviewer organization: +- Review owner: +- Start date: +- End date: +- Commit or tag: +- Deployment profile reviewed: +- Evidence package location: +- Status: `planned|in_progress|complete|accepted_risk|blocked` + +## Scope + +Include: + +- API, worker, scheduler, migration, CLI, and minimal UI code. +- PostgreSQL migrations and store methods. +- Provider verification, normalization, reconciliation, and recovery paths. +- Outbound delivery, notification, and SIEM egress. +- Authentication, sessions, producer OAuth, producer mTLS, OIDC, SCIM, RBAC, + ABAC, and API-key behavior. +- Secret custody modes: local, Vault Transit, and AWS KMS envelope encryption. +- Raw payload/object storage, retention, audit exports, audit chains, anchors, + and bundle verification. +- Docker Compose, Dockerfile, Kubernetes, Helm, Terraform, CI, release, and + evidence scripts. + +Exclude unless explicitly contracted: + +- Live provider accounts or customer receivers. +- Legal evidentiary certification. +- Compliance certification. +- External timestamping services. +- Multi-region active-active deployment. +- SAML, HSM/PKCS#11, marketplace plugins, Kafka/NATS backends, or + vendor-specific notification apps. + +## Review Questions + +- Can Webhookery return success before durable capture under any configured + mode? +- Can raw bytes be mutated before provider signature verification? +- Can unverified provider payloads route by default? +- Can one tenant read, export, replay, mutate, or infer another tenant's data? +- Can secrets, raw payloads, signatures, tokens, URL credentials, or private + key material leak through logs, errors, UI, CLI, metrics, exports, backups, + or CI artifacts? +- Can SSRF controls be bypassed at endpoint create/test/delivery time? +- Can replay, DLQ release, retention, reconciliation, or audit export be abused + without authorization and audit evidence? +- Can audit-chain or bundle tampering be detected?
+- Are migration, restore, and rollback boundaries explicit and rehearsable? + +## Required Evidence + +- `make finalize` output. +- `make release-acceptance` output. +- `make rc-check` output with and without `WEBHOOKERY_TEST_DATABASE_URL` where + feasible. +- `make perf-smoke` output from disposable local PostgreSQL. +- `make provider-conformance-check` output. +- Backup/restore drill output when persistence or evidence behavior changed. +- SBOM, vulnerability, gosec, Trivy, Docker build, and OpenAPI/SDK checks. +- Production doctor output with secrets redacted. +- Branch protection status or accepted-risk record. + +## Exit Criteria + +Broad production-maturity language is allowed only when findings are fixed or +recorded in `docs/external-review-accepted-risks.md` with owner, severity, +expiry, mitigation, and release-blocking decision. diff --git a/docs/failure-drills.md b/docs/failure-drills.md new file mode 100644 index 0000000..f38f3ff --- /dev/null +++ b/docs/failure-drills.md @@ -0,0 +1,67 @@ +# Failure Drills + +This runbook defines safe local and pilot failure drills for Webhookery. The +goal is repeated evidence, not chaos testing. Run destructive drills only +against disposable local or pilot-approved resources. + +The helper script can list drills, write a sanitized plan, or run the local +evidence demo drill: + +```bash +scripts/failure_drills.sh list +scripts/failure_drills.sh plan --output tmp/failure-drills +WEBHOOKERY_TEST_DATABASE_URL=postgres://... scripts/failure_drills.sh run local-demo +``` + +`plan` is non-live and safe for documentation/release checks. `run local-demo` +requires the same disposable PostgreSQL setup as `docs/evaluator-quickstart.md` +and reuses `examples/webhook-evidence-demo/run.sh`. + +## Drill Catalog + +| Drill | Expected result | Evidence | +|-------|-----------------|----------| +| downstream receiver fails | A valid synthetic event is captured, downstream delivery fails, and failure is visible before replay. 
| Incident report delivery timeline. | +| downstream recovers | Replay succeeds after receiver recovery. | Incident report replay section and `verify-output.json`. | +| invalid signature | Invalid provider signature is persisted as evidence and not routed to side-effecting destinations. | Local E2E output and quarantine evidence. | +| replay after DLQ | DLQ release or replay creates new work with reason-code evidence. | Incident report replay and DLQ sections. | +| PostgreSQL unavailable before capture | Ingress does not return success before durable capture is available. | Readiness/API error evidence from disposable stack. | +| object storage unavailable in S3 mode | Object-backed raw payload capture is not acknowledged when required object writes fail. | Storage drill notes and redacted API output. | +| audit-chain verification failure | Verification reports a failure in a disposable altered database copy. | `whcp audit verify-chain` output from the copy. | +| retention raw-payload tombstone | Raw body reads show retained/tombstoned state while metadata remains queryable. | Retention run, event timeline, and audit entries. | + +## Restore Drill + +Use `make restore-drill` or the script directly when a source database and a +separate disposable restore target are available: + +```bash +WEBHOOKERY_DATABASE_URL=postgres://source \ +WEBHOOKERY_RESTORE_DRILL_DATABASE_URL=postgres://disposable-restore \ +make restore-drill +``` + +The restore target is destructive. The script refuses to run when the restore +URL is missing or equal to the source URL. It writes +`tmp/restore-drill/restore-drill.json` without database URLs. + +PostgreSQL restore drills do not verify S3 or MinIO object bodies. If +`WEBHOOKERY_RAW_STORAGE_MODE=s3` is in scope, record a separate object-storage +read/write drill in `docs/pilot-evidence-template.md`. 
+ +## Recording Results + +For pilot evidence, record: + +- drill name and date; +- Webhookery version or commit; +- source of synthetic or sanitized event; +- incident ID, event ID, and evidence export ID; +- bundle verification result; +- raw payload inclusion status; +- known gaps and accepted risks; and +- follow-up decision from `docs/pilot-review-checklist.md`. + +Do not store database URLs, provider credentials, webhook secrets, raw +signatures, raw payload bodies, customer data, or private receiver URLs in +public drill output. diff --git a/docs/feature-behavior.md b/docs/feature-behavior.md new file mode 100644 index 0000000..9607b4d --- /dev/null +++ b/docs/feature-behavior.md @@ -0,0 +1,454 @@ +# Feature Behavior Reference + +This reference summarizes implemented platform behavior that is too dense for +the operations runbook. It is for maintainers, API reviewers, security +reviewers, and operators who need to understand behavior boundaries before +changing code, contracts, migrations, or runbooks. + +Canonical sources still win for exact behavior: + +- `openapi.yaml` for API routes, schemas, auth, examples, and status codes. +- `migrations/` for persistence shape. +- `cmd/`, `internal/`, and `pkg/` for implementation. +- `docs/operations.md` for incident and recovery runbooks. +- `docs/configuration.md` for environment variables and secret handling. + +Provider-specific behavior changes over time. Before changing provider +semantics for Stripe, GitHub, Shopify, Slack, CloudEvents, or SSRF guidance, +check current official upstream documentation and record the freshness context +using `docs/documentation-maintenance.md`. 
+ +## Inbound Capture And Acknowledgement + +Inbound provider endpoints may return success only after these records are +durably committed: + +- raw body bytes or raw body object metadata +- raw headers and request metadata +- source identity +- verification result +- event metadata +- dedupe result +- durable outbox work + +Inbound 2xx never means downstream business processing succeeded. Unverified +events may be retained as evidence, but they must not route to side-effecting +destinations unless an explicit unsafe policy is implemented and audited. + +Request guardrails are enforced before capture: + +- body limit: 2 MiB +- maximum header pairs: 128 +- maximum total header name/value bytes: 64 KiB +- maximum single header value: 8 KiB + +Provider adapters verify exact raw bytes. Provider IP allowlists are not a +substitute for cryptographic verification. + +## Authentication And Authorization + +Normal management access uses database-backed API keys. Rows store token +hashes, key prefixes, last four characters, scopes, state, and membership +linkage, not plaintext keys. + +Authorization requires both: + +- tenant membership and role +- API key scope or session permission for the requested action + +The bootstrap API key hash is a recovery and initial setup mechanism. Remove, +rotate, or restrict it after creating database-backed owner or security keys. + +Every primary resource is tenant-scoped. List, read, update, delete, replay, +export, and admin-scope paths must include tenant-aware authorization checks. + +API problem responses include a legacy `code` and a namespaced `stable_code` +for client handling. `docs/error-codes.md` lists the major stable codes. + +## Sources, Endpoints, Routes, And Subscriptions + +Sources, endpoints, subscriptions, routes, retry policies, event types, schemas, +transformations, and adapter versions retain reproducibility evidence when +their configuration affects routing or replay. 
+ +Important lifecycle behavior: + +| Resource | Mutation behavior | Evidence preserved | +|----------|-------------------|--------------------| +| Source | Disable instead of hard-deleting historical receipts. Secret rotation creates active and previous versions with bounded grace. | Events, receipts, raw payload metadata, source versions, audit rows. | +| Endpoint | URL updates rerun SSRF policy before commit. Delete disables future delivery. | Historical deliveries, attempts, payload snapshots, signing metadata, audit rows. | +| Subscription | Updates write immutable subscription versions. Delete disables future fanout. | Subscription versions, delivery decisions, audit rows. | +| Route | Updates check source and endpoint references in the same tenant and write immutable route versions. | Route versions, delivery decisions, replay receipts, audit rows. | +| Retry policy | Updates create new policy versions. Delete disables future use. | Delivery references, retry evidence, audit rows. | +| Event schema | Schema bodies and versions are immutable. State changes are versioned and audited. | Schema versions, validation evidence, replay evidence, audit rows. | + +Deletes are generally disabling operations. They must not erase evidence needed +to understand prior capture, routing, delivery, replay, or audit decisions. + +## Delivery, Retry, Replay, And DLQ + +The worker claims durable outbox rows with PostgreSQL leases, evaluates active +subscriptions and routes, creates delivery jobs, then claims scheduled +deliveries. Delivery attempts are signed, recorded, retried on retryable +failures, and moved to dead letter after terminal failure. + +Fairness rules are implemented in PostgreSQL claim ordering: + +- live route work before replay and reconciliation work +- live due deliveries before replay deliveries +- tenant round-robin within priority classes + +Routes and subscriptions are snapshotted through `route_versions` and +`subscription_versions`. 
Delivery evidence records the selected version IDs. + +Default retry behavior remains 12 attempts over a 72-hour maximum with +full-jitter exponential backoff between 10 seconds and 6 hours when no retry +policy is selected. Deliveries store `retry_seed`; retryable attempts record +the deterministic jitter delay and `next_retry_at`. + +Replay creates new delivery work linked to the original event or delivery. It +does not mutate original receipt or delivery history. + +Replay modes: + +- `config_mode=current`: evaluate current active subscriptions and routes. +- `config_mode=original`: clone recorded non-replay delivery decisions and + preserve route, subscription, retry policy, and payload evidence where + available. + +Replay jobs may be paused, resumed, canceled, rate-limited, or created with +`require_approval=true`. Approval records approver metadata and a chained audit +event. Pending approval jobs default to a 24-hour approval window unless +`approval_expires_at` is provided, and approval must come from a different +authorized actor than the creator. Canceling a pending job is the deny path. +Active replay approval policies can also require approval for tenant, source, +or route scopes. Matching policies are evaluated route first, then source, then +tenant, and they make replay jobs pending approval before delivery work is +enqueued. Disabling a policy stops automatic gating for future non-sensitive +replays; existing pending jobs keep their recorded approval requirement. +Replay creation, preview, and dry-run requests require both a structured +`reason_code` and a free-text `reason`. Implemented reason codes are `receiver_fixed`, +`provider_reconciliation`, `operator_requested`, `support_investigation`, +`customer_dispute`, `test_drill`, and `incident_recovery`. Replay job rows, +scope JSON, audit evidence, event timelines, and incident reports preserve the +reason code, free-text reason, replay mode, actor, and selected event or +delivery scope. 
+ +`POST /v1/replay-jobs/preview` is the preferred replay preview route. +`POST /v1/replay-jobs:dry-run` remains a compatibility alias. Both return +planning output without creating delivery jobs. + +`GET /v1/events/{event_id}/timeline` returns `webhookery.event_timeline.v1` +entries for the shared investigation surface. Timeline rows include received +events, provider receipts, raw payload metadata, normalized envelope evidence, +delivery decisions, delivery payload hashes, delivery attempts, reconciliation, +replay, and audit references when present. `whcp events timeline` renders the +same response as `json`, `table`, or `markdown`. + +Dead-letter entries can be released individually or in bounded batches with an +operator reason code and reason because release creates replay work. + +## Provider Reconciliation + +Provider reconciliation jobs compare provider-side API evidence to local +Webhookery evidence when provider APIs permit it. + +Provider API credentials are stored through the same envelope encryption +interface used for webhook and endpoint secrets. API and CLI responses expose +only redacted credential metadata. + +Implemented reconciliation outcomes include: + +- `matched` +- `missing` +- `captured` +- `redelivery_requested` +- `unrecoverable` +- `failed` + +Recovered events use `verification_reason=provider_api_reconciliation`. They +are not marked as signed webhook deliveries and route only when +`route_recovered=true`. + +Provider API call evidence records request method, redacted request URL, +response status, response hash, response size, storage status, and optional +response body. Provider API response bodies are sensitive payload data and +require raw payload permissions when included in exports. Provider tokens must +not appear in logs, UI tables, audit metadata, or export metadata. + +## Normalization, Transformations, And Schemas + +Verified inbound events are normalized after raw body capture and provider +verification. 
Raw payloads remain authoritative. Unverified requests do not +produce routed normalized payloads by default. + +Normalized metadata is available with `events:read`; normalized body data and +raw payload body access require elevated raw-payload permission and emit audit +events. + +Custom adapter governance is tenant-scoped. Adapter definitions and versions +are hashed, versioned, audited, and moved through approval states before +activation. Active declarative HMAC-SHA256 adapters can verify inbound requests +using exact raw bytes, configured signature/timestamp headers, and replay +windows. Webhookery records code-plugin package metadata for review, but it +does not execute custom plugin code in this slice. + +Transformations are immutable, declarative, tenant-scoped versions. Implemented +operations are JSON Pointer based only: + +- `set` +- `copy` +- `drop` +- `redact` + +Transformations cannot change provider evidence, verification fields, +tenant/source identifiers, hashes, or audit metadata. There is no arbitrary +scripting, network access, plugin marketplace, or custom runtime. + +New delivery work snapshots exact outbound bytes into `delivery_payloads` +before delivery becomes claimable. Workers deliver and sign stored bytes. + +Event schemas support a conservative JSON Schema subset: + +- `type` +- `required` +- object `properties` +- array `items` + +Compatibility checks reject newly required fields, removed existing properties, +and changed property types. Unsupported advanced JSON Schema features are not +treated as compatibility proof. + +## Event Search + +Event search is tenant-scoped and returns event metadata only. It does not +return raw payload bodies by default. + +Forensic filters include: + +- provider name; +- provider event ID as `external_id`; +- Webhookery delivery ID; +- DLQ status preset; +- provider signature verification result; +- received-after timestamp; and +- route ID linked through delivery evidence. 
+ +Results include event IDs, source/provider metadata, raw payload IDs, raw +payload hashes, verification metadata, dedupe status, receipt time, and trace +ID. Raw payload body export remains a separate elevated `events:raw` operation +with an operator reason. + +## Incident Packets And Reports + +Incidents are tenant-scoped investigation records. Operators can create an +incident, attach captured events, generate a report snapshot, and create an +incident evidence export. Event attachment validates the incident and event in +the actor tenant before writing the link. + +Incident report snapshots are generated from existing event metadata and event +timeline entries. The report includes: + +- incident title, reason, state, creator, and timestamps +- event identity, provider, event type, provider event ID, source ID, and + received time +- provider verification result, verification reason, and dedupe status +- raw payload ID and raw payload hash +- route, subscription, delivery, retry, DLQ, replay, retention, and audit + timeline references when those entries exist +- explicit non-claims that inbound capture is not downstream business success, + delivery is at-least-once, and local evidence is not provider-side + completeness + +Reports and incident evidence exports omit raw payload bodies, webhook secrets, +signatures, bearer tokens, private keys, and provider credentials by default. +Generated reports are auditable through `incident_report.generated`; incident +evidence exports write `incident_evidence_export.created` and include +`incident_report.json`, `incident_report.md`, timeline evidence, and bundle +hashes. + +## Retention, Raw Payloads, And Exports + +Raw payload retrieval is elevated and audited. Operators should keep raw +retention shorter than metadata retention when payloads may contain personal +data. + +If retention deletes a raw body or object, body reads return HTTP 410. 
Event, +receipt, delivery, hash, storage metadata, and audit evidence remain queryable. + +Retention policy resource types: + +- `raw_payload` +- `normalized_envelope_data` +- `delivery_payload` +- `provider_api_evidence` +- `audit_event` + +The worker applies retention in bounded batches and records `retention_runs` +and `retention_run_items`. Policy changes and completed runs write chained +audit events. Legal hold pauses policy execution while preserving visibility +and auditability. + +Evidence exports are tenant-scoped `tar.gz` bundles. They include: + +- `manifest.json` +- `audit_events.jsonl` +- `payload_evidence.jsonl` +- `audit_chain_proof.jsonl` +- optional `timelines.jsonl` +- optional `raw_payloads.jsonl` +- `reconciliation_evidence.jsonl` + +`manifest.json` is versioned as `webhookery.evidence_bundle.v1`. It includes +the generated time, bundle ID, tenant ID hash, included event IDs, included +incident IDs when applicable, file hashes, audit-chain references where +available, redaction policy, and explicit non-claims. It does not serialize the +raw tenant ID. + +`docs/evidence-bundle-profiles.md` defines approved sharing profiles for common +audiences. The implemented export API uses inclusion flags; profile names are +operator policy labels unless a future contract adds a first-class profile +field. + +Raw payload bodies and payload-body exports require both audit read permission +and raw payload permission. Export creation verifies chain proof before marking +the export ready. `whcp audit verify-bundle --file evidence.tar.gz` checks tar +entry safety, manifest schema version, file hashes, manifest hash references, +and audit-chain continuity. + +## Audit Chain + +Implemented API, CLI, worker, retention, replay, export, reconciliation, and +configuration paths append audit events to a tenant-scoped SHA-256 chain in the +same transaction as the audit row. 
+ +Chain entries store: + +- audit event hash +- previous chain hash +- current chain hash +- canonicalization version +- source +- state +- tombstone metadata + +Backfill is explicit and bounded. It processes deterministic per-tenant batches +ordered by `occurred_at, id`. Backfilled chains prove continuity from the +current database state; they cannot prove history from before the chain feature +existed. + +Audit-event retention marks chain entries as retained tombstones before +deleting audit rows. Verification treats retained entries as hash-only +evidence. Missing non-retained audit rows or mismatched hashes are failures. + +## Metrics, Readiness, Alerts, And Signal Egress + +`/readyz` checks PostgreSQL. `/metrics` exposes aggregate Prometheus text +metrics without tenant labels. + +Authenticated tenant-scoped operations APIs expose: + +- `/v1/ops/metrics` +- `/v1/ops/metrics/rollups` +- `/v1/ops/storage` +- `/v1/ops/config` +- `/v1/ops/workers` +- `/v1/ops/queues` +- `/v1/alerts` +- `/v1/alert-firings` + +Runtime config responses expose only safe metadata such as environment, UI +state, raw storage mode, secret box mode, and request limits. They must not +expose payload bodies, endpoint URLs, database URLs, object-store credentials, +API keys, webhook secrets, master keys, or Vault tokens, and public metrics +must not carry tenant labels. + +Alert notification channels and SIEM sinks are generic signed HTTPS receivers. +Their URLs use the same SSRF protections as customer webhook endpoints. + +Notification payloads contain alert metadata only. SIEM payloads contain +chained audit-event metadata only. Neither includes raw webhook bodies, +provider headers, API keys, bearer tokens, endpoint credentials, channel +secrets, sink secrets, or egress secrets. + +SIEM cursors advance only after signed HTTPS delivery succeeds. Failed +deliveries retry from PostgreSQL state and leave the cursor unchanged.
+ +## Enterprise Identity And Access + +Management API and UI access can use API keys or OIDC-backed sessions. API keys +remain the bootstrap and break-glass path. + +The minimal operator UI is API-backed and keeps API keys in memory only. It can +list and search event metadata, open event timelines, view normalized event +metadata, list incidents, and load incident reports as JSON or Markdown text. +It does not display raw payload bodies by default; raw payload access remains a +separate elevated API/CLI operation. + +OIDC identity providers are tenant-scoped and support Authorization Code + +PKCE. The callback validates state, nonce, issuer, audience/client ID, expiry, +and signed ID token before creating a hashed session cookie. Session cookies +are HttpOnly, SameSite=Lax, and marked Secure. Logout revokes the server-side +session hash. + +Disabling an identity provider revokes active sessions created through that +provider. Session token hashes are never returned. + +When the API is behind a trusted reverse proxy, set +`WEBHOOKERY_TRUSTED_PROXY_CIDRS` to a comma-separated CIDR allowlist for the +immediate proxy peers. Only then is the first `X-Forwarded-For` address used +for session metadata. Invalid or untrusted forwarded values fall back to +`RemoteAddr`. + +SCIM bearer tokens are returned exactly once and stored only as SHA-256 hashes +with prefix and last-four metadata. SCIM delete deactivates users or groups +instead of hard-deleting them. + +Resource-aware role bindings and access policy rules can scope decisions by +principal, resource family, resource ID, and environment. Deny rules take +precedence in explain output. + +Emergency recovery remains API-key based. Keep a tightly controlled owner or +security-capable bootstrap/recovery key, rotate it after use, and audit every +identity or access-control change. 
+ +## Enterprise Producer Trust + +Product-event ingestion at `POST /v1/events` accepts: + +- API keys with `events:write` +- OAuth client-credentials bearer tokens +- verified producer mTLS identities + +Producer credentials can be source-bound. When a credential has `source_id`, +the submitted event body must contain the same `source_id` or ingestion is +denied before the event service is called. + +Producer OAuth client secrets are generated once, returned only in create or +rotate responses, and stored as SHA-256 hashes. Access tokens are opaque bearer +values, stored hashed, have no refresh tokens, default to 15 minutes, and may +not exceed one hour. + +Producer mTLS identities store public certificate metadata only: SHA-256 +fingerprint, subject/SAN metadata, validity timestamps, state, and optional +source binding. Private keys are never submitted or persisted. This slice does +not trust proxy-supplied mTLS or authentication identity headers. + +## SSRF Protection + +Customer endpoint URLs are hostile input. Endpoint creation, endpoint updates, +endpoint test sends, delivery attempts, alert notification channels, and SIEM +sinks must use SSRF-safe URL handling. + +Default production policy: + +- require HTTPS +- reject embedded credentials +- reject private, loopback, link-local, multicast, reserved, and cloud metadata + destinations +- resolve hostnames at validation time and delivery time +- revalidate redirects if redirects are ever explicitly allowed +- fail closed on invalid URL, DNS, TLS, or destination checks + +Do not validate endpoint URLs with ad hoc regular expressions. Use the +implemented SSRF package and its tests. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..6562741 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,113 @@ +# Webhookery Documentation Map + +Use this map to find the canonical document for a task. Prefer editing the +owner document for a topic and linking to it from secondary docs.
+ +| Document | Audience | Purpose | Source-of-truth boundary | +|----------|----------|---------|--------------------------| +| `README.md` | New readers, evaluators, developers | Product framing, current implementation status, local quickstart, and first smoke paths. | Entry point only. Do not maintain long command catalogs, route catalogs, or deployment runbooks here. | +| `site/index.html` | Evaluators, commercial buyers | Static product landing page for Webhookery positioning, quickstart CTA, commercial path, and non-goals. | Public landing surface. Keep operational detail in docs. | +| `AGENTS.md` | Coding agents and maintainers | Repository operating rules, implementation loop, security classification, and validation expectations. | Agent guidance only. It must reflect current repo evidence. | +| `.initial_design.md` | Maintainers, architects, agents | Historical design input, product framing, architecture rationale, and intended direction. | Not proof of implemented behavior. Current code, contracts, migrations, and maintained docs override it. | +| `openapi.yaml` | API consumers, SDK maintainers, reviewers | Canonical REST API contract. | API paths, schemas, status codes, auth schemes, and examples. | +| `sdk/openapi.yaml` | SDK maintainers | SDK-ready OpenAPI copy. | Derived from `openapi.yaml`; keep aligned with `make sdk-generate` and `make sdk-check`. | +| `docs/reference/openapi.md` | API consumers, SDK maintainers, reviewers | Rendered OpenAPI reference pointers, operation count, and generation rule. | Derived documentation. Regenerate with `make openapi-reference-generate` after `openapi.yaml` changes. | +| `docs/openapi/index.html` | API consumers, evaluators | Static rendered API reference generated from `openapi.yaml`. | Derived documentation. Do not edit manually. 
| +| `docs/reference/api-contract-matrix.md` | API consumers, SDK maintainers, reviewers | Generated operation matrix with method, path, operation ID, auth, request, and response metadata. | Derived documentation. `openapi.yaml` remains canonical. | +| `cmd/`, `internal/`, `pkg/` | Developers, reviewers | Go implementation for processes, app logic, adapters, and public helpers. | Implemented behavior. Docs must not claim behavior not supported by these files. | +| `migrations/` | DB reviewers, operators, developers | PostgreSQL schema evolution. | Database schema history and migration ordering. | +| `docs/schema-migrations.md` | DB reviewers, operators, release managers | Migration runner behavior, ordering, evidence-authority tables, rollback stance, and restore compatibility review. | Human operations guide for schema changes. Exact DDL remains in `migrations/`. | +| `Makefile` | Contributors, CI maintainers, release operators | Project-owned commands and validation gates. | Command names and check composition. Confirm with `make help`. | +| `docs/configuration.md` | Operators, deployment maintainers, contributors | Environment variables, defaults, safe production values, secret sensitivity, and process applicability. | Canonical configuration reference. Keep env examples and deployment profile references aligned here. | +| `docs/operations.md` | Self-hosted operators and SREs | Production doctor, RC checks, backup/restore, incident triage, audit verification, and recovery guidance. | Operator runbooks. Avoid moving API reference or command catalogs back into this file. | +| `docs/evaluator-quickstart.md` | Evaluators | Guided local path from checkout to failed-payment incident packet, bundle verification, and RC checks. | Tutorial. Do not turn it into a full operations guide. | +| `examples/webhook-evidence-demo/` | Evaluators, demo authors | Deterministic local evidence demo and synthetic fixtures. | Demo fixtures only. 
Do not store real provider/customer data here. | +| `docs/why-webhookery.md` | Evaluators, buyers, maintainers | Product explanation for the evidence-first wedge, fit, non-fit, and investigation surfaces. | Explanation. Keep operational commands in quickstart, CLI, and operations docs. | +| `docs/use-cases/stripe-payment-investigation.md` | Support engineers, SREs, platform teams | Stripe-style payment incident workflow from search to incident report. | Use-case guide. Provider behavior belongs in `docs/providers/stripe.md`. | +| `docs/use-cases/github-automation-webhooks.md` | Platform teams, maintainers | GitHub automation webhook investigation and replay workflow. | Use-case guide. Provider behavior belongs in `docs/providers/github.md`. | +| `docs/use-cases/shopify-order-webhooks.md` | Ecommerce platform teams | Shopify order webhook investigation workflow for controlled pilots. | Use-case guide. Provider behavior belongs in `docs/providers/shopify.md`. | +| `docs/use-cases/internal-integration-replay.md` | Platform teams | Internal producer/receiver replay workflow with incident evidence. | Use-case guide. Replay behavior remains in code, OpenAPI, and feature docs. | +| `docs/demo-media-checklist.md` | Maintainers, marketers, demo authors | Safety checklist for screenshots, GIFs, short videos, and slide material. | Media safety checklist. It is not product behavior documentation. | +| `docs/day-2-operations.md` | Self-hosted operators and SREs | Post-install backup cadence, restore drills, upgrades, incident triage, alert handling, key rotation, retention review, and audit evidence handoff. | Day-2 operating guide. Link to command references instead of duplicating them. | +| `docs/failure-drills.md` | Self-hosted operators, SREs, release managers | Local and pilot failure drills, sanitized plan generation, and restore-drill evidence rules. | Drill runbook. Destructive drills require disposable or pilot-approved resources. 
| +| `docs/feature-behavior.md` | Maintainers, API reviewers, security reviewers, operators | Implemented behavior reference for capture, auth, routing, delivery, replay, reconciliation, transformations, retention, identity, producer trust, and SSRF. | Behavior summary. Code, OpenAPI, and migrations remain exact. | +| `docs/security-promise.md` | All readers | Durable-capture promise, security invariants, and canonical non-claims. | Canonical non-claims reference. Link here instead of repeating caveat lists. | +| `docs/error-codes.md` | API consumers, SDK maintainers, CLI users, support owners | Stable problem-code reference and CLI rendering behavior. | Error-code reference. Exact response schema remains in `openapi.yaml`. | +| `docs/stability.md` | Release managers, operators, API consumers | Semver, API/CLI compatibility, migration compatibility, support windows, and deprecation rules. | Stability and compatibility policy. Keep release evidence and versioning claims aligned here. | +| `docs/performance-envelope.md` | Operators, release managers, platform teams | Local performance smoke usage, capacity inputs, storage growth, and sizing caveats. | Performance evidence interpretation. It is not an SLA or benchmark certification. | +| `docs/provider-conformance.md` | Release managers, provider-adapter reviewers, security reviewers | Dated provider support matrix, local vector evidence, official-doc source list, and unsupported recovery limits. | Provider conformance evidence. It does not prove live provider completeness. | +| `docs/provider-proof-manifest.json` | Release managers, provider-adapter reviewers, security reviewers | Machine-readable freshness metadata for manual live-provider proof guides. | Proof metadata. It does not store completed live evidence or call providers. 
| +| `docs/providers/stripe.md` | Operators, evaluators, provider-adapter reviewers | Stripe setup, signature verification, retry context, duplicate handling, replay, incident packets, and non-claims. | Stripe operator guide. Implementation remains in code and OpenAPI; provider behavior must be refreshed through official docs. | +| `docs/providers/github.md` | Operators, evaluators, provider-adapter reviewers | GitHub setup, secret handling, `X-Hub-Signature-256`, `X-GitHub-Delivery`, redelivery, replay, evidence workflow, and non-claims. | GitHub operator guide. Implementation remains in code and OpenAPI; provider behavior must be refreshed through official docs. | +| `docs/providers/shopify.md` | Operators, evaluators, provider-adapter reviewers | Shopify setup, HMAC verification, topic metadata, duplicate handling, replay, incident packets, and topic-specific recovery limits. | Shopify operator guide. Implementation remains in code and OpenAPI; provider behavior must be refreshed through official docs. | +| `docs/live-provider-proof/stripe.md` | Evaluators, commercial operators, release managers | Manual Stripe test-mode proof flow from signed delivery to incident packet. | External/manual proof guide. Completed live proof artifacts stay outside public source. | +| `docs/live-provider-proof/github.md` | Evaluators, commercial operators, release managers | Manual GitHub test-repository proof flow from signed delivery to replay and incident packet. | External/manual proof guide. Completed live proof artifacts stay outside public source. | +| `docs/live-provider-proof/shopify.md` | Evaluators, commercial operators, release managers | Manual Shopify development-store proof flow from signed delivery to replay and incident packet. | External/manual proof guide. Completed live proof artifacts stay outside public source. 
| +| `docs/live-provider-proof/run-record-template.md` | Evaluators, commercial operators, release managers | Private run-record template for completed live-provider proof evidence. | External/manual evidence template. Completed run records stay outside public source. | +| `docs/live-provider-proof/stripe-redaction-policy.md` | Evaluators, commercial operators, release managers | Redaction rules for public Stripe, GitHub, and Shopify proof samples. | Public-sample policy. It does not authorize sharing private live proof bundles. | +| `docs/observability.md` | Self-hosted operators and platform teams | Public metric names, Prometheus scrape example, alert rule examples, and dashboard starter panels. | Observability examples. Public metrics remain aggregate-only. | +| `docs/documentation-maintenance.md` | Contributors, maintainers, agents | Provider-claim freshness rules, official source registry, and documentation maintenance discipline. | Documentation maintenance policy. | +| `docs/cli.md` | Operators and developers using `whcp` | CLI command reference and moved README command catalog. | Human CLI reference. `cmd/whcp` remains exact behavior. | +| `sdk/README.md` | SDK users and maintainers | Committed SDK artifact guidance. | SDK usage and artifact expectations. | +| `collections/README.md` and `collections/` | API evaluators, operators | Postman and Bruno smoke request usage, local variables, placeholder signatures, and expected smoke responses. | Smoke examples, not full API coverage. | +| `docker-compose.yml` | Local developers, evaluators | Local API, worker, migration, PostgreSQL, and optional MinIO topology. | Local runtime example. Not production deployment guidance. | +| `docs/deployment.md` | Self-hosted operators, platform teams | Common deployment posture for dependencies, TLS/ingress, secret custody, object storage, network policy, readiness, backup/restore, upgrades, and rollback. | Shared production expectations. 
Profile READMEs own exact profile commands. | +| `docs/pilot-topology.md` | Evaluators, commercial operators, maintainers | Narrow supported pilot topology, operator responsibilities, storage drill expectations, and out-of-scope requests. | Pilot scope boundary. It is not broad production support or managed-service documentation. | +| `docs/pilot-evidence-template.md` | Evaluators, commercial operators, maintainers | Sanitized evidence template for each pilot: topology, providers, failure/replay drill, evidence packet, audit chain, restore drill, gaps, and follow-up. | Pilot evidence template. Do not store sensitive completed evidence in public source. | +| `docs/evidence-bundle-profiles.md` | Operators, support owners, security reviewers, commercial evaluators | Approved evidence bundle profile policy and current CLI flag mapping. | Sharing policy. It does not add a named CLI profile flag. | +| `deploy/kubernetes/`, `deploy/helm/`, `deploy/terraform/` | Platform operators | Profile-specific deployment manifests, chart, and Terraform module. | Deployment profile specifics. Common production posture belongs in shared deployment docs. | +| `docs/security-review-package.md` | Security reviewers | Artifact map, trust boundaries, review controls, and exit criteria. | Security review packet. It should route to canonical implementation and operations docs. | +| `docs/external-review-package.md` | External reviewers, maintainers, release managers | Public index for external review inputs, questions, outputs, and release impact. | External review router. Sensitive review evidence stays outside public source. | +| `docs/external-review-scope.md` | External reviewers, maintainers, release managers | Scope, exclusions, review questions, required evidence, and exit criteria for external maturity review. | Review planning template. Store completed sensitive evidence outside public source. 
| +| `docs/external-review-findings-template.md` | External reviewers, maintainers, release managers | Finding tracker template with severity, ownership, release-blocking decision, and closure fields. | Finding tracking template. Do not store exploit material or secrets. | +| `docs/external-review-accepted-risks.md` | Maintainers, release managers | Accepted-risk registry and status vocabulary for release decisions. | Public sanitized registry. Release-specific evidence owns exact decision copies. | +| `docs/release-evidence-template.md` | Release managers, security reviewers | Canonical release evidence checklist and template. | Release evidence requirements. Other docs should link here instead of duplicating gates. | +| `docs/reference/release-evidence-index.md` | Release managers, evaluators, security reviewers | Public release artifact map, current release-candidate evidence, and verification notes. | Public release metadata. GitHub Releases remains external source of truth. | +| `docs/reference/release-validation.md` | Release managers, maintainers, security reviewers | Release validation commands, metadata gates, and evidence to record. | Validation guide. Exact command composition remains in `Makefile`. | +| `release/current.json` | Release managers, evaluators, security reviewers | Machine-readable pointer to the current public release candidate and next pilot package. | Public metadata pointer. It is not a release artifact itself. | +| `docs/reference/source-of-truth.md` | Maintainers, reviewers, evaluators | Public source-of-truth map for release, API, workflow, deployment, and documentation artifacts. | Metadata index. It should link to canonical sources instead of replacing them. | +| `docs/release-evidence-sample.md` | Release managers, evaluators, security reviewers | Public example of a completed release evidence packet. | Reader aid only. Keep required fields in `docs/release-evidence-template.md`. 
| +| `docs/production-rc-checklist.md` | Release managers, operators | Ordered release-candidate readiness checklist for controlled self-hosted adoption. | RC checklist. Link to canonical operations docs instead of duplicating runbooks. | +| `docs/releases/v0.1.0-rc1.md` | Evaluators, release managers, commercial reviewers | First release-candidate notes, implemented behavior, limitations, and validation commands. | Release-specific narrative. Keep canonical release gates in `docs/release-evidence-template.md`. | +| `docs/releases/v0.2.0-pilot.md` | Evaluators, release managers, commercial reviewers | Pilot-readiness checklist, launch blockers, known limitations, and required pre-tag gates. | Pilot checklist. It is not a tagged release note or production certification. | +| `RELEASE_EVIDENCE.md` | Release readers | Short router to the release evidence template. | Current release evidence pointer, not a parallel checklist. | +| `SECURITY.md` | Security researchers | Vulnerability reporting policy and sensitive-data handling. | Reporting process. Keep project architecture details elsewhere. | +| `CONTRIBUTING.md` | Contributors | Contribution policy, checks, and sensitive-data rules. | Contribution entry point. Link to canonical docs for details. | +| `CODE_OF_CONDUCT.md` | Contributors, issue reporters, maintainers | Public conduct baseline and reporting route. | Conduct policy. Security-sensitive reports still go through `SECURITY.md`. | +| `CODEOWNERS` | Maintainers, reviewers | GitHub review ownership hints for sensitive repo areas. | Review routing metadata. It does not replace branch protection. | +| `.github/pull_request_template.md` | Contributors and reviewers | PR checklist for security context, validation, and sensitive-data checks. | GitHub intake metadata. | +| `GOVERNANCE.md` | Maintainers, contributors, commercial users | Decision model, maintainer role, and invariant governance. | Governance policy, not operations reference. 
| +| `SUPPORT.md` | Users and customers | Public and private support paths. | Support policy and sensitive-data warning. | +| `COMMERCIAL.md` | Commercial users | AGPL and commercial licensing boundary. | Business and licensing information. | +| `docs/commercial-evaluation.md` | Commercial evaluators | Evaluation path, starting ranges, required inputs, and safe information boundaries. | Commercial evaluation guide. It is not legal advice. | +| `docs/production-readiness-review.md` | Commercial evaluators, operators | Paid production-readiness review scope, inputs, outputs, and limits. | Review-offer guide. It is not certification. | +| `docs/support-packages.md` | Users and customers | Support options, starting ranges, request quality, and non-claims. | Support package guide. Contract terms override public examples. | +| `docs/comparisons/build-vs-buy.md` | Evaluators, buyers | Decision guide for self-hosting Webhookery vs hosted vendors or simpler internal tools. | Buyer-fit comparison. Not a benchmark or legal recommendation. | +| `docs/comparisons/hookdeck.md` | Evaluators, buyers | Factual buyer-fit comparison against Hookdeck based on dated official-source review. | Comparison page. Re-check official sources before publishing externally. | +| `docs/comparisons/svix.md` | Evaluators, buyers | Factual buyer-fit comparison against Svix based on dated official-source review. | Comparison page. Re-check official sources before publishing externally. | +| `docs/comparisons/convoy.md` | Evaluators, buyers | Factual buyer-fit comparison against Convoy based on dated official-source review. | Comparison page. Re-check official sources before publishing externally. | +| `docs/articles/exactly-once-webhooks.md` | Evaluators, practitioners | Educational article explaining why Webhookery designs for evidence, replay, and idempotency instead of exactly-once claims. | Educational content. Keep aligned with `docs/security-promise.md`. 
| +| `docs/articles/webhook-incident-report.md` | Operators, incident responders | Educational article and report outline for webhook incidents. | Educational content. Do not store real incident data here. | +| `docs/articles/webhook-failure-modes.md` | Operators, evaluators | Educational article about webhook loss boundaries and operational checks. | Educational content. Keep provider claims aligned with conformance docs. | +| `docs/articles/self-hosted-webhook-gateway-architecture.md` | Evaluators, architects, security reviewers | Educational architecture article covering PostgreSQL-first capture, OpenAPI, payload evidence, and audit-chain verification. | Educational content. Exact behavior remains in code, OpenAPI, migrations, and operations docs. | +| `docs/articles/webhook-security-review-checklist.md` | Security reviewers, platform teams | SaaS webhook security-review checklist for inbound trust, producer auth, tenant isolation, SSRF, secrets, and release evidence. | Checklist. It is not certification or legal advice. | +| `docs/launch-copy.md` | Maintainers, launch authors | Draft public launch copy for release announcement, communities, outreach, and product channels. | Prepared copy only. Do not treat as approval to post. | +| `docs/launch-metrics.md` | Maintainers, commercial operators | Privacy-safe launch measurement plan focused on qualified evaluations. | Metrics plan. Does not add runtime analytics. | +| `docs/customer-discovery-notes-template.md` | Maintainers, commercial operators | Sanitized early discovery-call template before a formal pilot. | Discovery notes template. Do not store secrets or customer data. | +| `docs/pilot-feedback-template.md` | Maintainers, commercial operators | Sanitized template for evaluator and pilot feedback. | Feedback template. Do not store secrets or customer data. 
| +| `.github/ISSUE_TEMPLATE/evaluator-feedback.yml` | Evaluators, pilot users, maintainers | Public issue form for sanitized evaluator and pilot feedback. | Public intake template. It must reject secrets, raw payloads, and customer data. | +| `docs/roadmap-intake-policy.md` | Maintainers | Policy for classifying pilot feedback into docs, bugs, paid work, roadmap, future, or out-of-scope. | Roadmap discipline. Does not override product invariants. | +| `docs/pilot-review-checklist.md` | Maintainers | Checklist for reviewing pilot findings and choosing the next engineering slice. | Review checklist. Keep production claims evidence-backed. | +| `TRADEMARKS.md` | Forks, redistributors, commercial users | Naming and trademark guidance. | Trademark policy only. | + +## Maintenance Rule + +When a behavior changes, update the smallest canonical source first: + +1. Code, OpenAPI, migrations, deployment profile, or executable script. +2. The owning documentation page from the table above. +3. Short links or summaries in secondary docs. + +Do not duplicate environment tables, command catalogs, route lists, provider +semantics, release gates, or non-claim language unless one document is clearly +named as the owner. diff --git a/docs/launch-copy.md b/docs/launch-copy.md new file mode 100644 index 0000000..303ed6d --- /dev/null +++ b/docs/launch-copy.md @@ -0,0 +1,135 @@ +# Launch Copy Templates + +These drafts are prepared copy only. Do not post them externally until the +`v0.1.0-rc1` release, release evidence, static landing page, and evaluator +quickstart are live and verified. + +## GitHub Release Announcement + +Title: + +```text +Webhookery v0.1.0-rc1: self-hosted webhook evidence infrastructure +``` + +Body: + +```text +Webhookery v0.1.0-rc1 is a release candidate for teams evaluating self-hosted +webhook evidence infrastructure. 
+ +It focuses on durable capture before inbound success, provider-aware +verification, signed delivery, retry/DLQ/replay evidence, retention, evidence +exports, provider conformance checks, audit-chain verification, and +release-candidate acceptance gates. + +Start here: +- Evaluator quickstart: docs/evaluator-quickstart.md +- Local evidence demo: examples/webhook-evidence-demo/ +- Release evidence template: docs/release-evidence-template.md +- Provider conformance: docs/provider-conformance.md +- Commercial evaluation: docs/commercial-evaluation.md + +Non-claims: +- no exactly-once delivery +- no provider-side event completeness guarantee +- no compliance certification +- no hosted-service availability + +This release uses local/fake provider acceptance tests. It does not call live +providers or customer receivers. +``` + +## Self-Hosted Community Post + +```text +I released Webhookery v0.1.0-rc1, a self-hosted webhook evidence and delivery +control plane. + +The angle is not "another webhook gateway." It is evidence: durable capture +before success, provider signature verification, delivery attempts, replay, +DLQ, retention, audit-chain verification, and release evidence. + +It is for teams that need to prove what happened to webhook events and prefer +self-hosting over a managed platform. 
+ +Good fit: +- regulated or security-reviewed SaaS/platform teams +- internal platform teams receiving provider webhooks +- teams that need replay/audit evidence and commercial license exceptions + +Not a fit: +- teams wanting a hosted managed service +- teams expecting exactly-once delivery +- teams expecting provider-side completeness guarantees + +Quickstart: docs/evaluator-quickstart.md +Demo: examples/webhook-evidence-demo/ +Commercial path: docs/commercial-evaluation.md +``` + +## Direct Outreach + +```text +Subject: Self-hosted webhook evidence/replay control plane + +Hi {name}, + +I am looking for teams with webhook incident, replay, audit, or self-hosting +requirements to evaluate Webhookery. + +Webhookery is a self-hosted webhook evidence and delivery control plane. It is +designed around durable capture before inbound success, provider verification, +delivery evidence, replay, retention, and audit-chain verification. + +The first release candidate includes a local fake-provider demo and release +evidence package so your team can inspect the trust boundaries before any +commercial discussion. + +Useful links: +- quickstart: docs/evaluator-quickstart.md +- release notes: docs/releases/v0.1.0-rc1.md +- security promise: docs/security-promise.md +- commercial evaluation: docs/commercial-evaluation.md + +This is not a hosted service and does not claim exactly-once delivery or +provider-side completeness. It is for teams that need self-hosted evidence and +operational control. + +Would it be useful to compare this against your current webhook incident and +replay workflow? +``` + +## Product Launch Channel + +```text +Webhookery is self-hosted webhook evidence infrastructure. + +It helps teams receive, verify, store, route, deliver, replay, audit, and debug +webhooks while keeping the loss boundaries explicit. 
+ +What it proves: +- whether Webhookery durably captured an event +- whether provider verification passed +- which route matched +- which delivery attempts happened +- whether retry, DLQ, replay, retention, and audit evidence exists + +What it does not claim: +- exactly-once delivery +- provider-side event completeness +- downstream business success +- compliance certification + +Try the local release-candidate demo: docs/evaluator-quickstart.md +``` + +## Posting Checklist + +- [ ] `v0.1.0-rc1` release exists. +- [ ] Release evidence artifacts are attached or linked. +- [ ] Static landing page is linked. +- [ ] Evaluator quickstart works from a clean checkout. +- [ ] No secrets, raw payloads, provider credentials, or customer data appear. +- [ ] Non-claims are present. +- [ ] Commercial CTA is direct but not spammy. diff --git a/docs/launch-metrics.md b/docs/launch-metrics.md new file mode 100644 index 0000000..e733d7d --- /dev/null +++ b/docs/launch-metrics.md @@ -0,0 +1,92 @@ +# Launch Metrics Plan + +Webhookery's first public release should optimize for qualified evaluations and +conversations, not vanity metrics alone. + +Do not add invasive product analytics, runtime tracking scripts, customer +payload collection, or tenant-labeled public metrics for launch measurement. + +## Primary Metrics + +| Metric | Why it matters | Collection method | +| --- | --- | --- | +| Qualified commercial inquiries | Best early revenue signal. | Manual CRM or private tracker. | +| Evaluation calls booked | Shows real buyer pain. | Calendar or manual tracker. | +| Evaluator quickstart completions | Shows the local path works. | Voluntary feedback, issues, or calls. | +| Release downloads / image pulls | Shows install interest. | GitHub release and GHCR stats where available. | +| Issues from real evaluators | Shows friction and missing docs. | GitHub issues with sanitized templates. | +| Pilot requests | Shows commercial intent. | Manual tracker. 
| + +## Secondary Metrics + +- GitHub stars. +- repository clones. +- docs page visits if a privacy-respecting static-site analytics setup is + approved later. +- support-package inquiries. +- release evidence package requests. + +Secondary metrics help with distribution but should not drive product scope by +themselves. + +## Review Cadence + +Review launch signals weekly for the first four weeks after `v0.1.0-rc1`: + +1. Count qualified conversations. +2. Classify issues as bug, docs gap, evaluator friction, missing integration, + or unrelated feature request. +3. Identify repeated blockers. +4. Update the pilot feedback tracker. +5. Decide whether the next implementation slice is docs, hardening, provider + compatibility, commercial packaging, or bug fixes. + +## Private Tracker Template + +Keep the launch tracker private unless every row is sanitized for public +sharing. A spreadsheet, CRM, or private issue board is enough; do not add +runtime analytics or product telemetry to collect these fields. +If you use a file-based tracker in this checkout, keep it under +`launch-metrics-private/`; that path is ignored by git and Docker build +contexts. + +| Week | Metric | Count | Source | Quality notes | Follow-up owner | Next action | +| --- | --- | ---: | --- | --- | --- | --- | +| `2026-W__` | Qualified commercial inquiries | 0 | Manual CRM/private tracker | Segment, provider mix, and urgency only. | | | +| `2026-W__` | Evaluation calls booked | 0 | Calendar/private tracker | Record pain category, not sensitive details. | | | +| `2026-W__` | Evaluator quickstart completions | 0 | Voluntary feedback/issues/calls | Link sanitized issue or notes. | | | +| `2026-W__` | Release downloads/image pulls | 0 | GitHub/GHCR stats | Aggregate counts only. | | | +| `2026-W__` | Issues from real evaluators | 0 | GitHub issues/private tracker | Classify as bug, docs gap, or evaluator friction. 
| | | +| `2026-W__` | Pilot requests | 0 | Manual tracker | Track stage and next decision. | | | + +For each serious evaluator, link to `docs/pilot-feedback-template.md` or a +private sanitized equivalent. Store customer-identifying details only in the +private tracker, never in public release evidence. + +## Privacy Boundary + +Launch metrics must not collect: + +- API keys +- bearer tokens +- session cookies +- webhook secrets +- raw provider signatures +- private keys +- provider credentials +- raw payload bodies +- customer PII +- tenant IDs in public metrics +- database URLs with passwords + +## Success Criteria For The First Release Candidate + +The release candidate is successful if it produces: + +- at least a few qualified conversations with teams that have webhook evidence, + replay, audit, or self-hosting pain +- actionable evaluator feedback +- a short list of repeated blockers +- no need to weaken the product's non-claims + +It is not a failure if stars are modest while qualified evaluations are strong. diff --git a/docs/live-provider-proof/github.md b/docs/live-provider-proof/github.md new file mode 100644 index 0000000..9e1aa5a --- /dev/null +++ b/docs/live-provider-proof/github.md @@ -0,0 +1,254 @@ +# GitHub Live-Provider Proof Guide + +This manual guide shows how to prove a GitHub repository webhook flow through +Webhookery without committing secrets or raw repository payloads. + +Status: external/manual. Completing this guide produces private evidence for a +specific environment; it is not provider certification. + +Official docs checked on 2026-06-04: + +- +- +- +- + +## What This Proves + +- A real GitHub repository `ping` or `push` webhook can reach Webhookery. +- Webhookery verifies `X-Hub-Signature-256` using the raw request body. +- `X-GitHub-Delivery` is captured as the provider delivery identity. +- Manual GitHub redelivery is visible as duplicate provider delivery evidence. 
+- Webhookery replay and incident packet generation are linked to original + evidence. + +## What This Does Not Prove + +- Provider certification or GitHub endorsement. +- Provider-side event completeness. +- Exactly-once delivery or global ordering. +- Downstream business processing success. +- Legal, regulatory, or compliance certification. + +## Prerequisites + +- Admin access to a disposable GitHub repository. +- Local Webhookery API and worker. +- A public HTTPS endpoint or temporary webhook proxy that forwards to local + Webhookery. +- A disposable downstream receiver that can fail first and recover later. +- A private directory for generated reports and bundles. + +No GitHub token is required for the basic ping/push proof unless you test REST +API redelivery or Webhookery reconciliation. + +## 1. Prepare Webhookery + +```bash +cp .env.example .env +docker compose up --build +export WEBHOOKERY_API_KEY=dev-bootstrap-key +export WEBHOOKERY_GITHUB_WEBHOOK_SECRET='replace-with-random-local-secret' +``` + +Create the source and save the returned source ID: + +```bash +go run ./cmd/whcp sources create \ + --name github-live-proof \ + --provider github \ + --secret "$WEBHOOKERY_GITHUB_WEBHOOK_SECRET" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_GITHUB_SOURCE_ID=src_replace_me +``` + +## 2. Configure The Repository Webhook + +In the test repository, create a webhook: + +- Payload URL: + `https://webhookery.example.test/v1/ingest/github/${WEBHOOKERY_GITHUB_SOURCE_ID}` +- Content type: `application/json` +- Secret: the value of `WEBHOOKERY_GITHUB_WEBHOOK_SECRET` +- SSL verification: enabled +- Events: `ping` and `push` + +For local-only proof, point GitHub at a temporary webhook proxy and forward the +proxy to: + +```text +http://localhost:8080/v1/ingest/github/${WEBHOOKERY_GITHUB_SOURCE_ID} +``` + +Do not put API keys, bearer tokens, or secrets in the payload URL. + +## 3. 
Configure The Failing Receiver Route + +```bash +export WEBHOOKERY_RECEIVER_URL='https://receiver.example.test/fail-first' + +go run ./cmd/whcp endpoints validate-url \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp endpoints create \ + --name github-live-proof-receiver \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ENDPOINT_ID=end_replace_me + +go run ./cmd/whcp routes create \ + --name github-live-proof-route \ + --source-id "$WEBHOOKERY_GITHUB_SOURCE_ID" \ + --endpoint-id "$WEBHOOKERY_ENDPOINT_ID" \ + --event-types ping,push \ + --state draft \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ROUTE_ID=rte_replace_me + +go run ./cmd/whcp routes activate \ + --route-id "$WEBHOOKERY_ROUTE_ID" \ + --reason "github live-proof route" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## 4. Trigger And Capture A Delivery + +Creating the webhook sends a `ping`. To trigger `push`, commit and push a +change to the disposable repository. + +Then find the Webhookery event: + +```bash +go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_EVENT_ID=evt_replace_me + +go run ./cmd/whcp events timeline \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --format markdown \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected evidence: + +- provider verification is valid; +- `X-GitHub-Delivery` is recorded as provider identity; +- `X-GitHub-Event` is recorded as event type; +- delivery to the failing receiver is recorded. + +## 5. Prove Redelivery And Dedupe Shape + +From the repository webhook settings, open the webhook, choose a recent +delivery from the past three days, and click redeliver. GitHub reuses the +delivery GUID for redelivery. 
In Webhookery, confirm that the duplicate +delivery identity remains visible: + +```bash +go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY" +go run ./cmd/whcp events timeline \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --format markdown \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +Record the observed duplicate or dedupe evidence in the private incident +packet. Do not paste raw GitHub payloads into public docs or issues. + +## 6. Recover And Replay + +Change the receiver to return success, then run a dry-run and replay: + +```bash +go run ./cmd/whcp replay-jobs dry-run \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "github live-proof receiver fixed" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp replay-jobs create \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "github live-proof receiver fixed" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +If the event is in DLQ, release the DLQ entry instead: + +```bash +go run ./cmd/whcp dead-letter list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_DLQ_ID=dlq_replace_me + +go run ./cmd/whcp dead-letter release \ + --entry-id "$WEBHOOKERY_DLQ_ID" \ + --reason-code receiver_fixed \ + --reason "github live-proof receiver recovered" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## 7. 
Generate A Private Incident Packet + +```bash +mkdir -p live-proof-private/github + +go run ./cmd/whcp incidents create \ + --title "GitHub test repository webhook failed then replayed" \ + --reason "github live-provider proof" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_INCIDENT_ID=inc_replace_me + +go run ./cmd/whcp incidents add-event \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --reason "attach GitHub proof event" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents generate-report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "github proof report" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --format markdown \ + --output live-proof-private/github/incident-report.private.md \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents export \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "github proof bundle" \ + --output live-proof-private/github/evidence.private.tar.gz \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +The `live-proof-private/` directory is an operator convention for local proof +artifacts. Do not commit it. + +## 8. Redact A Shareable Sample + +Use the same public-sample rules as +`docs/live-provider-proof/stripe-redaction-policy.md`. A committed sample +shape lives at +`docs/live-provider-proof/samples/github-incident-report.redacted.md`. + +Before sharing any proof: + +```bash +go run ./cmd/whcp audit verify-bundle \ + --file live-proof-private/github/evidence.private.tar.gz +make provider-proof-check +``` + +## Cleanup + +1. Delete the test repository webhook. +2. Rotate or delete the Webhookery source secret. +3. Disable proof routes and endpoints. +4. Delete temporary webhook proxy channels. +5. Remove private proof artifacts from shared machines. 
+ diff --git a/docs/live-provider-proof/run-record-template.md b/docs/live-provider-proof/run-record-template.md new file mode 100644 index 0000000..65f6f4e --- /dev/null +++ b/docs/live-provider-proof/run-record-template.md @@ -0,0 +1,97 @@ +# Live-Provider Proof Run Record Template + +Use this template for private Stripe, GitHub, or Shopify proof runs. Store the +completed record with private release or pilot evidence, not in public source +control. + +Do not commit completed run records, raw payloads, provider signatures, +webhook secrets, bearer tokens, customer data, private repository data, +development-store customer data, database URLs, or private evidence bundles. + +## Run Metadata + +| Field | Value | +|-------|-------| +| Provider | `stripe \| github \| shopify` | +| Proof guide version / commit | | +| Run date | | +| Operator | | +| Environment | `local \| pilot \| disposable test` | +| Provider account/repository/store | private reference only | +| Webhookery version / commit | | +| Webhookery deployment profile | | + +## Scope And Non-Claims + +- Proof scope: +- Provider event type or topic: +- Downstream receiver behavior: +- Replay path tested: +- Evidence bundle generated: `yes | no` + +Confirm before using this run as release or pilot evidence: + +- not provider certification; +- no provider-side completeness guarantee; +- no exactly-once delivery claim; +- no downstream business-success claim; and +- no compliance certification claim. 
+ +## Setup Evidence + +| Check | Result | Evidence location | +|-------|--------|-------------------| +| Provider proof guide followed | `pass \| fail \| skipped` | | +| Provider secret created only for test proof | `pass \| fail \| skipped` | | +| Webhookery source configured | `pass \| fail \| skipped` | | +| Receiver route configured | `pass \| fail \| skipped` | | +| Receiver starts in failing mode | `pass \| fail \| skipped` | | +| Private proof output directory created | `pass \| fail \| skipped` | | + +## Capture And Verification + +| Check | Result | Evidence location | +|-------|--------|-------------------| +| Provider delivered event to Webhookery | `pass \| fail \| skipped` | | +| Inbound response returned only after durable capture | `pass \| fail \| skipped` | | +| Provider signature verified as valid | `pass \| fail \| skipped` | | +| Raw payload body omitted from shared artifacts | `pass \| fail \| skipped` | | +| Event timeline includes capture and verification | `pass \| fail \| skipped` | | +| Provider delivery identity captured when available | `pass \| fail \| skipped` | | + +## Failure, Replay, And Incident Evidence + +| Check | Result | Evidence location | +|-------|--------|-------------------| +| Downstream failure recorded | `pass \| fail \| skipped` | | +| Retry or DLQ state visible | `pass \| fail \| skipped` | | +| Replay dry-run completed | `pass \| fail \| skipped` | | +| Replay or DLQ release completed with reason code | `pass \| fail \| skipped` | | +| Original event remained immutable | `pass \| fail \| skipped` | | +| Incident created and event attached | `pass \| fail \| skipped` | | +| Markdown incident report generated | `pass \| fail \| skipped` | | +| Evidence bundle generated | `pass \| fail \| skipped` | | +| `whcp audit verify-bundle` passed | `pass \| fail \| skipped` | | + +## Sanitization Review + +Use `docs/live-provider-proof/stripe-redaction-policy.md` before sharing any +sample outside the private evidence location. 
+ +| Check | Result | Evidence location | +|-------|--------|-------------------| +| Secret-shaped strings removed | `pass \| fail \| skipped` | | +| Raw signatures removed | `pass \| fail \| skipped` | | +| Raw payload bodies removed | `pass \| fail \| skipped` | | +| Provider/customer/private repository data removed | `pass \| fail \| skipped` | | +| Public sample states required non-claims | `pass \| fail \| skipped` | | +| `make provider-proof-check` passed after sample edits | `pass \| fail \| skipped` | | + +## Outcome + +- Run status: `passed | failed | partial | abandoned` +- External evidence location: +- Public sample updated: `yes | no` +- Follow-up issue or private tracker link: +- Release or pilot decision: +- Accepted risk, if any: diff --git a/docs/live-provider-proof/samples/github-incident-report.redacted.md b/docs/live-provider-proof/samples/github-incident-report.redacted.md new file mode 100644 index 0000000..17d1d65 --- /dev/null +++ b/docs/live-provider-proof/samples/github-incident-report.redacted.md @@ -0,0 +1,66 @@ +# GitHub Incident Report Sample + +Sample status: redacted public shape only. + +Provider proof status: manual test-repository proof, not provider +certification. This is not provider certification. 
+ +## Summary + +| Field | Value | +|-------|-------| +| Provider | GitHub | +| Event type | `push` | +| Delivery GUID | `00000000-0000-0000-0000-000000000000` | +| Source ID | `src_redacted` | +| Incident ID | `inc_redacted` | +| Report schema | `webhookery.incident_report.v1` | + +## Verification + +| Field | Value | +|-------|-------| +| Signature header | `X-Hub-Signature-256` present, value omitted | +| Signature result | `valid` | +| Raw payload | omitted | +| Raw payload SHA-256 | `sha256:redacted` | +| Event header | `X-GitHub-Event: push` | + +## Delivery And Dedupe Timeline + +| Sequence | State | Evidence | +|----------|-------|----------| +| 1 | captured | durable receipt and raw payload metadata stored | +| 2 | verified | GitHub signature accepted | +| 3 | routed | route version `rtv_redacted` matched | +| 4 | failed | receiver returned a test `500` | +| 5 | duplicate_visible | manual GitHub redelivery reused delivery GUID | +| 6 | redelivery_requested | operator replay reason `receiver_fixed` | +| 7 | succeeded | Webhookery replay delivery returned test `204` | + +## Replay Evidence + +| Field | Value | +|-------|-------| +| Replay job | `rpl_redacted` | +| Reason code | `receiver_fixed` | +| Reason | receiver fixed in GitHub proof environment | +| Config mode | `original` | +| Original event mutation | none | + +## Evidence Bundle + +| Field | Value | +|-------|-------| +| Bundle ID | `exp_redacted` | +| Manifest schema | `webhookery.evidence_bundle.v1` | +| Manifest SHA-256 | `sha256:redacted` | +| Audit-chain verification | valid in private proof | + +## Non-Claims + +- Raw payload bodies, webhook secrets, provider signatures, tenant IDs, and + repository-private data are omitted. +- This sample is not provider certification. +- This sample does not prove provider-side event completeness. +- This sample does not claim exactly-once delivery. 
diff --git a/docs/live-provider-proof/samples/shopify-incident-report.redacted.md b/docs/live-provider-proof/samples/shopify-incident-report.redacted.md new file mode 100644 index 0000000..481793d --- /dev/null +++ b/docs/live-provider-proof/samples/shopify-incident-report.redacted.md @@ -0,0 +1,68 @@ +# Shopify Incident Report Sample + +Sample status: redacted public shape only. + +Provider proof status: manual development-store proof, not provider +certification. This is not provider certification. + +## Summary + +| Field | Value | +|-------|-------| +| Provider | Shopify | +| Event type | `products/create` | +| Webhook ID | `00000000-0000-0000-0000-000000000000` | +| Event ID | `evt_shopify_redacted` | +| Source ID | `src_redacted` | +| Incident ID | `inc_redacted` | +| Report schema | `webhookery.incident_report.v1` | + +## Verification + +| Field | Value | +|-------|-------| +| Signature header | `X-Shopify-Hmac-SHA256` present, value omitted | +| Signature result | `valid` | +| Raw payload | omitted | +| Raw payload SHA-256 | `sha256:redacted` | +| Topic header | `X-Shopify-Topic: products/create` | + +## Delivery Timeline + +| Sequence | State | Evidence | +|----------|-------|----------| +| 1 | captured | durable receipt and raw payload metadata stored | +| 2 | verified | Shopify HMAC accepted | +| 3 | routed | route version `rtv_redacted` matched topic | +| 4 | failed | receiver returned a test `500` | +| 5 | redelivery_requested | operator replay reason `receiver_fixed` | +| 6 | succeeded | Webhookery replay delivery returned test `204` | + +## Replay Evidence + +| Field | Value | +|-------|-------| +| Replay job | `rpl_redacted` | +| Reason code | `receiver_fixed` | +| Reason | receiver fixed in Shopify proof environment | +| Config mode | `original` | +| Original event mutation | none | + +## Evidence Bundle + +| Field | Value | +|-------|-------| +| Bundle ID | `exp_redacted` | +| Manifest schema | `webhookery.evidence_bundle.v1` | +| Manifest SHA-256 
| `sha256:redacted` | +| Audit-chain verification | valid in private proof | + +## Non-Claims + +- Raw payload bodies, webhook secrets, provider signatures, tenant IDs, shop + domains, and customer data are omitted. +- This sample is not provider certification. +- This sample does not prove provider-side event completeness. +- This sample does not claim exactly-once delivery. +- This sample does not claim universal recovery across Shopify topics. + diff --git a/docs/live-provider-proof/samples/stripe-incident-report.redacted.md b/docs/live-provider-proof/samples/stripe-incident-report.redacted.md new file mode 100644 index 0000000..6fe0eef --- /dev/null +++ b/docs/live-provider-proof/samples/stripe-incident-report.redacted.md @@ -0,0 +1,66 @@ +# Stripe Incident Report Sample + +Sample status: redacted public shape only. + +Provider proof status: manual Stripe test-mode proof, not provider +certification. This is not provider certification. + +## Summary + +| Field | Value | +|-------|-------| +| Provider | Stripe | +| Event type | `payment_intent.succeeded` | +| Provider event ID | `evt_redacted` | +| Source ID | `src_redacted` | +| Incident ID | `inc_redacted` | +| Report schema | `webhookery.incident_report.v1` | + +## Verification + +| Field | Value | +|-------|-------| +| Signature result | `valid` | +| Signature header | omitted | +| Timestamp window | five minutes | +| Raw payload | omitted | +| Raw payload SHA-256 | `sha256:redacted` | + +## Delivery Timeline + +| Sequence | State | Evidence | +|----------|-------|----------| +| 1 | captured | durable receipt and raw payload metadata stored | +| 2 | verified | Stripe signature accepted | +| 3 | routed | route version `rtv_redacted` matched | +| 4 | failed | receiver returned a test `500` | +| 5 | dead_lettered | retry policy exhausted in proof environment | +| 6 | redelivery_requested | operator replay reason `receiver_fixed` | +| 7 | succeeded | replay delivery returned test `204` | + +## Replay Evidence + 
+| Field | Value | +|-------|-------| +| Replay job | `rpl_redacted` | +| Reason code | `receiver_fixed` | +| Reason | receiver fixed in Stripe proof environment | +| Config mode | `original` | +| Original event mutation | none | + +## Evidence Bundle + +| Field | Value | +|-------|-------| +| Bundle ID | `exp_redacted` | +| Manifest schema | `webhookery.evidence_bundle.v1` | +| Manifest SHA-256 | `sha256:redacted` | +| Audit-chain verification | valid in private proof | + +## Non-Claims + +- Raw payload bodies, webhook secrets, provider signatures, tenant IDs, and + customer data are omitted. +- This sample is not provider certification. +- This sample does not prove provider-side event completeness. +- This sample does not claim exactly-once delivery. diff --git a/docs/live-provider-proof/shopify.md b/docs/live-provider-proof/shopify.md new file mode 100644 index 0000000..a9225eb --- /dev/null +++ b/docs/live-provider-proof/shopify.md @@ -0,0 +1,236 @@ +# Shopify Live-Provider Proof Guide + +This manual guide shows how to prove a Shopify development-store webhook flow +through Webhookery without committing secrets, raw shop payloads, or customer +data. + +Status: external/manual. Completing this guide produces private evidence for a +specific environment; it is not provider certification. + +Official docs checked on 2026-06-04: + +- +- +- +- + +## What This Proves + +- A real Shopify development-store delivery can reach Webhookery. +- Webhookery verifies `X-Shopify-Hmac-SHA256` using the raw request body. +- `X-Shopify-Webhook-Id` and `X-Shopify-Topic` are captured as provider + metadata. +- Topic-based routing, downstream failure, replay, and incident packet + generation are visible. +- A sanitized report can be produced without exposing raw payloads or secrets. + +## What This Does Not Prove + +- Provider certification or Shopify endorsement. +- Provider-side event completeness. +- Exactly-once delivery or global ordering. 
+- Universal missed-event recovery across every Shopify topic. +- Downstream business processing success. +- Legal, regulatory, or compliance certification. + +## Prerequisites + +- A Shopify development store and app. +- Local Webhookery API and worker. +- An HTTPS endpoint or temporary development tunnel that forwards to + Webhookery. +- A disposable downstream receiver that can fail first and recover later. +- A private directory for generated reports and bundles. + +## 1. Prepare Webhookery + +```bash +cp .env.example .env +docker compose up --build +export WEBHOOKERY_API_KEY=dev-bootstrap-key +export WEBHOOKERY_SHOPIFY_CLIENT_SECRET='replace-with-app-client-secret' +``` + +Create the source and save the returned source ID: + +```bash +go run ./cmd/whcp sources create \ + --name shopify-live-proof \ + --provider shopify \ + --secret "$WEBHOOKERY_SHOPIFY_CLIENT_SECRET" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_SHOPIFY_SOURCE_ID=src_replace_me +``` + +## 2. Configure A Development-Store Subscription + +Create or update a Shopify webhook subscription for a development-store topic, +for example `products/create`. + +Use this URI: + +```text +https://webhookery.example.test/v1/ingest/shopify/${WEBHOOKERY_SHOPIFY_SOURCE_ID} +``` + +For a local tunnel, forward the public tunnel URL to: + +```text +http://localhost:8080/v1/ingest/shopify/${WEBHOOKERY_SHOPIFY_SOURCE_ID} +``` + +Do not put access tokens, API keys, or client secrets in the subscription URI. + +## 3. 
Configure The Failing Receiver Route + +```bash +export WEBHOOKERY_RECEIVER_URL='https://receiver.example.test/fail-first' + +go run ./cmd/whcp endpoints validate-url \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp endpoints create \ + --name shopify-live-proof-receiver \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ENDPOINT_ID=end_replace_me + +go run ./cmd/whcp routes create \ + --name shopify-live-proof-route \ + --source-id "$WEBHOOKERY_SHOPIFY_SOURCE_ID" \ + --endpoint-id "$WEBHOOKERY_ENDPOINT_ID" \ + --event-types products/create \ + --state draft \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ROUTE_ID=rte_replace_me + +go run ./cmd/whcp routes activate \ + --route-id "$WEBHOOKERY_ROUTE_ID" \ + --reason "shopify live-proof route" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## 4. Trigger And Capture A Delivery + +Trigger the subscribed topic in the development store. For `products/create`, +create a disposable product. + +Then find the Webhookery event: + +```bash +go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_EVENT_ID=evt_replace_me + +go run ./cmd/whcp events timeline \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --format markdown \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected evidence: + +- provider verification is valid; +- raw payload evidence is represented by IDs and hashes; +- `X-Shopify-Webhook-Id` and `X-Shopify-Topic` are visible in normalized + metadata; +- delivery to the failing receiver is recorded. + +## 5. 
Recover And Replay + +Change the receiver to return success, then run a dry-run and replay: + +```bash +go run ./cmd/whcp replay-jobs dry-run \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "shopify live-proof receiver fixed" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp replay-jobs create \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "shopify live-proof receiver fixed" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +If the event is in DLQ, release the DLQ entry instead: + +```bash +go run ./cmd/whcp dead-letter list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_DLQ_ID=dlq_replace_me + +go run ./cmd/whcp dead-letter release \ + --entry-id "$WEBHOOKERY_DLQ_ID" \ + --reason-code receiver_fixed \ + --reason "shopify live-proof receiver recovered" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## 6. Generate A Private Incident Packet + +```bash +mkdir -p live-proof-private/shopify + +go run ./cmd/whcp incidents create \ + --title "Shopify development-store webhook failed then replayed" \ + --reason "shopify live-provider proof" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_INCIDENT_ID=inc_replace_me + +go run ./cmd/whcp incidents add-event \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --reason "attach Shopify proof event" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents generate-report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "shopify proof report" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --format markdown \ + --output live-proof-private/shopify/incident-report.private.md \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents export \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "shopify proof bundle" \ + --output 
live-proof-private/shopify/evidence.private.tar.gz \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +The `live-proof-private/` directory is an operator convention for local proof +artifacts. Do not commit it. + +## 7. Redact A Shareable Sample + +Use the public-sample rules in +`docs/live-provider-proof/stripe-redaction-policy.md`. A committed sample +shape lives at +`docs/live-provider-proof/samples/shopify-incident-report.redacted.md`. + +Before sharing any proof: + +```bash +go run ./cmd/whcp audit verify-bundle \ + --file live-proof-private/shopify/evidence.private.tar.gz +make provider-proof-check +``` + +## Cleanup + +1. Delete the development-store webhook subscription. +2. Rotate or delete the Webhookery source secret. +3. Disable proof routes and endpoints. +4. Delete temporary tunnel or proxy configuration. +5. Remove private proof artifacts from shared machines. + diff --git a/docs/live-provider-proof/stripe-redaction-policy.md b/docs/live-provider-proof/stripe-redaction-policy.md new file mode 100644 index 0000000..d1f4fb6 --- /dev/null +++ b/docs/live-provider-proof/stripe-redaction-policy.md @@ -0,0 +1,50 @@ +# Live-Proof Redaction Policy + +Use this policy before sharing Stripe, GitHub, or Shopify live-provider proof +output from Webhookery. Do not commit completed live proof files unless they +are reduced to a sample that follows this policy. + +## Remove + +- Provider API keys, restricted keys, session tokens, bearer tokens, and CLI + authentication material. +- Webhook signing secrets and signature headers such as `Stripe-Signature`. +- Raw request bodies, raw response bodies, customer email addresses, card + details, addresses, phone numbers, names, tax IDs, and account identifiers. +- Full downstream receiver URLs when they contain tenant names, tokens, or + internal hostnames. +- Unhashed tenant IDs and organization IDs. +- Publicly routable proof URLs after the proof is complete. 
+ +## Allowed In Public Samples + +- Provider name, event type, and redacted event ID shape. +- Timestamped step names. +- Verification result as `valid` or `invalid`. +- Hashes such as `sha256:...` for raw payload and bundle files. +- Source, endpoint, route, delivery, replay, incident, and export IDs when + generated from a disposable local or test environment. +- Status codes, retry state, DLQ state, replay reason code, and non-secret + error classes. +- Links to official provider documentation. + +## Required Notes + +Every public sample must state: + +- test mode or sandbox only; +- not provider certification; +- no provider-side completeness guarantee; +- no exactly-once delivery claim; +- raw payload bodies and secrets omitted; and +- completed private evidence bundles must not be committed. + +## Review + +Before sharing: + +1. Search for common secret prefixes and bearer tokens. +2. Search for email addresses and internal hostnames. +3. Confirm raw payload files are omitted. +4. Confirm bundle manifests contain hashes, not raw bodies. +5. Run `make provider-proof-check` after updating committed samples. diff --git a/docs/live-provider-proof/stripe.md b/docs/live-provider-proof/stripe.md new file mode 100644 index 0000000..b316e49 --- /dev/null +++ b/docs/live-provider-proof/stripe.md @@ -0,0 +1,236 @@ +# Stripe Live-Provider Proof Guide + +This manual guide shows how to prove a Stripe test-mode webhook flow through +Webhookery without committing secrets or live customer data. + +Status: external/manual. Completing this guide produces private evidence for a +specific environment; it is not provider certification. + +Official docs checked on 2026-06-04: + +- +- + +## What This Proves + +- A real Stripe test-mode event can reach Webhookery. +- Webhookery verifies `Stripe-Signature` using the raw request body. +- Webhookery records event timeline evidence before downstream success. 
+- Downstream failure, retry or DLQ, replay, and incident packet generation are + visible. +- A sanitized report can be produced without exposing raw payloads or secrets. + +## What This Does Not Prove + +- Provider certification or Stripe endorsement. +- Provider-side event completeness. +- Exactly-once delivery or global ordering. +- Downstream business processing success. +- Legal, regulatory, or compliance certification. + +## Prerequisites + +- Stripe CLI logged into a sandbox or test-mode account. +- Local Webhookery API and worker. +- A disposable downstream receiver that can fail first and recover later. +- A private directory for generated reports and bundles. +- The redaction policy in + `docs/live-provider-proof/stripe-redaction-policy.md`. + +## 1. Prepare Webhookery + +```bash +cp .env.example .env +docker compose up --build +export WEBHOOKERY_API_KEY=dev-bootstrap-key +``` + +Create a source with a temporary placeholder, then save the returned source ID: + +```bash +go run ./cmd/whcp sources create \ + --name stripe-live-proof \ + --provider stripe \ + --secret temporary-local-placeholder \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_STRIPE_SOURCE_ID=src_replace_me +``` + +## 2. Start Stripe Test-Mode Forwarding + +Run this in another shell: + +```bash +stripe listen \ + --events payment_intent.succeeded,payment_intent.payment_failed \ + --forward-to "http://localhost:8080/v1/ingest/stripe/${WEBHOOKERY_STRIPE_SOURCE_ID}" +``` + +Copy the webhook signing secret from the `stripe listen` output into a local +shell variable, then rotate the Webhookery source secret. Do not commit or +screenshot the value. + +```bash +export WEBHOOKERY_STRIPE_WEBHOOK_SECRET='replace-with-local-listener-secret' + +go run ./cmd/whcp sources rotate-secret \ + --source-id "$WEBHOOKERY_STRIPE_SOURCE_ID" \ + --secret "$WEBHOOKERY_STRIPE_WEBHOOK_SECRET" \ + --reason "stripe live-proof listener secret" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## 3. 
Configure The Failing Receiver Route + +Use a receiver URL that returns an error before you flip it to success: + +```bash +export WEBHOOKERY_RECEIVER_URL='https://receiver.example.test/fail-first' + +go run ./cmd/whcp endpoints validate-url \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp endpoints create \ + --name stripe-live-proof-receiver \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ENDPOINT_ID=end_replace_me + +go run ./cmd/whcp routes create \ + --name stripe-live-proof-route \ + --source-id "$WEBHOOKERY_STRIPE_SOURCE_ID" \ + --endpoint-id "$WEBHOOKERY_ENDPOINT_ID" \ + --event-types payment_intent.succeeded,payment_intent.payment_failed \ + --state draft \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ROUTE_ID=rte_replace_me + +go run ./cmd/whcp routes activate \ + --route-id "$WEBHOOKERY_ROUTE_ID" \ + --reason "stripe live-proof route" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## 4. Trigger And Capture A Test Event + +```bash +stripe trigger payment_intent.succeeded +``` + +Then find the Webhookery event: + +```bash +go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_EVENT_ID=evt_replace_me + +go run ./cmd/whcp events timeline \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --format markdown \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected evidence: + +- provider verification is valid; +- raw payload evidence is represented by IDs and hashes; +- delivery to the failing receiver is recorded; +- retry or DLQ state is visible when the receiver remains failing long enough. + +## 5. 
Recover And Replay + +Change the receiver to return success, then run a dry-run and replay: + +```bash +go run ./cmd/whcp replay-jobs dry-run \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "stripe live-proof receiver fixed" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp replay-jobs create \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "stripe live-proof receiver fixed" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +If the event is in DLQ, release the DLQ entry instead: + +```bash +go run ./cmd/whcp dead-letter list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_DLQ_ID=dlq_replace_me + +go run ./cmd/whcp dead-letter release \ + --entry-id "$WEBHOOKERY_DLQ_ID" \ + --reason-code receiver_fixed \ + --reason "stripe live-proof receiver recovered" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## 6. Generate A Private Incident Packet + +```bash +mkdir -p live-proof-private/stripe + +go run ./cmd/whcp incidents create \ + --title "Stripe test-mode webhook failed then replayed" \ + --reason "stripe live-provider proof" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_INCIDENT_ID=inc_replace_me + +go run ./cmd/whcp incidents add-event \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --reason "attach Stripe proof event" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents generate-report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "stripe proof report" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --format markdown \ + --output live-proof-private/stripe/incident-report.private.md \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents export \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "stripe proof bundle" \ + --output live-proof-private/stripe/evidence.private.tar.gz \ + 
--api-key "$WEBHOOKERY_API_KEY" +``` + +The `live-proof-private/` directory is an operator convention for local proof +artifacts. Do not commit it. + +## 7. Redact A Shareable Sample + +Use `docs/live-provider-proof/stripe-redaction-policy.md`. A committed sample +shape lives at +`docs/live-provider-proof/samples/stripe-incident-report.redacted.md`. + +Before sharing any proof: + +```bash +go run ./cmd/whcp audit verify-bundle \ + --file live-proof-private/stripe/evidence.private.tar.gz +make provider-proof-check +``` + +## Cleanup + +1. Stop `stripe listen`. +2. Delete or disable the Stripe test endpoint if you created one in Workbench. +3. Rotate or delete the Webhookery source secret. +4. Disable proof routes and endpoints. +5. Remove private proof artifacts from shared machines. + diff --git a/docs/observability.md b/docs/observability.md new file mode 100644 index 0000000..d9a5505 --- /dev/null +++ b/docs/observability.md @@ -0,0 +1,75 @@ +# Observability Examples + +Webhookery exposes operational state through authenticated ops APIs and public +aggregate Prometheus metrics. These examples are starting points for +self-hosted operators. They are not hosted dashboards or managed monitoring. + +Public `/metrics` output intentionally avoids tenant labels. Tenant-scoped +views belong behind authenticated APIs such as `/v1/ops/metrics`, +`/v1/ops/metrics/rollups`, `/v1/alerts`, and `/v1/alert-firings`. + +## Prometheus Scrape + +Example scrape target: + +```yaml +scrape_configs: + - job_name: webhookery + metrics_path: /metrics + static_configs: + - targets: + - webhookery-api.webhookery.svc.cluster.local:8080 +``` + +The example alert rules live at +`deploy/observability/prometheus-rules.example.yaml`. + +## Core Metrics + +| Metric | Meaning | Operator use | +|--------|---------|--------------| +| `webhookery_events_total` | Total captured canonical events. | Compare ingest volume to provider/producer expectations. 
| +| `webhookery_outbox_pending` | Pending durable outbox rows. | Detect worker lag or stuck routing/recovery work. | +| `webhookery_outbox_oldest_age_seconds` | Age of the oldest pending outbox row. | Primary queue-drain freshness signal. | +| `webhookery_dead_letter_open` | Open DLQ entries. | Trigger explicit retry/replay/release triage. | +| `webhookery_quarantine_open` | Open quarantine entries. | Review rejected provider evidence or unsafe requests. | +| `webhookery_endpoint_circuit_open` | Open endpoint circuits. | Identify receiver-side failure or disabled delivery paths. | +| `webhookery_audit_chain_unchained_events` | Audit events missing chain entries. | Treat as trust-boundary incident until verified. | +| `webhookery_audit_chain_verification_failures` | Audit-chain entries that fail verification. | Preserve state and investigate immediately. | +| `webhookery_audit_chain_last_anchor_age_seconds` | Age of newest local/object-store anchor. | Review anchor cadence. | +| `webhookery_deliveries{state="..."}` | Delivery counts by state. | Watch scheduled, in-progress, succeeded, and failed trends. | +| `webhookery_replay_jobs{state="..."}` | Replay job counts by state. | Ensure replay does not starve live work. | +| `webhookery_reconciliation_jobs{state="..."}` | Reconciliation jobs by state. | Track provider gap review backlog. | +| `webhookery_reconciliation_items{outcome="..."}` | Reconciliation item outcomes. | Identify failed or unrecoverable provider-side gaps. | + +## Dashboard Panels + +Start with these panels: + +- Capture: `rate(webhookery_events_total[5m])`. +- Queue depth: `webhookery_outbox_pending`. +- Queue freshness: `webhookery_outbox_oldest_age_seconds`. +- Delivery state: `sum by (state) (webhookery_deliveries)`. +- DLQ and quarantine: `webhookery_dead_letter_open` and + `webhookery_quarantine_open`. +- Audit chain: `webhookery_audit_chain_verification_failures` and + `webhookery_audit_chain_unchained_events`. 
+- Reconciliation outcomes: + `sum by (outcome) (webhookery_reconciliation_items)`. +- Signal egress: use authenticated alert/notification/SIEM APIs for detailed + delivery attempts; keep public metrics aggregate-only. + +## Incident Links + +When an alert fires, pair dashboard data with evidence APIs: + +```bash +whcp ops queues --api-key "$WEBHOOKERY_API_KEY" +whcp alerts firings --api-key "$WEBHOOKERY_API_KEY" +whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" +whcp reconciliation jobs --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected result: operators can determine whether the issue is capture, +storage, delivery, replay, reconciliation, audit-chain, or operational signal +egress without exposing raw payload bodies or secrets. diff --git a/docs/openapi/index.html b/docs/openapi/index.html new file mode 100644 index 0000000..ad5913b --- /dev/null +++ b/docs/openapi/index.html @@ -0,0 +1,256 @@ + + + + + + Webhookery API Reference + + + +
+

Webhookery API Reference

+

Self-hosted webhook evidence and delivery control plane.

+
+ Version 0.1.0 + 214 operations + Generated from openapi.yaml +
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodPathOperationTagAuthRequestResponses
GET/healthzgetHealthz
Liveness check
Systemnone-200
GET/metricsgetMetrics
Prometheus metrics
Systemnone-200
GET/openapi.yamlgetOpenapiYaml
OpenAPI document
Systemnone-200
GET/readyzgetReadyz
Readiness check
Systemnone-200, 503
GET/v1/access-policiesgetAccessPolicies
List access policy rules
Auth And IdentitybearerAuth-200
POST/v1/access-policiespostAccessPolicies
Create access policy rule
Auth And IdentitybearerAuthapplication/json201
PATCH/v1/access-policies/{policy_id}patchAccessPoliciesPolicyId
Update access policy rule
Auth And IdentitybearerAuthapplication/json200
DELETE/v1/access-policies/{policy_id}deleteAccessPoliciesPolicyId
Disable access policy rule
Auth And IdentitybearerAuthapplication/json200
GET/v1/adaptersgetAdapters
List built-in and tenant custom adapters
Sources And ProvidersbearerAuth-200
POST/v1/adapterspostAdapters
Create tenant custom adapter
Sources And ProvidersbearerAuthapplication/json201
GET/v1/adapters/{adapter_id}getAdaptersAdapterId
Get adapter metadata
Sources And ProvidersbearerAuth-200
GET/v1/adapters/{adapter_id}/versionsgetAdaptersAdapterIdVersions
List adapter versions
Sources And ProvidersbearerAuth-200
POST/v1/adapters/{adapter_id}/versionspostAdaptersAdapterIdVersions
Create adapter version
Sources And ProvidersbearerAuthapplication/json201
POST/v1/adapters/{adapter_id}/versions/{version_id}/test-vectorspostAdaptersAdapterIdVersionsVersionIdTestVectors
Add adapter version test vector
Sources And ProvidersbearerAuthapplication/json201
POST/v1/adapters/{adapter_id}/versions/{version_id}:transitionpostAdaptersAdapterIdVersionsVersionIdTransition
Transition adapter version through approval workflow
Sources And ProvidersbearerAuthapplication/json200
GET/v1/admin/retention-policiesgetAdminRetentionPolicies
List retention policies
Audit And RetentionbearerAuth-200
POST/v1/admin/retention-policiespostAdminRetentionPolicies
Create or update retention policy
Audit And RetentionbearerAuthapplication/json201
PATCH/v1/admin/retention-policies/{policy_id}patchAdminRetentionPoliciesPolicyId
Update retention policy
Audit And RetentionbearerAuthapplication/json200
GET/v1/alert-firingsgetAlertFirings
List alert firings
OperationsbearerAuth-200
GET/v1/alert-firings/{firing_id}getAlertFiringsFiringId
Get alert firing
OperationsbearerAuth-200
POST/v1/alert-firings/{firing_id}:acknowledgepostAlertFiringsFiringIdAcknowledge
Acknowledge alert firing
OperationsbearerAuthapplication/json200
GET/v1/alertsgetAlerts
List alert rules
OperationsbearerAuth-200
POST/v1/alertspostAlerts
Create alert rule
OperationsbearerAuthapplication/json201, 403
GET/v1/alerts/{alert_id}getAlertsAlertId
Get alert rule
OperationsbearerAuth-200
PATCH/v1/alerts/{alert_id}patchAlertsAlertId
Update alert rule
OperationsbearerAuthapplication/json200
DELETE/v1/alerts/{alert_id}deleteAlertsAlertId
Disable alert rule
OperationsbearerAuthapplication/json200
GET/v1/api-keysgetApiKeys
List API keys
API KeysbearerAuth-200
POST/v1/api-keyspostApiKeys
Create API key
API KeysbearerAuthapplication/json201, 403
POST/v1/api-keys/{api_key_id}:revokepostApiKeysApiKeyIdRevoke
Revoke API key
API KeysbearerAuthapplication/json200
GET/v1/audit-chain/anchorsgetAuditChainAnchors
List audit chain anchors
Audit And RetentionbearerAuth-200
GET/v1/audit-chain/anchors/{anchor_id}getAuditChainAnchorsAnchorId
Get audit chain anchor
Audit And RetentionbearerAuth-200, 404
GET/v1/audit-chain/headgetAuditChainHead
Get audit chain head
Audit And RetentionbearerAuth-200
POST/v1/audit-chain:anchorpostAuditChainAnchor
Anchor a verified audit chain range
Audit And RetentionbearerAuthapplication/json201, 403
POST/v1/audit-chain:verifypostAuditChainVerify
Verify audit chain continuity
Audit And RetentionbearerAuthapplication/json200
GET/v1/audit-eventsgetAuditEvents
List audit events
Audit And RetentionbearerAuth-200
POST/v1/audit-events:exportpostAuditEventsExport
Create audit evidence export
Audit And RetentionbearerAuthapplication/json202, 403
GET/v1/audit-exportsgetAuditExports
List audit evidence exports
Audit And RetentionbearerAuth-200
GET/v1/audit-exports/{export_id}getAuditExportsExportId
Get audit evidence export status
Audit And RetentionbearerAuth-200, 404
GET/v1/audit-exports/{export_id}:downloadgetAuditExportsExportIdDownload
Download audit evidence export bundle
Audit And RetentionbearerAuth-200, 403, 410
POST/v1/auth/logoutpostAuthLogout
Revoke current management session
Auth And IdentitybearerAuth-204
GET/v1/auth/oidc/callbackgetAuthOidcCallback
Complete OIDC management login
Auth And Identitynone-200
GET/v1/auth/oidc/logingetAuthOidcLogin
Start OIDC management login
Auth And Identitynone-302
GET/v1/auth/sessiongetAuthSession
Get current management session
Auth And IdentitybearerAuth-200
GET/v1/auth/sessionsgetAuthSessions
List active management sessions
Auth And IdentitybearerAuth-200
POST/v1/auth/sessions/{session_id}:revokepostAuthSessionsSessionIdRevoke
Revoke management session
Auth And IdentitybearerAuthapplication/json200
POST/v1/authz:explainpostAuthzExplain
Explain authorization decision
Auth And IdentitybearerAuthapplication/json200
GET/v1/dead-lettergetDeadLetter
List dead-letter entries
Delivery And ReplaybearerAuth-200
POST/v1/dead-letter/{entry_id}:releasepostDeadLetterEntryIdRelease
Release dead-letter entry into replay work
Delivery And ReplaybearerAuthapplication/json202
POST/v1/dead-letter:bulk-releasepostDeadLetterBulkRelease
Bulk release dead-letter entries
Delivery And ReplaybearerAuthapplication/json202
GET/v1/deliveriesgetDeliveries
List deliveries
Delivery And ReplaybearerAuth-200
GET/v1/deliveries/{delivery_id}/attemptsgetDeliveriesDeliveryIdAttempts
List delivery attempts
Delivery And ReplaybearerAuth-200
POST/v1/deliveries/{delivery_id}:cancelpostDeliveriesDeliveryIdCancel
Cancel scheduled delivery
Delivery And ReplaybearerAuthapplication/json200
POST/v1/deliveries/{delivery_id}:retrypostDeliveriesDeliveryIdRetry
Manually retry delivery
Delivery And ReplaybearerAuthapplication/json202
GET/v1/delivery-attempts/{attempt_id}getDeliveryAttemptsAttemptId
Get delivery attempt
Delivery And ReplaybearerAuth-200
GET/v1/endpoint-healthgetEndpointHealth
List endpoint health
OperationsbearerAuth-200
GET/v1/endpointsgetEndpoints
List outbound endpoints
Endpoints And RoutingbearerAuth-200
POST/v1/endpointspostEndpoints
Create outbound endpoint
Endpoints And RoutingbearerAuthapplication/json201
GET/v1/endpoints/{endpoint_id}getEndpointsEndpointId
Get outbound endpoint
Endpoints And RoutingbearerAuth-200, 404
PATCH/v1/endpoints/{endpoint_id}patchEndpointsEndpointId
Update outbound endpoint metadata, URL, retry policy, or state
Endpoints And RoutingbearerAuthapplication/json200, 422
DELETE/v1/endpoints/{endpoint_id}deleteEndpointsEndpointId
Disable outbound endpoint
Endpoints And RoutingbearerAuthapplication/json200
POST/v1/endpoints/{endpoint_id}/secrets:rotatepostEndpointsEndpointIdSecretsRotate
Rotate endpoint signing secret
Endpoints And RoutingbearerAuthapplication/json200
POST/v1/endpoints/{endpoint_id}:testpostEndpointsEndpointIdTest
Schedule a signed endpoint test delivery
Endpoints And RoutingbearerAuthapplication/json202
POST/v1/endpoints:validate-urlpostEndpointsValidateUrl
Validate endpoint URL against SSRF policy
Endpoints And RoutingbearerAuth-200
GET/v1/event-typesgetEventTypes
List event types
Schemas And TransformationsbearerAuth-200
POST/v1/event-typespostEventTypes
Create event type
Schemas And TransformationsbearerAuthapplication/json201
GET/v1/event-types/{event_type}getEventTypesEventType
Get event type
Schemas And TransformationsbearerAuth-200, 404
PATCH/v1/event-types/{event_type}patchEventTypesEventType
Update event type metadata or state
Schemas And TransformationsbearerAuthapplication/json200
DELETE/v1/event-types/{event_type}deleteEventTypesEventType
Disable event type
Schemas And TransformationsbearerAuthapplication/json200
GET/v1/event-types/{event_type}/schemasgetEventTypesEventTypeSchemas
List schemas for event type
Schemas And TransformationsbearerAuth-200
POST/v1/event-types/{event_type}/schemaspostEventTypesEventTypeSchemas
Create schema for event type
Schemas And TransformationsbearerAuthapplication/json201
GET/v1/event-types/{event_type}/schemas/{schema_version}getEventTypesEventTypeSchemasSchemaVersion
Get schema for event type
Schemas And TransformationsbearerAuth-200, 404
PATCH/v1/event-types/{event_type}/schemas/{schema_version}patchEventTypesEventTypeSchemasSchemaVersion
Update schema lifecycle state
Schemas And TransformationsbearerAuthapplication/json200
DELETE/v1/event-types/{event_type}/schemas/{schema_version}deleteEventTypesEventTypeSchemasSchemaVersion
Retire schema
Schemas And TransformationsbearerAuthapplication/json200
POST/v1/event-types/{event_type}/schemas/{schema_version}:check-compatibilitypostEventTypesEventTypeSchemasSchemaVersionCheckCompatibility
Check schema compatibility
Schemas And TransformationsbearerAuthapplication/json200
POST/v1/event-types/{event_type}/schemas/{schema_version}:validatepostEventTypesEventTypeSchemasSchemaVersionValidate
Validate payload against schema
Schemas And TransformationsbearerAuthapplication/json200
GET/v1/eventsgetEvents
Search events
Events And IngestionbearerAuth-200, 400
POST/v1/eventspostEvents
Ingest product event
Events And IngestionbearerAuth, producerMTLSapplication/json202, 400, 401, 403
GET/v1/events/{event_id}getEventsEventId
Get event
Events And IngestionbearerAuth-200, 404
GET/v1/events/{event_id}/normalizedgetEventsEventIdNormalized
Get normalized event evidence
Events And IngestionbearerAuth-200, 403, 410
GET/v1/events/{event_id}/rawgetEventsEventIdRaw
Get raw payload evidence
Events And IngestionbearerAuth-200, 400, 403, 410
GET/v1/events/{event_id}/timelinegetEventsEventIdTimeline
Get event timeline
Events And IngestionbearerAuth-200
GET/v1/identity-providersgetIdentityProviders
List identity providers
Auth And IdentitybearerAuth-200
POST/v1/identity-providerspostIdentityProviders
Create OIDC identity provider
Auth And IdentitybearerAuthapplication/json201
GET/v1/identity-providers/{provider_id}getIdentityProvidersProviderId
Get identity provider
Auth And IdentitybearerAuth-200
PATCH/v1/identity-providers/{provider_id}patchIdentityProvidersProviderId
Update identity provider
Auth And IdentitybearerAuthapplication/json200
DELETE/v1/identity-providers/{provider_id}deleteIdentityProvidersProviderId
Disable identity provider
Auth And IdentitybearerAuthapplication/json200
POST/v1/identity-providers/{provider_id}:testpostIdentityProvidersProviderIdTest
Test identity provider configuration
Auth And IdentitybearerAuthapplication/json200
GET/v1/incidentsgetIncidents
List webhook incidents
IncidentsbearerAuth-200
POST/v1/incidentspostIncidents
Create webhook incident
IncidentsbearerAuthapplication/json201, 403
GET/v1/incidents/{incident_id}getIncidentsIncidentId
Get webhook incident
IncidentsbearerAuth-200, 404
POST/v1/incidents/{incident_id}/eventspostIncidentsIncidentIdEvents
Attach event to incident
IncidentsbearerAuthapplication/json201, 404
DELETE/v1/incidents/{incident_id}/events/{event_id}deleteIncidentsIncidentIdEventsEventId
Remove event from incident
IncidentsbearerAuthapplication/json200
POST/v1/incidents/{incident_id}/evidence-exportpostIncidentsIncidentIdEvidenceExport
Create incident evidence export
IncidentsbearerAuthapplication/json202
POST/v1/incidents/{incident_id}/generate-reportpostIncidentsIncidentIdGenerateReport
Generate incident report snapshot
IncidentsbearerAuthapplication/json201
GET/v1/incidents/{incident_id}/reportgetIncidentsIncidentIdReport
Get latest incident report snapshot
IncidentsbearerAuth-200
POST/v1/ingest/cloudevents/{source_id}postIngestCloudeventsSourceId
CloudEvents webhook ingestion
Events And Ingestionnone-200, 431
POST/v1/ingest/generic-jwt/{source_id}postIngestGenericJwtSourceId
Generic JWT/JWS webhook ingestion
Events And Ingestionnone-200, 401, 431
POST/v1/ingest/github/{source_id}postIngestGithubSourceId
GitHub webhook ingestion
Events And Ingestionnone-200, 431
POST/v1/ingest/shopify/{source_id}postIngestShopifySourceId
Shopify webhook ingestion
Events And Ingestionnone-200, 431
POST/v1/ingest/slack/{source_id}postIngestSlackSourceId
Slack webhook ingestion
Events And Ingestionnone-200, 431
POST/v1/ingest/stripe/{source_id}postIngestStripeSourceId
Stripe webhook ingestion
Events And Ingestionnone-200, 431
POST/v1/ingest/{tenant_id}/{source_id}postIngestTenantIdSourceId
Generic provider webhook ingestion
Events And Ingestionnoneapplication/json200, 401, 413, 431, 503
GET/v1/notification-channelsgetNotificationChannels
List notification channels
Signal EgressbearerAuth-200
POST/v1/notification-channelspostNotificationChannels
Create notification channel
Signal EgressbearerAuthapplication/json201
GET/v1/notification-channels/{channel_id}getNotificationChannelsChannelId
Get notification channel
Signal EgressbearerAuth-200
PATCH/v1/notification-channels/{channel_id}patchNotificationChannelsChannelId
Update notification channel
Signal EgressbearerAuthapplication/json200
DELETE/v1/notification-channels/{channel_id}deleteNotificationChannelsChannelId
Disable notification channel
Signal EgressbearerAuthapplication/json200
POST/v1/notification-channels/{channel_id}:testpostNotificationChannelsChannelIdTest
Queue a test notification delivery
Signal EgressbearerAuthapplication/json202
GET/v1/notification-deliveriesgetNotificationDeliveries
List notification deliveries
Signal EgressbearerAuth-200
GET/v1/notification-deliveries/{delivery_id}/attemptsgetNotificationDeliveriesDeliveryIdAttempts
List notification delivery attempts
Signal EgressbearerAuth-200
POST/v1/notification-deliveries/{delivery_id}:retrypostNotificationDeliveriesDeliveryIdRetry
Retry notification delivery
Signal EgressbearerAuthapplication/json200
POST/v1/oauth/tokenpostOauthToken
Issue producer OAuth access token
Producer TrustbasicAuthapplication/x-www-form-urlencoded200, 400, 401
GET/v1/ops/configgetOpsConfig
Get redacted runtime configuration
OperationsbearerAuth-200
GET/v1/ops/metricsgetOpsMetrics
Get tenant ops metrics
OperationsbearerAuth-200
GET/v1/ops/metrics/rollupsgetOpsMetricsRollups
List tenant metrics rollups
OperationsbearerAuth-200, 400
GET/v1/ops/queuesgetOpsQueues
List tenant queue stats
OperationsbearerAuth-200
GET/v1/ops/storagegetOpsStorage
Get tenant storage status
OperationsbearerAuth-200
GET/v1/ops/workersgetOpsWorkers
List worker leases
OperationsbearerAuth-200
GET/v1/ops/workers/{worker_id}getOpsWorkersWorkerId
Get worker lease
OperationsbearerAuth-200, 404
GET/v1/producer-clientsgetProducerClients
List producer OAuth clients
Producer TrustbearerAuth-200
POST/v1/producer-clientspostProducerClients
Create producer OAuth client
Producer TrustbearerAuthapplication/json201, 403
GET/v1/producer-clients/{client_id}getProducerClientsClientId
Get producer OAuth client
Producer TrustbearerAuth-200
PATCH/v1/producer-clients/{client_id}patchProducerClientsClientId
Update producer OAuth client
Producer TrustbearerAuthapplication/json200
DELETE/v1/producer-clients/{client_id}deleteProducerClientsClientId
Disable producer OAuth client
Producer TrustbearerAuthapplication/json200
POST/v1/producer-clients/{client_id}/secrets:rotatepostProducerClientsClientIdSecretsRotate
Rotate producer client secret
Producer TrustbearerAuthapplication/json200
GET/v1/producer-mtls-identitiesgetProducerMtlsIdentities
List producer mTLS identities
Producer TrustbearerAuth-200
POST/v1/producer-mtls-identitiespostProducerMtlsIdentities
Create producer mTLS identity
Producer TrustbearerAuthapplication/json201
GET/v1/producer-mtls-identities/{identity_id}getProducerMtlsIdentitiesIdentityId
Get producer mTLS identity
Producer TrustbearerAuth-200
PATCH/v1/producer-mtls-identities/{identity_id}patchProducerMtlsIdentitiesIdentityId
Update producer mTLS identity
Producer TrustbearerAuthapplication/json200
DELETE/v1/producer-mtls-identities/{identity_id}deleteProducerMtlsIdentitiesIdentityId
Disable producer mTLS identity
Producer TrustbearerAuthapplication/json200
POST/v1/producer-mtls-identities/{identity_id}:verifypostProducerMtlsIdentitiesIdentityIdVerify
Verify producer mTLS certificate against identity
Producer TrustbearerAuthapplication/json200
GET/v1/provider-connectionsgetProviderConnections
List provider API reconciliation connections
Sources And ProvidersbearerAuth-200
POST/v1/provider-connectionspostProviderConnections
Create provider API reconciliation connection
Sources And ProvidersbearerAuthapplication/json201
GET/v1/provider-connections/{connection_id}getProviderConnectionsConnectionId
Get provider API reconciliation connection
Sources And ProvidersbearerAuth-200
POST/v1/provider-connections/{connection_id}:revokepostProviderConnectionsConnectionIdRevoke
Revoke provider API connection credentials
Sources And ProvidersbearerAuthapplication/json200
POST/v1/provider-connections/{connection_id}:verifypostProviderConnectionsConnectionIdVerify
Verify provider API connection credentials
Sources And ProvidersbearerAuthapplication/json200
GET/v1/quarantinegetQuarantine
List quarantine entries
Delivery And ReplaybearerAuth-200
POST/v1/quarantine/{entry_id}:approvepostQuarantineEntryIdApprove
Approve quarantine entry
Delivery And ReplaybearerAuth-200
POST/v1/quarantine/{entry_id}:rejectpostQuarantineEntryIdReject
Reject quarantine entry
Delivery And ReplaybearerAuth-200
GET/v1/reconciliation-jobsgetReconciliationJobs
List provider reconciliation jobs
ReconciliationbearerAuth-200
POST/v1/reconciliation-jobspostReconciliationJobs
Create provider reconciliation job
ReconciliationbearerAuthapplication/json201
GET/v1/reconciliation-jobs/{job_id}getReconciliationJobsJobId
Get provider reconciliation job
ReconciliationbearerAuth-200
GET/v1/reconciliation-jobs/{job_id}/itemsgetReconciliationJobsJobIdItems
List provider reconciliation gap items
ReconciliationbearerAuth-200
POST/v1/reconciliation-jobs/{job_id}:cancelpostReconciliationJobsJobIdCancel
Cancel provider reconciliation job
ReconciliationbearerAuthapplication/json200
POST/v1/reconciliation-jobs:dry-runpostReconciliationJobsDryRun
Dry-run provider reconciliation
ReconciliationbearerAuthapplication/json200
GET/v1/replay-approval-policiesgetReplayApprovalPolicies
List replay approval policies
Delivery And ReplaybearerAuth-200
POST/v1/replay-approval-policiespostReplayApprovalPolicies
Create or reactivate replay approval policy
Delivery And ReplaybearerAuthapplication/json201
DELETE/v1/replay-approval-policies/{policy_id}deleteReplayApprovalPoliciesPolicyId
Disable replay approval policy
Delivery And ReplaybearerAuthapplication/json200
GET/v1/replay-jobsgetReplayJobs
List replay jobs
Delivery And ReplaybearerAuth-200
POST/v1/replay-jobspostReplayJobs
Create replay job
Delivery And ReplaybearerAuthapplication/json202
POST/v1/replay-jobs/previewpostReplayJobsPreview
Preview replay
Delivery And ReplaybearerAuthapplication/json200
POST/v1/replay-jobs/{replay_job_id}:approvepostReplayJobsReplayJobIdApprove
Approve pending replay job
Delivery And ReplaybearerAuthapplication/json200
POST/v1/replay-jobs/{replay_job_id}:cancelpostReplayJobsReplayJobIdCancel
Cancel replay job
Delivery And ReplaybearerAuthapplication/json200
POST/v1/replay-jobs/{replay_job_id}:pausepostReplayJobsReplayJobIdPause
Pause replay job
Delivery And ReplaybearerAuthapplication/json200
POST/v1/replay-jobs/{replay_job_id}:resumepostReplayJobsReplayJobIdResume
Resume replay job
Delivery And ReplaybearerAuthapplication/json200
POST/v1/replay-jobs:dry-runpostReplayJobsDryRun
Dry-run replay
Delivery And ReplaybearerAuthapplication/json200
GET/v1/retry-policiesgetRetryPolicies
List retry policies
Endpoints And RoutingbearerAuth-200
POST/v1/retry-policiespostRetryPolicies
Create retry policy version
Endpoints And RoutingbearerAuthapplication/json201
GET/v1/retry-policies/{retry_policy_id}getRetryPoliciesRetryPolicyId
Get retry policy
Endpoints And RoutingbearerAuth-200, 404
PATCH/v1/retry-policies/{retry_policy_id}patchRetryPoliciesRetryPolicyId
Create a new retry policy version from an existing policy
Endpoints And RoutingbearerAuthapplication/json200
DELETE/v1/retry-policies/{retry_policy_id}deleteRetryPoliciesRetryPolicyId
Disable retry policy
Endpoints And RoutingbearerAuthapplication/json200
GET/v1/role-bindingsgetRoleBindings
List resource-aware role bindings
Auth And IdentitybearerAuth-200
POST/v1/role-bindingspostRoleBindings
Create resource-aware role binding
Auth And IdentitybearerAuthapplication/json201
PATCH/v1/role-bindings/{binding_id}patchRoleBindingsBindingId
Update role binding
Auth And IdentitybearerAuthapplication/json200
DELETE/v1/role-bindings/{binding_id}deleteRoleBindingsBindingId
Disable role binding
Auth And IdentitybearerAuthapplication/json200
GET/v1/routesgetRoutes
List routes
Endpoints And RoutingbearerAuth-200
POST/v1/routespostRoutes
Create route
Endpoints And RoutingbearerAuthapplication/json201
GET/v1/routes/{route_id}getRoutesRouteId
Get route
Endpoints And RoutingbearerAuth-200, 404
PATCH/v1/routes/{route_id}patchRoutesRouteId
Update route
Endpoints And RoutingbearerAuthapplication/json200
DELETE/v1/routes/{route_id}deleteRoutesRouteId
Inactivate route
Endpoints And RoutingbearerAuthapplication/json200
GET/v1/routes/{route_id}/versionsgetRoutesRouteIdVersions
List immutable route versions
Endpoints And RoutingbearerAuth-200
POST/v1/routes/{route_id}:activatepostRoutesRouteIdActivate
Activate route
Endpoints And RoutingbearerAuth-200
POST/v1/routes/{route_id}:dry-runpostRoutesRouteIdDryRun
Dry-run route against event
Endpoints And RoutingbearerAuth-200
GET/v1/scim-tokensgetScimTokens
List SCIM tokens
Auth And IdentitybearerAuth-200
POST/v1/scim-tokenspostScimTokens
Create SCIM token
Auth And IdentitybearerAuthapplication/json201
DELETE/v1/scim-tokens/{token_id}deleteScimTokensTokenId
Revoke SCIM token
Auth And IdentitybearerAuthapplication/json200
GET/v1/scim/v2/GroupsgetScimV2Groups
List SCIM groups
Auth And IdentitybearerAuth-200
POST/v1/scim/v2/GroupspostScimV2Groups
Provision SCIM group
Auth And IdentitybearerAuthapplication/json201
GET/v1/scim/v2/Groups/{group_id}getScimV2GroupsGroupId
Get SCIM group
Auth And IdentitybearerAuth-200
PUT/v1/scim/v2/Groups/{group_id}putScimV2GroupsGroupId
Replace SCIM group
Auth And IdentitybearerAuthapplication/json200
PATCH/v1/scim/v2/Groups/{group_id}patchScimV2GroupsGroupId
Patch SCIM group
Auth And IdentitybearerAuthapplication/json200
DELETE/v1/scim/v2/Groups/{group_id}deleteScimV2GroupsGroupId
Deactivate SCIM group
Auth And IdentitybearerAuth-200
GET/v1/scim/v2/UsersgetScimV2Users
List SCIM users
Auth And IdentitybearerAuth-200
POST/v1/scim/v2/UserspostScimV2Users
Provision SCIM user
Auth And IdentitybearerAuthapplication/json201
GET/v1/scim/v2/Users/{user_id}getScimV2UsersUserId
Get SCIM user
Auth And IdentitybearerAuth-200
PUT/v1/scim/v2/Users/{user_id}putScimV2UsersUserId
Replace SCIM user
Auth And IdentitybearerAuthapplication/json200
PATCH/v1/scim/v2/Users/{user_id}patchScimV2UsersUserId
Patch SCIM user
Auth And IdentitybearerAuthapplication/json200
DELETE/v1/scim/v2/Users/{user_id}deleteScimV2UsersUserId
Deactivate SCIM user
Auth And IdentitybearerAuth-200
GET/v1/siem-deliveriesgetSiemDeliveries
List SIEM deliveries
Signal EgressbearerAuth-200
GET/v1/siem-deliveries/{delivery_id}/attemptsgetSiemDeliveriesDeliveryIdAttempts
List SIEM delivery attempts
Signal EgressbearerAuth-200
POST/v1/siem-deliveries/{delivery_id}:retrypostSiemDeliveriesDeliveryIdRetry
Retry SIEM delivery
Signal EgressbearerAuthapplication/json200
GET/v1/siem-sinksgetSiemSinks
List SIEM sinks
Signal EgressbearerAuth-200
POST/v1/siem-sinkspostSiemSinks
Create SIEM sink
Signal EgressbearerAuthapplication/json201
GET/v1/siem-sinks/{sink_id}getSiemSinksSinkId
Get SIEM sink
Signal EgressbearerAuth-200
PATCH/v1/siem-sinks/{sink_id}patchSiemSinksSinkId
Update SIEM sink
Signal EgressbearerAuthapplication/json200
DELETE/v1/siem-sinks/{sink_id}deleteSiemSinksSinkId
Disable SIEM sink
Signal EgressbearerAuthapplication/json200
POST/v1/siem-sinks/{sink_id}:testpostSiemSinksSinkIdTest
Queue a test SIEM delivery
Signal EgressbearerAuthapplication/json202
GET/v1/sourcesgetSources
List inbound sources
Sources And ProvidersbearerAuth-200
POST/v1/sourcespostSources
Create inbound source
Sources And ProvidersbearerAuthapplication/json201
GET/v1/sources/{source_id}getSourcesSourceId
Get inbound source
Sources And ProvidersbearerAuth-200, 404
PATCH/v1/sources/{source_id}patchSourcesSourceId
Update inbound source metadata or state
Sources And ProvidersbearerAuthapplication/json200
DELETE/v1/sources/{source_id}deleteSourcesSourceId
Disable inbound source
Sources And ProvidersbearerAuthapplication/json200
POST/v1/sources/{source_id}/secrets:rotatepostSourcesSourceIdSecretsRotate
Rotate source verification secret
Sources And ProvidersbearerAuthapplication/json200
GET/v1/subscriptionsgetSubscriptions
List subscriptions
Endpoints And RoutingbearerAuth-200
POST/v1/subscriptionspostSubscriptions
Create subscription
Endpoints And RoutingbearerAuthapplication/json201
GET/v1/subscriptions/{subscription_id}getSubscriptionsSubscriptionId
Get subscription
Endpoints And RoutingbearerAuth-200, 404
PATCH/v1/subscriptions/{subscription_id}patchSubscriptionsSubscriptionId
Update subscription
Endpoints And RoutingbearerAuthapplication/json200
DELETE/v1/subscriptions/{subscription_id}deleteSubscriptionsSubscriptionId
Disable subscription
Endpoints And RoutingbearerAuthapplication/json200
GET/v1/transformationsgetTransformations
List deterministic transformations
Schemas And TransformationsbearerAuth-200
POST/v1/transformationspostTransformations
Create deterministic transformation
Schemas And TransformationsbearerAuthapplication/json201
GET/v1/transformations/{transformation_id}getTransformationsTransformationId
Get deterministic transformation
Schemas And TransformationsbearerAuth-200
GET/v1/transformations/{transformation_id}/versionsgetTransformationsTransformationIdVersions
List immutable transformation versions
Schemas And TransformationsbearerAuth-200
POST/v1/transformations/{transformation_id}/versionspostTransformationsTransformationIdVersions
Create immutable transformation version
Schemas And TransformationsbearerAuthapplication/json201
POST/v1/transformations/{transformation_id}/versions/{version_id}:activatepostTransformationsTransformationIdVersionsVersionIdActivate
Activate transformation version
Schemas And TransformationsbearerAuthapplication/json200
+
+ + diff --git a/docs/operations.md b/docs/operations.md index 79ebe08..9257f92 100644 --- a/docs/operations.md +++ b/docs/operations.md @@ -1,78 +1,86 @@ # Webhookery Operations -This document describes implemented operational behavior, not future product -marketing. - -## Deployment Profile - -The MVP deployment profile is a Go API/worker binary backed by PostgreSQL. -Docker Compose starts PostgreSQL, runs migrations, starts the API, and starts a worker. PostgreSQL is the -source of truth for raw payload metadata, events, dedupe records, delivery -state, audit events, retention state, evidence export metadata, and durable -outbox work. - -Raw payload body storage defaults to PostgreSQL `bytea`. Optional S3-compatible -storage is enabled with `WEBHOOKERY_RAW_STORAGE_MODE=s3` and the -`WEBHOOKERY_OBJECT_STORAGE_*` variables. In S3 mode, inbound success requires -both the object write and the PostgreSQL metadata transaction to succeed. The -database still stores the raw payload hash, size, storage backend, bucket, key, -write status, receipts, events, deliveries, and audit rows. - -Kubernetes manifests live under `deploy/kubernetes`. They define separate API, -worker, scheduler, and migration-job workloads plus a service for the API. The -profile expects external PostgreSQL and optional external object storage; it -does not install ingress, TLS certificates, network policies, service monitors, -or PostgreSQL. Use `deploy/kubernetes/secret.example.yaml` only as a template, -then create the real `webhookery-secrets` Secret through the cluster's normal -secret-management workflow. - -The Helm chart under `deploy/helm/webhookery` deploys the same API, worker, -scheduler, and optional migration-job shape. It also expects external -PostgreSQL and optional object storage. By default it references an existing -Kubernetes Secret for sensitive values; `secret.create=true` is intended for -operator-supplied values files, not committed secrets. 
- -The Terraform module under `deploy/terraform/webhookery-helm` installs that -Helm chart into an existing Kubernetes cluster. It intentionally accepts only an -existing Secret name, not database URLs, master keys, object-store access keys, -or bootstrap key hashes, because Terraform state is not an appropriate secret -store. - -The project makes no FIPS/NIST/CMVP certification claim. - -## Production Doctor - -Run the production doctor before promoting or upgrading a self-hosted -deployment: +This document is the operator runbook for the implemented self-hosted release +candidate. It is not a product marketing page, API reference, CLI catalog, or +feature behavior reference. + +Use these canonical references with this runbook: + +- `docs/configuration.md`: environment variables, defaults, safe production + values, and secret sensitivity. +- `docs/deployment.md`: shared deployment posture for dependencies, TLS, + secrets, object storage, network policy, readiness, backup/restore, upgrade, + and rollback. +- `docs/day-2-operations.md`: post-install backup cadence, restore drills, + upgrades, incident triage, alert handling, key rotation, retention review, + and audit evidence handoff. +- `docs/observability.md`: public metric names, Prometheus examples, and + dashboard starter panels. +- `docs/feature-behavior.md`: capture, auth, routing, delivery, replay, + reconciliation, transformations, retention, identity, producer trust, and + SSRF behavior. +- `docs/cli.md`: `whcp` command catalog. +- `openapi.yaml`: REST API contract. +- `docs/release-evidence-template.md`: release evidence checklist. +- `deploy/`: deployment profile specifics. + +## Operating Promise + +See `docs/security-promise.md` for the canonical durable-capture promise and +non-claims. Operationally, inbound success means Webhookery durably captured +raw request evidence and verification metadata according to the configured +storage mode. It does not mean downstream business processing succeeded. 
+ +Release evidence and deployment profile checks intentionally preserve this +phrase: no FIPS/NIST/CMVP certification. + +## Deployment Posture + +The release-candidate topology is single-region and PostgreSQL-first: + +- API, worker, scheduler, and migration processes are Go binaries from this + repository. +- PostgreSQL is the source of truth for events, receipts, raw payload metadata, + dedupe records, delivery state, audit rows, retention state, evidence export + metadata, and durable outbox work. +- Raw payload bodies default to PostgreSQL `bytea`. +- S3-compatible raw payload storage is optional. With + `WEBHOOKERY_RAW_STORAGE_MODE=s3`, inbound success requires both the object + write and PostgreSQL metadata transaction to succeed. +- Kubernetes, Helm, and Terraform profiles expect external PostgreSQL and + externally managed production secrets. + +Object storage can hold raw bodies, but PostgreSQL remains the evidence and +metadata authority. Back up both PostgreSQL and object storage when S3 mode is +enabled. + +## Production Doctor Runbook + +Run the production doctor before promotion, after configuration changes, and +before upgrades that affect storage, key custody, TLS, object storage, or +bootstrap access: ```bash WEBHOOKERY_ENVIRONMENT=production go run ./cmd/whcp doctor production ``` -The command reads local environment/configuration only. It does not connect to -PostgreSQL, object storage, Vault, AWS KMS, or webhook receivers, and it does -not replace `make rc-check`, readiness probes, backup/restore drills, or -tenant-scoped ops APIs. Its output is deliberately redacted and must not print -database passwords, API keys, webhook secrets, OAuth tokens, Vault tokens, AWS -credentials, raw KMS key ids, object-store credentials, private keys, or raw -payload bodies. +The doctor reads environment and configuration only. It does not connect to +PostgreSQL, object storage, Vault, AWS KMS, webhook receivers, or live +providers. 
It does not replace readiness checks, RC checks, or restore drills. -Doctor severities are: +Severity meanings: -- `blocker`: unsafe or incomplete production posture. The command exits - non-zero. -- `warning`: an operator must explicitly accept or remediate the risk. The - command may exit zero when only warnings remain. -- `ok`: the checked setting has production-acceptable shape. +| Severity | Meaning | Required action | +|----------|---------|-----------------| +| `blocker` | Unsafe or incomplete production posture. The command exits non-zero. | Fix before promotion. | +| `warning` | Operator review item. The command may exit zero. | Accept deliberately or remediate. | +| `ok` | Checked setting has production-acceptable shape. | No action. | -Local development should use `.env.example`, `WEBHOOKERY_ENVIRONMENT=development`, -and Docker Compose. The production doctor is not a local-development pass/fail -gate; use `make fast-check` and local smoke tests for development feedback. +The doctor output must not print database passwords, API keys, webhook secrets, +OAuth tokens, Vault tokens, AWS credentials, raw KMS key IDs, object-store +credentials, private keys, raw signatures, or raw payload bodies. -A production-local deployment that uses local envelope encryption must provide -a non-placeholder database URL, API TLS certificate/key files, a generated -32-byte base64 `WEBHOOKERY_MASTER_KEY_BASE64`, and a non-placeholder bootstrap -key hash or no bootstrap key. Example shape: +Common production preflight shapes: ```bash WEBHOOKERY_ENVIRONMENT=production @@ -85,13 +93,6 @@ WEBHOOKERY_BOOTSTRAP_API_KEY_HASH= go run ./cmd/whcp doctor production ``` -Local secret-box mode can be acceptable for smaller self-hosted installations -with disciplined key custody, but the doctor reports it as a warning because -Vault Transit or AWS KMS gives stronger operational separation. - -Vault Transit mode requires a TLS Vault address, token, and transit key name. 
-The token is consumed from the environment and is never printed: - ```bash WEBHOOKERY_ENVIRONMENT=production WEBHOOKERY_SECRET_BOX_MODE=vault-transit @@ -101,9 +102,6 @@ WEBHOOKERY_VAULT_TRANSIT_KEY=webhookery go run ./cmd/whcp doctor production ``` -AWS KMS mode requires region and key id. A custom endpoint is intended for -LocalStack-style testing; non-TLS endpoint overrides produce a warning: - ```bash WEBHOOKERY_ENVIRONMENT=production WEBHOOKERY_SECRET_BOX_MODE=aws-kms @@ -112,10 +110,6 @@ WEBHOOKERY_AWS_KMS_KEY_ID= go run ./cmd/whcp doctor production ``` -S3-compatible raw payload storage is strict when enabled: inbound success -requires the object write plus PostgreSQL metadata commit. Production S3 mode -must define the endpoint, bucket, access key, secret key, and TLS posture: - ```bash WEBHOOKERY_ENVIRONMENT=production WEBHOOKERY_RAW_STORAGE_MODE=s3 @@ -127,38 +121,81 @@ WEBHOOKERY_OBJECT_STORAGE_USE_SSL=true go run ./cmd/whcp doctor production ``` +Local development should use `.env.example`, +`WEBHOOKERY_ENVIRONMENT=development`, and Docker Compose. Do not treat the +production doctor as the local development gate. + +## Pilot Doctor Runbook + +Use the pilot doctor before a bounded commercial evaluation or provider proof +run: + +```bash +go run ./cmd/whcp doctor pilot --no-network +``` + +`--no-network` keeps the check local: configuration, key custody shape, +raw-storage config, bootstrap-key risk, provider-proof manifest presence, and +explicit skipped network checks are reported without contacting PostgreSQL, +object storage, receivers, or live providers. 
+ +To include safe PostgreSQL metadata checks, omit `--no-network` only when +`WEBHOOKERY_DATABASE_URL` points at the intended disposable or pilot database: + +```bash +go run ./cmd/whcp doctor pilot --timeout 3s +``` + +Network-enabled pilot doctor checks include: + +- PostgreSQL connectivity; +- applied migration count compared with repository migration files; +- durable outbox pending and in-progress counts; +- active retention policy count; and +- audit-chain entry presence. + +Receiver connectivity is skipped unless both +`WEBHOOKERY_PILOT_RECEIVER_CHECK_URL` and +`WEBHOOKERY_PILOT_ALLOW_RECEIVER_CHECK=true` are set. The receiver URL is +validated through the SSRF policy before a `HEAD` request is attempted, and the +doctor output does not print the URL. + +The pilot doctor does not replace `make rc-check`, `make finalize`, restore +drills, audit-chain verification, object-store read/write drills, or manual +live-provider proof guides. + ## Production RC Checklist -The release-candidate target is a single-region self-hosted deployment with -PostgreSQL as the source of truth. It is production-respectable only when the -core product checks, failure drills, restore rehearsal, and operator preflight -are repeatable. It is not a certification, hosted SLA, or exactly-once claim. - -Before promotion, complete this checklist: - -- `go run ./cmd/whcp doctor production` exits with no `blocker` findings. - Warnings require an explicit operator decision. -- `make finalize` passes on the release candidate commit. -- `make rc-check` passes without live third-party provider, AWS, Vault, Slack, - PagerDuty, SIEM, or customer receiver calls. -- `WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check` passes against a - disposable PostgreSQL database. This runs migrations, Postgres integration, - and DB-backed RC E2E flows serially. 
-- `WEBHOOKERY_RC_RESTORE_DATABASE_URL=postgres://...` is set to a separate - disposable restore database and the restore drill passes before upgrades that - touch migrations, audit chain behavior, retention, exports, or storage. -- Object storage, if enabled, has its own backup/restore procedure; PostgreSQL - dumps do not contain S3 object bodies. -- Bootstrap API key posture is reviewed: the bootstrap hash is removed, - rotated, or restricted after a database-backed owner/security key exists. -- `/readyz`, `/v1/ops/storage`, `/v1/ops/config`, `/v1/ops/queues`, - `/v1/ops/metrics`, alert firings, and audit-chain verification are checked - after deployment. - -Expected local RC command sequence: +Before promoting a release candidate: + +1. Run `go run ./cmd/whcp doctor production`. Fix blockers. +2. Run `go run ./cmd/whcp doctor pilot --no-network`, then a network-enabled + pilot doctor only against a disposable or intended pilot database. +3. Run `make finalize` on the release candidate commit. +4. Run `WEBHOOKERY_TEST_DATABASE_URL=postgres://... make live-postgres-check` + against a disposable PostgreSQL database. +5. Run non-live `make rc-check`. It must not require live third-party provider, + AWS, Vault, Slack, PagerDuty, SIEM, or customer receiver calls. +6. Run DB-backed `WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check` + against a disposable database. +7. For migration, storage, retention, export, or audit-chain changes, set + `WEBHOOKERY_RC_RESTORE_DATABASE_URL=postgres://...` to a separate disposable + restore database before `make rc-check`. +8. Verify `/readyz`, `/v1/ops/storage`, `/v1/ops/config`, `/v1/ops/queues`, + `/v1/ops/metrics`, alert firings, and audit-chain verification after + deployment. +9. Remove, rotate, or restrict the bootstrap API key after a + database-backed owner or security key exists. + +Record release results in `docs/release-evidence-template.md`.
Keep that +template as the canonical evidence checklist; this runbook only explains the +operator flow. + +Expected local RC sequence: ```bash docker compose up -d postgres +WEBHOOKERY_TEST_DATABASE_URL=postgres://webhookery:change-me@localhost:5432/webhookery?sslmode=disable make live-postgres-check WEBHOOKERY_TEST_DATABASE_URL=postgres://webhookery:change-me@localhost:5432/webhookery?sslmode=disable make rc-check ``` @@ -168,8 +205,7 @@ A successful run ends with: rc-check: release-candidate acceptance checks passed ``` -Restore drills require a separate disposable restore database URL. The source -and restore URLs must not point at the same database: +Restore drills require a separate disposable restore database: ```bash WEBHOOKERY_TEST_DATABASE_URL=postgres://webhookery:change-me@localhost:5432/webhookery?sslmode=disable \ @@ -177,23 +213,41 @@ WEBHOOKERY_RC_RESTORE_DATABASE_URL=postgres://webhookery_restore:change-me@local make rc-check ``` -Use whatever local PostgreSQL topology provides that second URL; the drill is -destructive for the restore target and the restore script refuses to run -without `WEBHOOKERY_RESTORE_CONFIRM=restore`. +The restore target is destructive. The restore script refuses to run without +`WEBHOOKERY_RESTORE_CONFIRM=restore`. + +For a repeatable wrapper that backs up a source database, restores into an +explicitly separate disposable target, applies migrations, and writes sanitized +JSON evidence, run: + +```bash +WEBHOOKERY_DATABASE_URL=postgres://source \ +WEBHOOKERY_RESTORE_DRILL_DATABASE_URL=postgres://disposable-restore \ +make restore-drill +``` + +The wrapper writes `tmp/restore-drill/restore-drill.json` and does not print +database URLs. PostgreSQL restore drills do not verify S3 or MinIO object +bodies; record that storage drill separately when object storage is enabled. + +Failure drill planning lives in `docs/failure-drills.md`. 
## Upgrade And Restore Drill -For production upgrades, rehearse the restore path before changing live -storage, migrations, retention policies, audit-chain code, or key-custody mode. +Back up PostgreSQL before upgrading, changing retention policies, rotating +master-key material, changing secret custody mode, or enabling object storage. +Use `docs/schema-migrations.md` for schema ordering, evidence-authority tables, +and restore compatibility review. 1. Stop API, worker, and scheduler processes for the restore target. -2. Back up the source PostgreSQL database: +2. Create a PostgreSQL custom-format dump: ```bash WEBHOOKERY_DATABASE_URL=postgres://... scripts/backup_postgres.sh backups ``` Expected result: a `backups/webhookery-<timestamp>.dump` path is printed. + The script uses restrictive file permissions through `umask 077`. 3. Restore into a fresh disposable database: @@ -209,90 +263,135 @@ storage, migrations, retention policies, audit-chain code, or key-custody mode. WEBHOOKERY_DATABASE_URL=postgres://... go run ./cmd/whcp migrate up ``` -5. Verify restored evidence surfaces: +5. Start API and workers, then verify: ```bash + curl -fsS http://localhost:8080/readyz go run ./cmd/whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" - go run ./cmd/whcp audit verify-bundle --file evidence.tar.gz + go run ./cmd/whcp ops storage --api-key "$WEBHOOKERY_API_KEY" + go run ./cmd/whcp ops queues --api-key "$WEBHOOKERY_API_KEY" ``` - Expected result: the chain verification is valid, and bundle verification - reports valid file hashes and audit-chain continuity. +PostgreSQL dumps do not contain S3 object bodies. When S3-compatible raw +payload storage is enabled, restore object storage through the bucket provider +and verify that metadata still points to available objects. + +Do not restore over live state until the drill succeeds. Preserve failed +restore databases for investigation when audit-chain, event, delivery, or +export evidence does not verify.
+ +## Durable Capture Checks -6. Start API and workers, then check: +Use these checks when providers receive non-2xx responses, operators suspect +loss, or storage health changes: + +1. Check API readiness: ```bash curl -fsS http://localhost:8080/readyz + ``` + +2. Check storage and queues: + + ```bash go run ./cmd/whcp ops storage --api-key "$WEBHOOKERY_API_KEY" go run ./cmd/whcp ops queues --api-key "$WEBHOOKERY_API_KEY" ``` -The automated RC restore drill in `internal/e2e` creates evidence, backs up -PostgreSQL, restores to a fresh DB, migrates, and verifies event, export, and -audit-chain readability. It intentionally treats object bodies as outside the -PostgreSQL dump scope. - -## Incident Triage +3. Inspect a captured event: -Use the control-plane evidence before assuming provider loss or downstream -success. Inbound 2xx only means durable capture. + ```bash + go run ./cmd/whcp events timeline --event-id evt_... --api-key "$WEBHOOKERY_API_KEY" + ``` -| Symptom | First checks | Expected operator action | -| --- | --- | --- | -| Provider receives non-2xx or no ack | `/readyz`, API logs, PostgreSQL availability, `WEBHOOKERY_RAW_STORAGE_MODE`, object-store health in S3 mode | Restore durable storage first; do not force 2xx while capture is unavailable. | -| Invalid signatures or quarantine growth | `go run ./cmd/whcp events timeline --event-id evt_...`, quarantine list, source secret versions | Verify exact raw-body signing settings, timestamp windows, and secret rotation grace periods before replaying. | -| DLQ growth or receiver failures | `go run ./cmd/whcp ops queues`, `go run ./cmd/whcp alerts firings`, delivery attempts, endpoint circuit state | Fix receiver/SSRF/TLS errors, then release DLQ entries with a reason. | -| Replay backlog affects live traffic | `go run ./cmd/whcp ops queues`, replay job status and rate limits | Pause or rate-limit replay; live deliveries should remain prioritized. 
| -| Audit chain verification failure | `go run ./cmd/whcp audit verify-chain`, latest audit exports, retention run records | Stop retention/export changes, preserve database state, and investigate mismatched or missing entries before anchoring. | -| SIEM or notification egress failures | `go run ./cmd/whcp notification-deliveries list --state failed`, `go run ./cmd/whcp siem-deliveries list --state failed` | Fix receiver availability/signature config and retry; cursors must not be advanced manually. | -| Reconciliation gaps | `go run ./cmd/whcp reconciliation-jobs items --job-id rec_...` | Distinguish `captured`, `redelivery_requested`, `unrecoverable`, and provider limitation evidence before claiming recovery. | -| Restore uncertainty | Restore into a disposable DB and run audit-chain plus bundle verification | Do not restore over live state until the drill succeeds and object-storage backup scope is understood. | +4. If raw payload access is required, use a scoped key and record the reason: -## Explicit Non-Goals + ```bash + go run ./cmd/whcp events raw-export --event-id evt_... --output payload.bin --api-key "$WEBHOOKERY_API_KEY" + ``` -The implemented core product does not claim: +Raw payload retrieval is an elevated action and emits audit evidence. Store +exported payloads with the same care as customer data, and delete local copies +when the investigation is complete. -- exactly-once delivery or provider-side event completeness; -- multi-region active-active coordination; -- external timestamping, HSM/PKCS#11 custody, or compliance-certified evidence - packs; -- live Stripe/GitHub/Shopify/Slack/AWS/Vault calls in release acceptance; -- Kafka, NATS, Redis, or object storage as the authority for accepted event - evidence; -- arbitrary code plugins, visual workflow builders, marketplace distribution, - GraphQL, SAML assertion processing, or vendor-specific alert integrations. +Do not force a 2xx response while durable capture is unavailable. 
For S3 mode, +restore both object-write health and PostgreSQL commit health before accepting +new ingress as healthy. -## Backup And Restore +## Audit Evidence Runbook -PostgreSQL is the authoritative metadata store for accepted events, receipts, -deliveries, audit rows, reconciliation evidence, retention state, and outbox -work. Back up PostgreSQL before upgrading, changing retention policies, -rotating master-key material, or enabling object storage. +Audit events are chained per tenant with SHA-256. The chain records hashes, +previous hashes, sequence state, source, and tombstone metadata; it does not +duplicate raw payloads, credentials, or payload bodies. -Create a custom-format PostgreSQL dump with: +Verify chain status: ```bash -WEBHOOKERY_DATABASE_URL=postgres://... scripts/backup_postgres.sh backups +go run ./cmd/whcp audit chain-head --api-key "$WEBHOOKERY_API_KEY" +go run ./cmd/whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" ``` -The script writes a timestamped `webhookery-*.dump` file with owner/group -permissions restricted by `umask 077`. It requires `pg_dump` on the operator -machine and does not include S3-compatible object bodies; object storage must be -backed up through the bucket provider. +Create an operator anchor only after verification succeeds: + +```bash +go run ./cmd/whcp audit anchor --reason "daily anchor" --api-key "$WEBHOOKERY_API_KEY" +go run ./cmd/whcp audit anchors --api-key "$WEBHOOKERY_API_KEY" +``` -Restore into an already provisioned PostgreSQL database with: +Verify downloaded evidence bundles locally: ```bash -WEBHOOKERY_DATABASE_URL=postgres://... WEBHOOKERY_RESTORE_CONFIRM=restore scripts/restore_postgres.sh backups/webhookery-20260525T000000Z.dump +go run ./cmd/whcp audit verify-bundle --file evidence.tar.gz ``` -The restore script requires an explicit confirmation environment variable and -uses `pg_restore --clean --if-exists`. 
Stop API and worker processes before -restoring so no process writes new evidence into a partially restored database. -After restore, run `whcp migrate up`, start the API and workers, then check -`/readyz`, `/v1/ops/metrics`, and a recent event timeline. +If verification fails: + +1. Stop retention, export, replay, and migration work that might modify + evidence state. +2. Preserve the database and relevant object-storage state. +3. Compare the latest audit export, retention run records, and chain head. +4. Do not anchor a failing range. +5. Restore into a disposable database before attempting repair. -## Cryptography And Secrets +Audit retention may leave hash-only tombstones. Missing non-retained rows and +mismatched hashes are failures. + +## Incident Triage + +| Symptom | First checks | Required action | +|---------|--------------|-----------------| +| Provider receives non-2xx or no ack | `/readyz`, API logs, PostgreSQL availability, `WEBHOOKERY_RAW_STORAGE_MODE`, object-store health in S3 mode | Restore durable storage first. Do not force 2xx while capture is unavailable. | +| Invalid signatures or quarantine growth | Event timeline, quarantine state, source secret versions | Verify exact raw-body signing settings, timestamp windows, and secret rotation grace periods before replaying. | +| DLQ growth or receiver failures | `whcp ops queues`, alert firings, delivery attempts, endpoint circuit state | Fix receiver, SSRF, TLS, or signing errors, then release DLQ entries with a reason. | +| Replay backlog affects live traffic | Queue depth, replay job status, replay rate limits | Pause or rate-limit replay. Live delivery work should remain prioritized. | +| Audit-chain verification failure | `whcp audit verify-chain`, latest exports, retention run records | Stop evidence-mutating work, preserve state, investigate before anchoring. 
| +| SIEM or notification egress failures | Notification and SIEM delivery failure lists | Fix receiver availability or signature config; do not advance cursors manually. | +| Reconciliation gaps | Reconciliation job items | Distinguish captured, redelivery-requested, unrecoverable, and provider limitation evidence before claiming recovery. | +| Restore uncertainty | Disposable restore plus audit-chain and bundle verification | Do not restore over live state until the drill succeeds. | + +## Recovery Notes + +- Provider retries and provider APIs have provider-specific limits. Do not claim + provider-side completeness unless current official provider docs and local + reconciliation evidence prove it for the case. +- Replay creates new delivery work linked to existing evidence. It does not + mutate original event history. +- Duplicate events remain visible. Dedupe may suppress processing, but it must + not erase receipt evidence. +- Queue outage must not lose accepted events; durable outbox state remains in + PostgreSQL. +- Object storage outage in S3 mode is an ingress durability problem, not only a + delivery problem. + +## Explicit Non-Goals + +The production RC posture does not claim exactly-once delivery, provider-side +event completeness, managed-service availability, multi-region active-active +operation, compliance certification, legal evidentiary certification, external +timestamping, or recovery of every provider-side event. Use +`docs/security-promise.md` as the canonical non-claims reference. + +## Cryptography And Secret Handling Inbound provider adapters use HMAC-SHA256 where provider semantics require it: Stripe, GitHub, Shopify, Slack, and generic HMAC. Outbound delivery signing @@ -302,589 +401,23 @@ uses the `Webhook-Signature` header with HMAC-SHA256 over: timestamp + "." 
+ raw_delivery_body ``` -Receivers should verify the exact raw delivery body with -`pkg/verifier.VerifyWebhookerySignature`, a five-minute timestamp window unless -their own policy requires a smaller window, and their endpoint's active or -grace-period signing secret. `Webhook-Signature-Key-Id` and -`Webhook-Signature-Key-Version` are metadata for selecting and auditing the -receiver-side secret version; they are not a substitute for HMAC verification. - -Webhook/source secrets and endpoint signing secrets are stored through an -envelope encryption interface. `WEBHOOKERY_SECRET_BOX_MODE=local` is the -default and requires `WEBHOOKERY_MASTER_KEY_BASE64` to be a base64-encoded -32-byte key at runtime. `WEBHOOKERY_SECRET_BOX_MODE=vault-transit` uses a -Vault Transit-compatible HTTP API configured with `WEBHOOKERY_VAULT_ADDR`, -`WEBHOOKERY_VAULT_TOKEN`, and `WEBHOOKERY_VAULT_TRANSIT_KEY`; Vault encrypts -and decrypts secret material while PostgreSQL stores only wrapped -`vault-transit:` ciphertext. - -`WEBHOOKERY_SECRET_BOX_MODE=aws-kms` uses AWS KMS envelope encryption with -`WEBHOOKERY_AWS_REGION`, `WEBHOOKERY_AWS_KMS_KEY_ID`, and optional -`WEBHOOKERY_AWS_KMS_ENDPOINT` for LocalStack-style tests. Webhookery calls -AWS KMS `GenerateDataKey`, encrypts the secret locally with AES-GCM, stores the -encrypted data key beside the ciphertext, and calls `Decrypt` only to unwrap -that data key later. This follows the AWS KMS documented envelope pattern of -using `Plaintext` data keys outside KMS, storing `CiphertextBlob`, and erasing -plaintext data keys after use: -https://docs.aws.amazon.com/kms/latest/APIReference/API_GenerateDataKey.html -and https://docs.aws.amazon.com/kms/latest/APIReference/API_Decrypt.html. -The adapter is built with AWS SDK for Go v2: -https://docs.aws.amazon.com/sdk-for-go/v2/developer-guide/getting-started.html. 
-Switching modes does not re-encrypt existing rows automatically, so plan a -controlled migration if moving live tenants between local, Vault-backed, and -AWS KMS-backed envelopes. `whcp key-custody test` performs a local -encrypt/decrypt smoke test for the configured mode without printing plaintext, -ciphertext, or full key ids. Example env files contain placeholders only. -Logs, errors, metrics, and UI surfaces must not print raw API keys, webhook -secrets, Vault tokens, AWS credentials, KMS key ids in full, signatures, bearer -tokens, or raw payloads by default. - -Source verification secrets and endpoint signing secrets are versioned. -Rotation creates a new active version and moves the prior active version to -`previous` with a bounded grace period. Provider verification tries active and -non-expired previous source secrets against the exact raw request bytes. -Outbound deliveries sign with the current active endpoint signing secret and -include `Webhook-Signature-Key-Id` plus `Webhook-Signature-Key-Version` headers -so receivers can audit which key version signed a request. Plaintext secret -values are not returned by API, CLI, or UI responses. - -Source reads, updates, and deletes are tenant-scoped. `DELETE /v1/sources/{id}` -and `whcp sources delete` disable the source instead of deleting historical -events, receipts, raw payload metadata, or audit evidence. Disabled sources are -rejected before capture and routing; re-enable by patching the source state -back to `active` with a reason. - -Endpoint reads, updates, and deletes are tenant-scoped. `PATCH -/v1/endpoints/{id}` and `whcp endpoints update` can change endpoint metadata, -state, URL, and retry policy references with an operator reason. URL changes -rerun the same SSRF policy used at creation before metadata is committed. -`DELETE /v1/endpoints/{id}` and `whcp endpoints delete` disable future delivery -claims without deleting historical deliveries, attempts, payload snapshots, or -audit evidence. 
Endpoint signing secrets and mTLS key material are managed -through their dedicated rotation/create paths and are never returned by these -metadata APIs. - -Subscription reads, updates, and deletes are tenant-scoped. Updating a -subscription can change its endpoint, event types, payload format, -transformation reference, or state, and writes a new immutable subscription -version plus audit event. Endpoint references are checked against active -endpoints in the same tenant before subscription creation or endpoint changes. -`DELETE /v1/subscriptions/{id}` and `whcp subscriptions delete` disable future -fanout without deleting historical delivery or config-version evidence. - -Route reads, updates, and deletes are tenant-scoped. Route create/update checks -source and endpoint references against active resources in the same tenant, -resolves active transformation versions when configured, and writes a new -immutable route version for each mutation. `DELETE /v1/routes/{id}` and `whcp -routes delete` move the route to `inactive`; historical route decisions, -delivery rows, replay receipts, and config versions are retained. - -Retry policy reads, updates, and deletes are tenant-scoped under the routes -permission family. `PATCH /v1/retry-policies/{id}` and `whcp retry-policies -update` create a new retry policy version row from the referenced policy -instead of rewriting it in place. `DELETE /v1/retry-policies/{id}` disables -future use of the referenced policy row while retaining delivery and audit -evidence that already points at it. - -Event type and event schema reads are tenant-scoped under the schemas -permission family. Operators can list event types, list schemas for an event -type, and fetch a specific schema version through both API and CLI before -running validation or compatibility checks. 
Event type lifecycle mutations -require `schemas:write` and an operator reason; event type names are immutable, -and delete moves the event type to `disabled` without deleting historical -schema, config-version, delivery, or audit evidence. Event schema bodies and -versions are immutable after creation. Schema lifecycle updates can move a -schema through `active`, `deprecated`, and `retired`; delete moves the schema -to `retired`. Schema state changes are tenant-scoped, audited, and recorded as -new config versions so later validation and replay evidence can identify which -schema state was in force. - -Endpoints may also be created with a PEM client certificate and private key for -outbound mTLS. The API accepts `mtls_client_cert_pem` and -`mtls_client_key_pem` together, validates that they form a client certificate -pair, stores both through envelope encryption, and returns only -`mtls_enabled` plus certificate subject metadata. Delivery workers decrypt the -material at claim time and fail closed with `client_certificate_error` if the -stored pair is invalid. Redirects remain disabled. - -## Authentication And Authorization - -Normal operation uses database-backed API keys. API key rows store only -`sha256:` token hashes, key prefixes, last four characters, scopes, state, and -membership linkage. Users and memberships are tenant-scoped, and authorization -requires both the membership role and the key scope. The bootstrap API key hash -exists only to create or recover database-backed keys; remove or rotate it for -production-style operation. - -## Inbound Acknowledgement - -Inbound provider endpoints may return success only after raw body bytes, -headers, request metadata, verification result, event metadata, dedupe result, -and durable outbox work are committed. A downstream delivery success is never -implied by inbound 2xx. 
The API rejects ingress requests above the 2 MiB body -limit with HTTP 413 and requests with more than 128 header pairs, more than -64 KiB of header name/value bytes, or any single header value above 8 KiB with -HTTP 431 before source lookup or capture work starts. - -Provider-specific behavior checked on May 25, 2026: - -- Slack `url_verification` requests must be authenticated and answered with the - received `challenge` value: https://docs.slack.dev/reference/events/url_verification -- CloudEvents HTTP supports binary header attributes and structured JSON - envelopes with `application/cloudevents+json`: - https://github.com/cloudevents/spec/blob/v1.0.2/cloudevents/bindings/http-protocol-binding.md - -The generic JWT/JWS ingress adapter is intentionally narrow. It accepts compact -JWTs from `Authorization: Bearer ...` or `Webhook-JWT`, supports only HS256 -with the source verification secret, rejects `alg=none` and other algorithms, -requires `exp`, honors `nbf` and future `iat`, and requires `body_sha256` to -match the exact raw request body captured by Webhookery. - -## Delivery Worker - -The worker claims durable outbox rows with database leases, evaluates active -subscriptions and routes, creates delivery jobs, then claims scheduled -deliveries. Delivery attempts are signed, recorded, retried on retryable -failures, and moved to the dead-letter table after terminal failure. -Worker leases are refreshed in PostgreSQL when outbox or delivery work is -claimed. Outbox and delivery claim batches use a tenant-fair ordering in -PostgreSQL: live route work is selected before replay and reconciliation work, -live deliveries are selected before replay deliveries, and each priority class -round-robins by tenant before taking additional work from the same tenant. - -Routes are snapshotted in `route_versions`, subscriptions are snapshotted in -`subscription_versions`, and decisions attach `route_version_id` or -`subscription_version_id` to delivery evidence. 
Retry policies are -tenant-scoped, versioned resources; endpoints and routes can reference a -policy, and deliveries retain the selected `retry_policy_id`. If no policy is -selected, the implemented default remains 12 attempts over a 72-hour maximum -with full-jitter exponential backoff between 10 seconds and 6 hours. Each -delivery stores a `retry_seed`, and each retryable delivery attempt records the -deterministic jitter delay and `next_retry_at` chosen from that seed. - -Replay jobs create new delivery work linked to the original event or delivery. -Replay never mutates the original event evidence. - -Replay jobs can be created with `config_mode=current` or -`config_mode=original` and an optional `rate_limit_per_minute`. Current-mode -event replay evaluates current active subscriptions and routes. Original-mode -event replay clones the event's recorded non-replay delivery decisions and -preserves their route, subscription, and retry policy evidence. Replay-created -deliveries are marked with `replay_job_id`, scheduled according to the replay -rate limit, and ordered behind live due deliveries when workers claim delivery -work. Replay jobs can be paused, resumed, or canceled through the API/CLI. -Paused jobs keep durable outbox work uncompleted until they are resumed. -Replay jobs can also be created with `require_approval=true`; those jobs stay -in `pending_approval`, do not enqueue replay delivery work, and require -`POST /v1/replay-jobs/{replay_job_id}:approve` or -`whcp replay-jobs approve` with replay write permission and a reason before -workers can process them. The approval records approver metadata and a chained -audit event. This is a single approval gate, not a two-person approval workflow. -Dead-letter entries can be released one at a time or in bounded bulk batches. 
- -Authenticated operators with `ops:read` can inspect runtime worker leases and -tenant-scoped queue depth through `GET /v1/ops/workers`, -`GET /v1/ops/workers/{worker_id}`, `GET /v1/ops/queues`, `GET -/v1/ops/storage`, `GET /v1/ops/config`, and `whcp ops -workers|worker|queues|rollups|storage|config`. Worker status exposes only lease metadata (`worker_id`, -active/expired state, last seen time, and expiry). Queue stats are scoped to the -actor tenant and report durable outbox kinds plus the delivery queue with -pending, in-progress, terminal/completed, due-now, oldest pending age, and next -scheduled timestamps. Storage status reports tenant-scoped payload/evidence -counts, storage backends, and stored-byte totals. Runtime config reports only -safe metadata such as environment, UI enabled state, raw storage mode, secret -box mode, and request limits. These APIs do not expose payload bodies, endpoint -URLs, database URLs, object-store credentials, API keys, webhook secrets, master -keys, Vault tokens, or tenant labels on public metrics. - -The scheduler also writes derived one-minute operational rollups to -`metrics_rollups`. Operators can query them through -`GET /v1/ops/metrics/rollups` or `whcp ops rollups`, optionally filtering by -`metric_name`. Rollups cover queue depth and age, delivery/replay/reconciliation -states, open DLQ/quarantine counts, endpoint failure-rate summaries, and audit -chain status. They are dashboard and alert inputs only; the underlying event, -delivery, audit, retention, and reconciliation rows remain authoritative. - -Alert rules are stored in `alert_rules` and evaluated by the scheduler against -recent rollups. Supported rule types are open DLQ, open quarantine, endpoint -failure rate, open endpoint circuit, oldest outbox age, expired worker leases, -audit-chain verification failures, and reconciliation failed/unrecoverable -items. 
A breached rule opens one `alert_firings` row until the condition -resolves; acknowledged firings stay unique per rule and then resolve when the -metric clears. Reads require `ops:read`; create, update, disable, and -acknowledge require `ops:write` plus an operator reason for disabling and -acknowledging. Alert APIs and UI views do not expose payload bodies, endpoint -secrets, provider credentials, or tenant labels on public `/metrics`. - -## Provider Reconciliation - -Provider reconciliation jobs are implemented for cases where a provider API can -show provider-side event or delivery evidence that may not exist locally. -Provider API credentials are stored in `provider_connections` through the same -envelope encryption interface used for webhook and endpoint secrets. API and -CLI responses expose only `credential_type`, a redacted `credential_hint`, -provider name, state, timestamps, and provider-specific configuration metadata. - -Create and verify connections with `/v1/provider-connections` or -`whcp provider-connections`. Create reconciliation jobs with -`/v1/reconciliation-jobs` or `whcp reconciliation-jobs`. Job creation and -cancelation require replay/recovery write permission and a reason. Reads require -replay read permission. Jobs and items are tenant-scoped. - -Implemented provider behavior checked on May 25, 2026: - -- Stripe event reconciliation uses the Events API. Stripe documents event - list/retrieve access for events going back up to 30 days: - https://docs.stripe.com/api/events/list and - https://docs.stripe.com/api/events -- GitHub repository webhook reconciliation uses repository webhook deliveries - and redelivery attempts: - https://docs.github.com/en/rest/repos/webhooks and - https://docs.github.com/en/webhooks/testing-and-troubleshooting-webhooks/viewing-webhook-deliveries -- Shopify is represented as capability/gap evidence only. 
Shopify recommends - reconciliation jobs by polling relevant resources with `updated_at` filters, - but generic missed-webhook payload recovery is topic-specific: - https://shopify.dev/docs/apps/build/webhooks -- Slack is represented as capability/gap evidence only. Slack Events API - delivery is best-effort with bounded retries and does not provide a generic - missed-event recovery feed in this implementation: - https://docs.slack.dev/apis/events-api/ - -Reconciliation item outcomes are `matched`, `missing`, `captured`, -`redelivery_requested`, `unrecoverable`, and `failed`. Missing Stripe events -and GitHub delivery payloads are captured only when `capture_missing=true` and -the provider API returned a recoverable payload body. Recovered events use -`verification_reason=provider_api_reconciliation`; they are not marked as -signed webhook deliveries. They route only when `route_recovered=true`, and the -durable recovered event capture commits before any delivery work is created. - -Provider API call evidence is stored in `provider_api_evidence` with request -method, redacted request URL, response status, response hash, size, storage -status, and optional response body. Provider API response bodies are sensitive -payload data and require `events:raw` through export body inclusion controls. -Provider tokens are not written to audit events, job items, logs, UI tables, or -export metadata. - -Endpoint health is derived from recorded delivery attempts. Repeated failures -open a lightweight endpoint circuit and delay further delivery attempts for a -short cooling period; delivery-time SSRF validation still runs for every -attempt. - -Endpoint test requests create a signed synthetic `webhookery.endpoint.test` -delivery and preserve the test event, dedupe row, delivery row, and audit -record. 
- -## Reproducible Configuration - -`config_versions` records immutable JSON snapshots and hashes for sources, -endpoints, subscriptions, routes, retry policies, schemas, transformation -versions, and secret-version metadata when those resources are created or -rotated through the implemented code paths. `route_versions` and -`subscription_versions` store the fields used for matching and delivery -creation, including optional `transformation_id` and -`transformation_version_id`. Retry schedule evidence is reproducible from the -stored delivery `retry_seed`, retry policy, attempt number, and recorded -attempt timestamps. - -## Normalization And Transformations - -Verified inbound events are normalized into `normalized_envelopes` after raw -body capture and provider verification. Raw payloads remain authoritative: -normalization does not replace raw evidence and unverified requests do not -produce routed normalized payloads by default. Normalized event metadata is -available through `GET /v1/events/{event_id}/normalized` with `events:read`; -including normalized data requires `events:raw` and emits an audit event. - -Built-in adapter versions are recorded in `provider_adapters` and -`adapter_versions`. Each normalized envelope stores the selected -`adapter_version_id`, provider identifiers, stable hashes for the envelope, -data, and metadata, and retention state. Existing verified events are backfilled -as `legacy_metadata_only` envelopes so historical event metadata remains visible -without inventing payload data. - -Tenant custom adapter governance is available through `/v1/adapters` and -`whcp adapters`. Custom adapter rows are tenant-scoped and have immutable -versions that move through `draft`, `automated_tests`, `security_review`, -`staging_approved`, `active`, `deprecated`, and `retired`. 
Active declarative -HMAC-SHA256 adapters can verify inbound requests using exact raw bytes, -configured signature/timestamp headers, and replay windows; normalization uses -the stored declarative metadata and data selectors. Declarative definitions and -plugin package metadata are stored with SHA-256 hashes, provenance fields, and -test-vector hashes; definitions that contain secret-shaped fields are rejected. -Code-plugin package metadata can be registered for review, but Webhookery does -not execute custom plugin code in this slice. Adapter state transitions require -`security:write`, a reason, and write audit events. - -Transformations are tenant-scoped configuration resources managed through -`/v1/transformations` and `whcp transformations`. A transformation version is -immutable and declarative. Implemented operations are JSON Pointer based only: -`set`, `copy`, `drop`, and `redact`. Transformations cannot change provider -evidence, verification fields, tenant/source identifiers, hashes, or audit -metadata. There is no arbitrary scripting, network access, plugin marketplace, -or custom runtime in this slice. - -Routes and subscriptions may reference an active transformation. New delivery -work snapshots the exact transformed outbound bytes into `delivery_payloads` -before the delivery becomes claimable. Workers deliver and sign those stored -bytes instead of rebuilding payloads at claim time. Legacy deliveries without a -payload snapshot retain the previous fallback behavior. - -Replay with `config_mode=original` clones the original delivery payload and -evidence identifiers when available. Replay with `config_mode=current` -regenerates delivery payloads from the current active route, subscription, and -transformation configuration. - -Event schemas support a conservative JSON Schema subset for validation: -`type`, `required`, object `properties`, and array `items`. 
Compatibility -checks reject newly required fields, removed existing properties, and changed -property types. Unsupported advanced JSON Schema features are intentionally not -treated as compatibility proof. - -## Raw Payload Access - -Raw payload retrieval is an elevated action and emits an audit event. Operators -should keep raw retention shorter than metadata retention when payloads may -contain personal data. - -If a retention policy deletes a raw body or object, the body read returns HTTP -410. The event, receipt, delivery, hash, storage metadata, and audit evidence -remain queryable. - -## Retention Policies - -Retention policies are tenant-scoped and managed through -`/v1/admin/retention-policies` or `whcp retention`. Implemented policy resource -types are: - -- `raw_payload`: deletes PostgreSQL raw bodies or S3 objects after the policy - age, optionally scoped to a source. -- `normalized_envelope_data`: deletes normalized envelope and data JSON while - preserving envelope ids, provider metadata, hashes, event records, receipts, - deliveries, and audit rows. -- `delivery_payload`: deletes stored outbound delivery payload bodies while - preserving delivery ids, hashes, transformation evidence, attempts, and audit - rows. -- `provider_api_evidence`: deletes stored provider API response bodies while - preserving reconciliation jobs, gap items, request metadata, hashes, sizes, - and audit rows. -- `audit_event`: deletes audit rows after the policy age while preserving - `audit_chain_entries` as retained tombstones with hashes and sequence - metadata. - -The worker applies active policies in bounded batches and records -`retention_runs` plus `retention_run_items`. Policy changes and completed runs -write chained audit events. Policies can be placed on `legal_hold` with a -`hold_reason`; held policies remain visible and auditable but are skipped by -the retention worker until the hold is cleared. 
- -## Audit Evidence Exports - -`POST /v1/audit-events:export` creates a tenant-scoped `tar.gz` bundle -synchronously for this implementation slice. The bundle contains -`manifest.json`, `audit_events.jsonl`, `payload_evidence.jsonl`, -`audit_chain_proof.jsonl`, and optional `timelines.jsonl` and -`raw_payloads.jsonl`. Reconciliation evidence is included in -`reconciliation_evidence.jsonl`. Payload evidence includes normalized envelope -metadata, delivery payload metadata, provider API evidence metadata, and hashes. -Raw payload bodies are included only with -`include_raw_payloads=true` when the actor has both `audit:read` and -`events:raw`. Normalized, delivery payload, and provider API response bodies are -included only with `include_payload_bodies=true` and the same permissions. -Actors without `events:raw` cannot see or download raw- or payload-body -inclusive exports. - -Each export row stores the bundle SHA-256, manifest SHA-256, file hashes, -storage backend, size, creator, completion timestamp, and audit-chain range -metadata. Export creation verifies the chain proof before marking an export -ready. `whcp audit verify-bundle --file evidence.tar.gz` checks tar entry -safety, manifest/file hashes, and audit-chain continuity in the downloaded -bundle. - -## Audit Chain Verification And Anchors - -Every audit event written through implemented API, CLI, worker, retention, -replay, export, reconciliation, and configuration paths is appended to a -tenant-scoped SHA-256 chain in the same transaction as the audit row. Chain -entries store the audit event hash, previous chain hash, current chain hash, -canonicalization version, source, state, and tombstone metadata. They do not -duplicate raw payloads, credentials, or payload bodies. +Webhook/source secrets, endpoint signing secrets, provider credentials, OIDC +client secrets, SCIM tokens, producer credentials, and mTLS private keys are +stored through an envelope encryption interface. 
`WEBHOOKERY_SECRET_BOX_MODE` +selects local, Vault Transit, or AWS KMS custody. Switching custody modes does +not re-encrypt existing rows automatically. -Existing audit rows are backfilled into deterministic per-tenant chains ordered -by `occurred_at, id` during store startup. Backfilled chains prove continuity -from the current database state; they cannot prove history from before the -chain feature existed. +Logs, errors, metrics, CLI output, UI surfaces, docs, support artifacts, and +release evidence must not include plaintext API keys, bearer tokens, webhook +secrets, source signatures, Vault tokens, AWS credentials, full KMS key IDs, +object-store secrets, private keys, raw payload bodies, provider headers, or +unnecessary PII. -Operators can inspect and verify the chain through: +Run: ```bash -go run ./cmd/whcp audit chain-head --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit verify-chain --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit anchor --reason "daily anchor" --api-key "$WEBHOOKERY_API_KEY" -go run ./cmd/whcp audit anchors --api-key "$WEBHOOKERY_API_KEY" -``` - -`GET /v1/audit-chain/head`, `POST /v1/audit-chain:verify`, -`GET /v1/audit-chain/anchors`, and -`GET /v1/audit-chain/anchors/{anchor_id}` require `audit:read`. -`POST /v1/audit-chain:anchor` requires `security:write` and a reason. -Anchor creation verifies the requested range first, then stores a manifest hash, -range, chain hash, actor, and reason. When S3-compatible object storage is -configured, the anchor manifest is also written to the object store; otherwise -the local PostgreSQL anchor row is the durable anchor record. - -Audit-event retention marks chain entries as retained tombstones before -deleting audit rows. Verification treats retained entries as hash-only evidence, -while missing non-retained audit rows or mismatched hashes are reported as -failures. 
This implementation does not integrate external timestamping -services, KMS/HSM signing, or compliance-certified evidence packs. Generic -signed HTTPS alert notification delivery and SIEM streaming are handled by the -operational signal egress worker described below. - -## Metrics And Readiness - -`/readyz` checks PostgreSQL. `/metrics` exposes aggregate Prometheus text -metrics without tenant labels. `/v1/ops/metrics` exposes tenant-scoped JSON -metrics for authenticated operators, including pending outbox count, oldest -outbox age, delivery states, replay states, open DLQ count, quarantine count, -open endpoint circuits, reconciliation job states, reconciliation item outcomes, -unchained audit-event count, audit-chain verification failure count, and newest -anchor age. `/v1/ops/storage` and `/v1/ops/config` provide redacted operational -status for storage and runtime configuration. `/v1/ops/metrics/rollups` exposes -tenant-scoped derived rollup buckets for authenticated operators. `/v1/alerts` -and `/v1/alert-firings` expose alert rule and firing state for authenticated -operators. - -## Operational Signal Egress - -Alert notification channels are generic HTTPS webhook receivers managed through -`/v1/notification-channels` and `whcp notification-channels`. Creation and -updates require `ops:write`; reads require `ops:read`. Channel signing secrets -are accepted only on create/update, encrypted at rest, and returned only as -non-sensitive metadata. Channel URLs use the same SSRF protections as customer -webhook endpoints: HTTPS, no embedded credentials, no redirects during sender -delivery, and delivery-time DNS/IP revalidation. - -Alert rules may include `channel_ids`. When a firing is opened, acknowledged, -or resolved, Webhookery stores one durable notification delivery per configured -active channel and transition. 
Notification payloads contain alert metadata -only: tenant id, firing id, alert rule id, transition, state, observed value, -threshold, reason, and timestamp. They do not include raw webhook payload -bodies, provider headers, endpoint credentials, or channel secrets. - -Notification deliveries are inspected through `/v1/notification-deliveries`; -attempts and manual retry controls are available through -`/v1/notification-deliveries/{delivery_id}/attempts`, -`/v1/notification-deliveries/{delivery_id}:retry`, and the matching -`whcp notification-deliveries` commands. The sender signs exact bytes as: - -```text -Webhookery-Signal-Timestamp: -Webhookery-Signal-Signature: t=,v1= +go run ./cmd/whcp key-custody test ``` -Failed notification sends retry from PostgreSQL state and eventually become -terminal `failed` deliveries. Public `/metrics` remains aggregate-only and does -not expose tenant labels. - -SIEM sinks are generic signed HTTPS receivers managed through `/v1/siem-sinks` -and `whcp siem-sinks`. Sink reads require `audit:read`; create, update, -disable, test, and delivery retry require `security:write`. Sink secrets are -encrypted at rest and returned only as non-sensitive metadata. - -The SIEM scheduler builds bounded JSONL batches from `audit_chain_entries` -joined with non-sensitive `audit_events` metadata when rows are still retained. -Each line includes sequence, audit event id, event hash, previous hash, chain -hash, canonicalization version, chain entry state/source, actor id, action, -resource, resource id, reason, and timestamp. Raw payload bodies, provider -headers, API keys, bearer tokens, and egress secrets are not included. - -Each sink stores a `cursor_sequence`. The worker may create a scheduled -delivery for entries after that cursor, but it advances the cursor only after -the signed HTTPS delivery succeeds. 
Failed deliveries retry from PostgreSQL -state and leave the cursor unchanged, making the stream resumable without -skipping audit-chain entries. - -## Enterprise Identity And Access - -Management API and UI access can use API keys or OIDC-backed sessions. API keys -remain the bootstrap and break-glass path. OIDC identity providers are -tenant-scoped resources managed through `/v1/identity-providers` and -`whcp identity-providers`; reads require `security:read`, and create, update, -test, or disable require `security:write`. Only Authorization Code + PKCE is -implemented. Client secrets are encrypted at rest and never returned by API, -CLI, or UI responses. - -The OIDC login flow starts at `/v1/auth/oidc/login?tenant_id=...&provider_id=...` -and completes at `/v1/auth/oidc/callback`. The callback validates state, nonce, -issuer, audience/client id, expiry, and the signed ID token before creating a -hashed `webhookery_session` cookie. Session cookies are HttpOnly, SameSite=Lax, -and marked Secure. Logout revokes the server-side session hash. -Disabling an identity provider revokes active sessions created through that -provider. Security operators can list and revoke tenant sessions through -`/v1/auth/sessions`; session token hashes are never returned. SAML assertion -processing is not implemented in this slice. - -SCIM provisioning is available at `/v1/scim/v2/Users` and -`/v1/scim/v2/Groups`. SCIM bearer tokens are created through -`/v1/scim-tokens` or `whcp scim-tokens create`, returned exactly once, and -stored only as SHA-256 hashes with prefix/last4 metadata. SCIM delete requests -deactivate users or groups instead of hard-deleting them. User deactivation -disables memberships and active sessions while preserving historical users, -memberships, audit events, and actor references. - -Resource-aware role bindings and access policy rules are available through -`/v1/role-bindings`, `/v1/access-policies`, `whcp role-bindings`, and -`whcp access-policies`. 
Existing fixed roles and scoped API keys remain the -compatibility baseline. Role bindings can scope roles by principal, resource -family, resource id, and environment. Access policy rules can explicitly allow -or deny actions for resource families/environments; deny rules take precedence -in explain output. `POST /v1/authz:explain` and `whcp authz explain` return a -redacted policy decision containing matched role, role binding, policy rule, -and reason without exposing sessions, provider tokens, secrets, or payload -bodies. - -Emergency recovery remains API-key based: keep a tightly controlled owner or -security-capable bootstrap/recovery key, rotate it after use, and audit every -identity or access-control change. Production operators should rotate OIDC -client secrets and SCIM tokens through the control API rather than editing -database rows. - -## Enterprise Producer Trust - -Product-event ingestion at `POST /v1/events` accepts three producer trust -mechanisms: API keys with `events:write`, OAuth client-credentials bearer -tokens, and verified producer mTLS identities. Producer credentials can be -source-bound; when `source_id` is set on the credential, the submitted event -body must contain the same `source_id` or ingestion is denied before the event -service is called. - -Producer OAuth clients are tenant-scoped resources managed through -`/v1/producer-clients` and `whcp producer-clients`. Reads require -`security:read`; create, update, disable, and secret rotation require -`security:write`. Client secrets are generated once, returned only in the -create/rotate response, and stored as SHA-256 hashes. Access tokens are opaque -bearer values, stored hashed, have no refresh tokens, default to a 15-minute -TTL, and may not exceed one hour. The public token endpoint is -`POST /v1/oauth/token` with `application/x-www-form-urlencoded` -`grant_type=client_credentials` and HTTP Basic client authentication only. 
-This matches the OAuth 2.0 client credentials grant shape in RFC 6749 section -4.4: https://www.rfc-editor.org/rfc/rfc6749#section-4.4. - -Producer mTLS identities are managed through `/v1/producer-mtls-identities` -and `whcp producer-mtls-identities`. They store public certificate metadata -only: SHA-256 fingerprint, subject/SAN metadata, validity timestamps, state, -and optional source binding. Private keys are never submitted or persisted. -To require app-side certificate verification, configure -`WEBHOOKERY_TLS_CERT_FILE`, `WEBHOOKERY_TLS_KEY_FILE`, and -`WEBHOOKERY_PRODUCER_MTLS_CLIENT_CA_FILE`. The server verifies peer -certificates against that CA before matching the fingerprint. This slice does -not trust proxy-supplied mTLS headers; deployments that terminate TLS before -the API process must use API-key or OAuth producer credentials until a future -trusted-proxy design is implemented. - -## SSRF Protection - -Customer endpoint URLs are treated as hostile input. Production endpoint -delivery requires HTTPS by default, rejects embedded credentials and private or -reserved IP destinations, re-resolves hostnames at delivery time, and does not -follow redirects unless an explicit audited policy allows it. +Expected result: the configured mode can encrypt and decrypt a marker without +printing plaintext, ciphertext, or full key IDs. diff --git a/docs/performance-envelope.md b/docs/performance-envelope.md new file mode 100644 index 0000000..ed4f4fe --- /dev/null +++ b/docs/performance-envelope.md @@ -0,0 +1,104 @@ +# Performance Envelope + +This document explains how to collect and interpret Webhookery performance +evidence for a controlled self-hosted release candidate. It is not a managed +service SLA, benchmark certification, or universal sizing guarantee. 
+ +## Local Smoke Harness + +Run the local performance smoke against a disposable PostgreSQL database: + +```bash +docker compose up -d postgres +WEBHOOKERY_TEST_DATABASE_URL=postgres://webhookery:change-me@localhost:5432/webhookery?sslmode=disable make perf-smoke +``` + +Expected result: + +```text +perf-smoke: wrote tmp/perf-smoke/perf-smoke.json +perf-smoke: wrote tmp/perf-smoke/perf-smoke.md +``` + +The smoke uses local fake Stripe-style signatures and a fake receiver. It does +not call live Stripe, GitHub, Shopify, Slack, AWS, Vault, SIEM, PagerDuty, or +customer receivers. + +The generated files contain aggregate timings and counts only. They must not +contain database URLs, endpoint URLs, secrets, raw signatures, raw payloads, +tenant IDs, provider tokens, or customer data. + +## What The Smoke Covers + +The current smoke records: + +- inbound ingest latency percentiles for verified provider events; +- delivery drain time and delivered-throughput estimate; +- replay create-and-drain time for a current-config replay; +- retry scheduling evidence for a retryable receiver failure; +- successful delivery count and error count. + +Use the output as release evidence that the core path still works under a small +local batch. Do not use it as a capacity promise for another deployment. 
+ +## Sizing Inputs + +Production sizing depends on: + +- sustained inbound event rate and peak burst size; +- raw body size distribution, including the current 2 MiB ingress body limit; +- header count and header size; +- number of tenants, sources, endpoints, subscriptions, and routes; +- fanout ratio from each event to deliveries; +- receiver latency, timeout rate, and retry behavior; +- replay volume and replay rate limits; +- retention windows for raw payloads, normalized envelopes, delivery payloads, + provider API evidence, audit events, and exports; +- object storage mode and object-store latency when + `WEBHOOKERY_RAW_STORAGE_MODE=s3`; +- PostgreSQL CPU, memory, storage IOPS, WAL volume, autovacuum behavior, and + backup cadence. + +## Storage Growth + +Estimate storage from the evidence objects Webhookery keeps: + +| Area | Growth driver | +|------|---------------| +| Raw payloads | Inbound body size times accepted/rejected request count until raw retention deletes bodies. | +| Events and receipts | One event/receipt row per captured request plus duplicate evidence. | +| Delivery payloads | One payload snapshot per delivery until delivery payload retention deletes bodies. | +| Attempts | One row per delivery attempt, including retries and replay deliveries. | +| Audit chain | Append-only audit chain metadata and hashes; retained longer than audit event rows. | +| Exports | Bundle metadata in PostgreSQL and bundle bytes in configured export storage. | +| Reconciliation | Provider API evidence metadata and optional provider response bodies until retention. | + +S3-compatible raw storage reduces PostgreSQL body storage but does not remove +PostgreSQL metadata or backup requirements. PostgreSQL remains the evidence +authority. 
+ +## PostgreSQL Notes + +For production pilots: + +- use managed or operator-backed PostgreSQL with backups and restore drills; +- keep WAL and disk alerts ahead of retention/export/replay peaks; +- review indexes and query plans when tenant, endpoint, route, delivery, audit, + and retention counts grow beyond smoke-test scale; +- run `WEBHOOKERY_TEST_DATABASE_URL=postgres://... make live-postgres-check` + on disposable databases after migration changes; +- run restore drills before promoting schema or evidence-storage changes. + +Do not run performance gates against production databases. + +## Release Evidence + +Attach sanitized `tmp/perf-smoke/perf-smoke.json` and +`tmp/perf-smoke/perf-smoke.md` to the release evidence package when +`make perf-smoke` is run for a candidate. + +Record the machine shape, PostgreSQL version/configuration, object storage +mode, Webhookery commit, and whether the smoke used default Docker Compose or a +dedicated test database. Missing or skipped performance evidence should be +recorded as `blocked`, `fail`, or an accepted-risk exception in +`docs/release-evidence-template.md`. diff --git a/docs/pilot-evidence-template.md b/docs/pilot-evidence-template.md new file mode 100644 index 0000000..9888744 --- /dev/null +++ b/docs/pilot-evidence-template.md @@ -0,0 +1,124 @@ +# Pilot Evidence Template + +Use this template to record sanitized evidence from each Webhookery pilot. +Every completed pilot should produce one comparable packet that can be reviewed +without exposing secrets, raw payload bodies, provider signatures, customer +data, or production database URLs. + +Do not store completed sensitive pilot evidence in the public repository unless +it has been explicitly redacted and reviewed. 
+ +## Summary + +- Pilot name: +- Owner: +- Date range: +- Webhookery version / commit: +- Commercial scope or agreement reference: +- Pilot topology accepted: `yes | no` +- Link to `docs/pilot-topology.md` review: + +## Deployment Topology + +- Deployment mode: `Docker Compose | Helm | other` +- Region: +- PostgreSQL provider / responsibility: +- Raw payload storage mode: `postgres | s3` +- Object storage drill completed: `yes | no | not applicable` +- TLS / ingress owner: +- Backup owner: +- Alert / incident owner: +- Known topology gaps: + +## Provider Connected + +List provider families and event types without real payload data. + +| Provider | Event families | Verification status | Live proof status | Notes | +| --- | --- | --- | --- | --- | +| Stripe | | | | | +| GitHub | | | | | +| Shopify | | | | | + +## Event Volume Range + +- Expected daily volume: +- Peak test volume: +- Retention target: +- Raw payload storage growth estimate: +- Explicit volume limits accepted: + +## Failure Scenario Tested + +- Scenario name: +- Event ID or incident ID: +- Downstream receiver behavior: +- Delivery attempts observed: +- DLQ state: +- Evidence path: + +## Replay Scenario Tested + +- Replay mode: +- Replay reason code: +- Replay reason: +- Actor / role: +- Approval requirement, if any: +- Replay result: +- Evidence path: + +## Evidence Packet + +- Incident ID: +- Incident report generated: `yes | no` +- Evidence bundle generated: `yes | no` +- Evidence bundle profile used: +- Bundle verification command: +- Verification result: +- Raw payload bodies included: `no | yes with elevated approval` +- Redaction review owner: + +## Audit Chain + +- `whcp audit verify-chain` result: +- `whcp audit verify-bundle` result: +- Audit-chain gaps: +- Evidence export ID: + +## Restore Drill + +- Restore drill status: `passed | failed | skipped` +- Database backup artifact: +- Restore target: +- Audit-chain verification after restore: +- Object storage caveat, if any: + +## Known Gaps + 
+| Severity | Gap | Owner | Mitigation | Expiry | +| --- | --- | --- | --- | --- | +| blocker / warning / note | | | | | + +## Accepted Risks + +- Risk: +- Owner: +- Expiry: +- Mitigation: +- Link to accepted-risk record: + +## Commercial Follow-Up + +- Fit recommendation: +- Production-readiness review needed: `yes | no` +- Commercial license exception needed: `yes | no` +- Support package recommendation: +- Custom integration work: +- Next action: + +## Review + +- Reviewed by: +- Review date: +- Linked `docs/pilot-review-checklist.md` decision: +- Sanitized issue or private tracker link: diff --git a/docs/pilot-feedback-template.md b/docs/pilot-feedback-template.md new file mode 100644 index 0000000..9c8ff6b --- /dev/null +++ b/docs/pilot-feedback-template.md @@ -0,0 +1,97 @@ +# Pilot Feedback Template + +Use this template for sanitized Webhookery evaluator and pilot feedback. + +Do not include secrets, raw payloads, customer data, provider credentials, +database URLs with passwords, private keys, raw signatures, bearer tokens, +session cookies, or exploit payloads. + +## Summary + +- Organization / team: +- Contact owner: +- Date: +- Webhookery version / commit: +- Deployment mode: +- Evaluation status: `not_started | running | blocked | completed | abandoned` + +## Environment + +- Hosting environment: +- PostgreSQL responsibility: +- Object storage mode: +- Secret-box mode: +- TLS / mTLS requirements: +- Expected event volume range: +- Retention requirements: + +## Provider Mix + +List providers and event types without real payload data. 
+ +| Provider | Event families | Verification requirement | Reconciliation need | +| --- | --- | --- | --- | +| Stripe | | | | +| GitHub | | | | +| Shopify | | | | +| Slack | | | | +| Internal producers | | | | + +## Current Pain + +- Incident or failure mode: +- Existing replay process: +- Existing audit/evidence process: +- Existing self-hosting or procurement constraint: +- Why current tooling is insufficient: + +## Evaluation Results + +- Evaluator quickstart completed: `yes | no` +- Evidence demo completed: `yes | no` +- `make rc-check` completed: `yes | no` +- Production doctor completed: `yes | no` +- Provider conformance reviewed: `yes | no` +- Release evidence reviewed: `yes | no` + +## Blockers + +| Severity | Blocker | Evidence path | Desired outcome | +| --- | --- | --- | --- | +| blocker / warning / note | | | | + +## Security And Review Requirements + +- Required security review artifacts: +- Required deployment evidence: +- Required support or SLA expectations: +- Required commercial license scope: +- Data residency or retention constraints: + +## Commercial Intent + +- Need commercial license exception: `yes | no | unknown` +- Need paid support: `yes | no | unknown` +- Need production-readiness review: `yes | no | unknown` +- Need custom provider adapter/integration: `yes | no | unknown` +- Timeline: + +## Follow-Up Classification + +Classify each follow-up as one of: + +- docs gap +- bug +- evaluator friction +- missing provider compatibility +- paid custom integration +- general roadmap candidate +- out of scope + +## Decision + +- Owner: +- Next action: +- Due date: +- Accepted risk, if any: +- Link to sanitized issue or backlog item: diff --git a/docs/pilot-review-checklist.md b/docs/pilot-review-checklist.md new file mode 100644 index 0000000..7d22732 --- /dev/null +++ b/docs/pilot-review-checklist.md @@ -0,0 +1,71 @@ +# Pilot Review Checklist + +Use this checklist after the first evaluator or customer pilots to decide the +next Webhookery 
implementation slice. + +The goal is disciplined product learning. Do not broaden scope into marketplace +plugins, multi-region operation, hosted SaaS, SAML, HSM, or vendor-specific +apps unless pilot evidence justifies that phase. + +## Inputs + +- [ ] Completed `docs/pilot-feedback-template.md`. +- [ ] Completed `docs/pilot-evidence-template.md`. +- [ ] Accepted pilot topology from `docs/pilot-topology.md`. +- [ ] Sanitized quickstart/demo result. +- [ ] Relevant issue links. +- [ ] Commercial intent, if any. +- [ ] Support expectation. +- [ ] Provider mix. +- [ ] Deployment constraints. +- [ ] Security review requirements. + +## Review Questions + +- Did the evaluator complete the local quickstart? +- Did the evidence demo explain the product value? +- Which blocker repeated across more than one evaluator? +- Which blocker affected durable capture, replay, audit, retention, provider + conformance, or deployment safety? +- Which request is clearly paid custom work rather than general roadmap? +- Which request would weaken a security invariant or non-claim? +- Which doc would have prevented the issue? +- Which release evidence artifact was missing or unclear? + +## Classification + +For each finding, choose one: + +- fix immediately +- document immediately +- include in next product backlog +- handle as paid custom work +- track as accepted risk +- reject as out of scope +- defer as enterprise/future + +## Release Claim Review + +Before changing production language, confirm: + +- [ ] `make release-acceptance` passes. +- [ ] `make rc-check` passes. +- [ ] Provider conformance evidence is current. +- [ ] Performance and failure-drill evidence supports the claim. +- [ ] External review or accepted-risk status is reflected where relevant. +- [ ] The claim does not imply exactly-once delivery. +- [ ] The claim does not imply provider-side completeness. +- [ ] The claim does not imply compliance certification. 
+ +## Output + +Produce one short decision note: + +- pilot summary +- top three repeated blockers +- accepted risks +- next docs fixes +- next bug fixes +- next product backlog candidate +- commercial follow-up +- explicit out-of-scope requests diff --git a/docs/pilot-topology.md b/docs/pilot-topology.md new file mode 100644 index 0000000..1824f41 --- /dev/null +++ b/docs/pilot-topology.md @@ -0,0 +1,80 @@ +# Pilot Topology + +This is the single supported pilot topology for Webhookery v0.2-style +evaluation. It is intentionally narrow so evaluator feedback is comparable and +the product does not drift into a generic webhook platform. + +Use this document to decide whether a pilot is in scope before changing +deployment, provider, storage, or support claims. + +## Supported Pilot Shape + +| Area | Pilot profile | +| --- | --- | +| Deployment | Single-region self-hosted Docker Compose or Helm. | +| Database | External PostgreSQL operated and backed up by the evaluator/operator. | +| Raw payload storage | PostgreSQL raw payload storage by default. | +| Optional object storage | S3-compatible storage or MinIO only after an explicit storage drill. | +| Providers | One to three providers, with Stripe and GitHub first and Shopify optional. | +| Receiver type | HTTP downstream receiver. | +| Traffic | Bounded low-to-moderate evaluation volume agreed before the pilot. | +| Tenancy | Single organization or a controlled tenant set. | +| Evidence | Audit chain, incident packet, and evidence bundle required. | +| Support | Commercial evaluation or production-readiness review with written scope. | + +## Required Pilot Drills + +- Run `docs/evaluator-quickstart.md`. +- Generate an incident packet from `examples/webhook-evidence-demo/`. +- Verify the evidence bundle with `whcp audit verify-bundle`. +- Run `make rc-check` against a disposable PostgreSQL database. +- Run the production doctor for the intended environment. 
+- Complete a backup/restore drill before production-like traffic. +- Record results in `docs/pilot-evidence-template.md`. + +## Operator-Owned Responsibilities + +The operator owns: + +- PostgreSQL availability, backups, restore drills, credentials, and upgrades; +- TLS/ingress, DNS, network policy, egress controls, and firewalling; +- alert routing, incident response, on-call, and operational escalation; +- secret custody for API keys, webhook secrets, encryption keys, and provider + credentials; +- retention configuration and review of evidence before sharing; and +- receiver behavior, idempotency, and downstream business processing. + +## Optional S3/MinIO Storage + +PostgreSQL raw payload storage is the default pilot path. S3-compatible object +storage is in scope only when the pilot explicitly runs a storage drill that +proves: + +- object writes happen before accepted inbound success for object-backed raw + payloads; +- object read failures are redacted in errors and support output; +- evidence exports are readable after backup/restore procedures; and +- missing object bodies are treated as an explicit evidence gap, not silently + recovered. + +## Out Of Scope + +These requests are out of scope for the initial pilot unless repeated paid +pilot evidence justifies a new phase: + +- multi-region active-active operation; +- Kafka, NATS, SQS, Pub/Sub, or another queue as the evidence authority; +- hosted SaaS operation by this repository; +- arbitrary code plugins or marketplace integrations; +- broad outbound-webhook platform positioning; +- SAML/HSM/enterprise identity expansion beyond the implemented surface; +- provider certification claims; and +- compliance, legal evidentiary, managed-service availability, exactly-once, + or provider-side completeness claims. + +## Go/No-Go Rule + +A pilot is in scope only if the evaluator accepts the topology above and the +non-claims in `docs/security-promise.md`. 
If the requested topology needs a +broader platform, record it in `docs/pilot-feedback-template.md` and classify +it through `docs/roadmap-intake-policy.md` before implementation. diff --git a/docs/production-rc-checklist.md b/docs/production-rc-checklist.md new file mode 100644 index 0000000..4bd6f16 --- /dev/null +++ b/docs/production-rc-checklist.md @@ -0,0 +1,100 @@ +# Production RC Checklist + +Use this checklist before treating a Webhookery tag as a controlled +self-hosted release candidate. This checklist routes to canonical docs instead +of duplicating every command. + +Webhookery release-candidate readiness does not mean exactly-once delivery, +provider-side completeness, compliance certification, or managed-service +availability. + +## 1. Release Identity + +- Tag is created and points to the intended commit. +- Release notes exist under `docs/releases/`. +- Changelog includes the release. +- Image digest is recorded. +- Source and image SBOMs are attached or linked. +- Release evidence artifact is attached or linked. + +## 2. Local Gates + +Run: + +```bash +make docs-check +make release-acceptance +make rc-check +make finalize +``` + +Expected result: all commands exit zero. If a command is skipped or blocked, +record it in the release evidence packet with owner, expiry, and mitigation. + +## 3. Disposable Database Gates + +Run against a disposable database, not production: + +```bash +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make live-postgres-check +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check +``` + +Expected result: migrations, DB-backed evidence checks, and RC drills pass. + +## 4. Release Workflow + +Confirm the release workflow passed: + +- release acceptance +- provider conformance +- performance smoke +- `rc-check` with the local Postgres service +- Docker build and push +- cosign keyless signing +- source and image SBOM generation +- Trivy HIGH/CRITICAL image scan +- evidence artifact upload + +## 5. 
Security And Evidence Review + +Review: + +- `docs/security-promise.md` +- `docs/security-review-package.md` +- `docs/articles/webhook-security-review-checklist.md` +- `docs/provider-conformance.md` +- `docs/release-evidence-template.md` +- `docs/release-evidence-sample.md` + +Expected result: non-claims are preserved and sensitive data is absent from +docs, logs, release notes, and artifacts. + +## 6. Operations Readiness + +Review: + +- `docs/configuration.md` +- `docs/operations.md` +- `docs/day-2-operations.md` +- `docs/deployment.md` +- `docs/pilot-topology.md` +- `docs/schema-migrations.md` +- `docs/observability.md` + +Expected result: production doctor, backup/restore, retention, audit-chain +verification, alert handling, restore drills, and pilot topology boundaries +have an owner. + +## 7. Accepted Risks + +Record unresolved items in `docs/external-review-accepted-risks.md` or the +release evidence packet. Each accepted risk needs: + +- owner +- expiry date +- mitigation +- release decision + +Do not call a release broadly production-ready when release-blocking risks are +open without accepted-risk records. diff --git a/docs/production-readiness-review.md b/docs/production-readiness-review.md new file mode 100644 index 0000000..2314244 --- /dev/null +++ b/docs/production-readiness-review.md @@ -0,0 +1,58 @@ +# Production Readiness Review + +The production readiness review is a paid engagement for organizations +evaluating Webhookery in controlled self-hosted environments. + +It is not a compliance certification, legal evidence certification, penetration +test, hosted-service SLA, or guarantee of provider-side event completeness. 
+ +## Review Scope + +Typical review areas: + +- deployment topology and network boundaries +- PostgreSQL backup, restore, migration, and retention posture +- object-storage configuration for raw payload evidence +- secret-box mode and key-custody responsibilities +- provider conformance and provider-specific limitations +- raw payload and PII exposure controls +- replay, DLQ, retention, evidence export, and audit-chain operations +- production doctor output +- performance smoke output and sizing assumptions +- observability, alerts, notification, and SIEM signal paths +- incident triage and restore drills + +## Inputs + +The customer provides sanitized evidence: + +- completed evaluator quickstart result +- `make rc-check` output against a disposable environment +- production doctor output with secrets redacted +- deployment diagram without private credentials +- backup/restore procedure summary +- provider list and expected event volume range +- support and incident expectations + +Do not provide secrets, customer payloads, private keys, raw signatures, bearer +tokens, session cookies, provider credentials, or database URLs with passwords. + +## Outputs + +Expected outputs: + +- readiness summary +- release/evidence gap list +- blocker/warning classification +- accepted-risk recommendations +- operational runbook improvements +- support or custom-work recommendation + +Findings are scoped to the reviewed deployment and date. They do not certify +all future releases, all provider behavior, all cloud environments, or all +customer integrations. + +## Starting Range + +A Production Readiness Review starts in the EUR 7,500-12,500 range, depending +on scope, deployment complexity, provider mix, and expected support follow-up.
diff --git a/docs/provider-conformance.manifest.json b/docs/provider-conformance.manifest.json new file mode 100644 index 0000000..208c922 --- /dev/null +++ b/docs/provider-conformance.manifest.json @@ -0,0 +1,142 @@ +{ + "schema_version": "provider-conformance-v1", + "project": "webhookery", + "last_official_doc_verification": "2026-05-27", + "no_live_provider_calls": true, + "local_checks": [ + "go test ./internal/provider -run TestProviderSignatureVectors", + "go test ./internal/provider -run TestNormalizeBuiltInProviderMetadata", + "go test ./internal/provider -run TestGenericJWTAdapter", + "go test ./pkg/verifier -run TestTimestampedSignatureWindow" + ], + "signature_vector_registry": "internal/provider/testdata/signature_vectors.json", + "providers": [ + { + "name": "stripe", + "official_docs": [ + "https://docs.stripe.com/webhooks" + ], + "signature": "Stripe-Signature v1 HMAC-SHA256 over timestamp.raw_body", + "timestamp_window_seconds": 300, + "event_id": "json.id", + "event_type": "json.type", + "vector_tests": [ + "TestProviderSignatureVectors/stripe", + "TestNormalizeBuiltInProviderMetadata/stripe" + ], + "limitations": [ + "no provider-side completeness guarantee", + "recovered provider API events are not signed webhook evidence" + ] + }, + { + "name": "github", + "official_docs": [ + "https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries", + "https://docs.github.com/en/webhooks/webhook-events-and-payloads" + ], + "signature": "X-Hub-Signature-256 sha256= HMAC-SHA256 over raw_body", + "timestamp_window_seconds": null, + "event_id": "X-GitHub-Delivery", + "event_type": "X-GitHub-Event", + "vector_tests": [ + "TestProviderSignatureVectors/github", + "TestNormalizeBuiltInProviderMetadata/github" + ], + "limitations": [ + "does not invent raw payloads when provider delivery detail lacks them", + "redelivery is explicit audited recovery work" + ] + }, + { + "name": "shopify", + "official_docs": [ + 
"https://shopify.dev/docs/apps/build/webhooks/verify-deliveries" + ], + "signature": "X-Shopify-Hmac-SHA256 base64 HMAC-SHA256 over raw_body", + "timestamp_window_seconds": null, + "event_id": "X-Shopify-Webhook-Id", + "event_type": "X-Shopify-Topic", + "vector_tests": [ + "TestProviderSignatureVectors/shopify", + "TestNormalizeBuiltInProviderMetadata/shopify" + ], + "limitations": [ + "no generic missed-event recovery claim", + "resource polling is topic-specific" + ] + }, + { + "name": "slack", + "official_docs": [ + "https://api.slack.com/docs/verifying-requests-from-slack" + ], + "signature": "X-Slack-Signature v0 HMAC-SHA256 over v0:timestamp:raw_body", + "timestamp_window_seconds": 300, + "event_id": "json.event_id", + "event_type": "json.event.type", + "vector_tests": [ + "TestProviderSignatureVectors/slack", + "TestNormalizeBuiltInProviderMetadata/slack" + ], + "limitations": [ + "limited retry-window evidence", + "no generic missed-event recovery claim" + ] + }, + { + "name": "generic-hmac", + "official_docs": [ + "local declarative adapter contract" + ], + "signature": "configured HMAC-SHA256 signature header and signed payload template", + "timestamp_window_seconds": "configured", + "event_id": "configured JSON extractor", + "event_type": "configured JSON extractor", + "vector_tests": [ + "TestDeclarativeAdapterVerifiesTimestampedHMAC", + "TestDeclarativeAdapterRejectsExpiredAndMutatedPayloads" + ], + "limitations": [ + "no arbitrary code plugins", + "no provider recovery without a concrete adapter" + ] + }, + { + "name": "generic-jwt", + "official_docs": [ + "https://www.rfc-editor.org/info/rfc7519/" + ], + "signature": "HS256 JWT bearer token with body_sha256 claim", + "timestamp_window_seconds": "iat/exp claims", + "event_id": "jti", + "event_type": "body or configured metadata", + "vector_tests": [ + "TestGenericJWTAdapterVerifiesHS256AndBodyHash", + "TestGenericJWTAdapterRejectsAlgNone" + ], + "limitations": [ + "HS256 only in the generic JWT 
adapter" + ] + }, + { + "name": "cloudevents", + "official_docs": [ + "https://github.com/cloudevents/spec" + ], + "signature": "none by default", + "timestamp_window_seconds": null, + "event_id": "ce-id or structured id", + "event_type": "ce-type or structured type", + "vector_tests": [ + "TestCloudEventsAdapterDoesNotVerifyUnsignedStructuredMode", + "TestCloudEventsAdapterDoesNotVerifyUnsignedBinaryMode", + "TestNormalizeBuiltInProviderMetadata/cloudevents" + ], + "limitations": [ + "unsigned CloudEvents validity does not imply trust", + "does not route by default without explicit trusted policy" + ] + } + ] +} diff --git a/docs/provider-conformance.md b/docs/provider-conformance.md new file mode 100644 index 0000000..7a3ee73 --- /dev/null +++ b/docs/provider-conformance.md @@ -0,0 +1,92 @@ +# Provider Conformance Matrix + +This matrix is release evidence for the provider behavior Webhookery currently +claims. It makes no provider-side completeness guarantee: providers can retry, +drop, expire, redact, or make old events unrecoverable according to their own +systems. Webhookery's promise remains durable capture before success, exact raw +byte verification, visible gaps, and explicit recovery evidence where provider +APIs permit it. + +Last official-doc verification: 2026-05-27. + +Machine-readable evidence lives in +`docs/provider-conformance.manifest.json`. Run: + +```sh +make provider-conformance-check +``` + +The check uses only local deterministic vectors and documentation metadata. The +committed provider signature vector registry lives at +`internal/provider/testdata/signature_vectors.json`; each vector records its +source, checked date, headers, raw body fixture, mutated-body negative check, +and expected result. The check does not call Stripe, GitHub, Shopify, Slack, +AWS, Vault, or customer receivers. 
+ +Manual sanitized live-provider proof guides are tracked separately: + +- Stripe operator guide: `docs/providers/stripe.md` +- Stripe proof guide: `docs/live-provider-proof/stripe.md` +- GitHub operator guide: `docs/providers/github.md` +- GitHub proof guide: `docs/live-provider-proof/github.md` +- Shopify operator guide: `docs/providers/shopify.md` +- Shopify proof guide: `docs/live-provider-proof/shopify.md` +- Private proof run record template: + `docs/live-provider-proof/run-record-template.md` +- Proof freshness metadata: `docs/provider-proof-manifest.json` + +Run: + +```sh +make provider-proof-check +``` + +Those guides are external/manual evidence procedures. They are not provider +certification, do not call live providers in repository checks, and do not +store completed private proof artifacts in public source. + +## Matrix + +| Provider or format | Verification evidence | Timestamp or replay window | Event ID and type extraction | Replay or recovery behavior | Current limitations | +|--------------------|-----------------------|----------------------------|------------------------------|-----------------------------|---------------------| +| Stripe | `Stripe-Signature` `v1` HMAC-SHA256 over `timestamp.raw_body`; local vector in `internal/provider/testdata/signature_vectors.json`. | Five-minute tolerance aligned with Stripe's documented library default. | JSON `id` and `type`; account/API version metadata where present. | Reconciliation can compare Stripe Events API IDs and capture recovered provider-API evidence when enabled. | No provider-side completeness guarantee; recovered evidence is provider API evidence, not signed webhook evidence. | +| GitHub | `X-Hub-Signature-256` `sha256=` HMAC-SHA256 over exact raw body; local vector in `internal/provider/testdata/signature_vectors.json`. | GitHub signature validation does not define a timestamp window in the signature header. | `X-GitHub-Delivery` as delivery/event ID and `X-GitHub-Event` as type. 
| Reconciliation can scan repository webhook delivery APIs and request redelivery where GitHub supports it. | No raw payload is invented if GitHub does not return it; redelivery is explicit audited recovery work. | +| Shopify | `X-Shopify-Hmac-SHA256` base64 HMAC-SHA256 over the raw request body; local vector in `internal/provider/testdata/signature_vectors.json`. | Shopify verification is raw-body HMAC based; this slice does not claim a generic timestamp replay window. | `X-Shopify-Webhook-Id` as delivery ID, `X-Shopify-Topic` as type, and shop domain metadata. | Capability evidence is recorded; generic missed-event recovery is not claimed. | Resource polling is topic-specific and is not represented as universal webhook recovery. | +| Slack | `X-Slack-Signature` `v0=` HMAC-SHA256 over `v0:timestamp:raw_body`; local vector in `internal/provider/testdata/signature_vectors.json`. | Five-minute timestamp skew window. | JSON `event_id` and nested event `type`; team and app metadata where present. | Capability evidence is recorded; generic missed-event recovery is not claimed. | Slack retry-window evidence is limited; unsupported gaps are evidence, not Webhookery capture failures. | +| Generic HMAC adapter | Declarative local adapters support HMAC-SHA256 with explicit signature/timestamp headers, signed payload template, encoding, and replay window. | Configured per adapter definition. | Configured JSON extractors. | No provider recovery unless a concrete reconciliation adapter is implemented. | Declarative adapters are deterministic verification/normalization helpers, not arbitrary code plugins. | +| Generic JWT adapter | HS256 JWT bearer token with allowlisted algorithm and raw body hash claim; alg `none` is rejected. | `iat`/`exp` claims are validated by local tests. | JWT `jti` plus configured/body metadata. | No provider recovery unless a concrete reconciliation adapter is implemented. | HS256 only in the current generic JWT adapter. 
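| +| CloudEvents | Structured and binary CloudEvents metadata can be parsed and normalized. | None by default. | CloudEvents `id`, `type`, `source`, and optional `subject`. | No provider recovery. | Unsigned CloudEvents validity does not imply trust and does not route by default. | + +## Official Sources Checked + +- Stripe webhooks and signature behavior: + https://docs.stripe.com/webhooks +- GitHub webhook signature validation: + https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries +- GitHub webhook events and delivery headers: + https://docs.github.com/en/webhooks/webhook-events-and-payloads +- GitHub webhook redelivery: + https://docs.github.com/en/webhooks/testing-and-troubleshooting-webhooks/redelivering-webhooks +- GitHub webhook best practices: + https://docs.github.com/en/webhooks/using-webhooks/best-practices-for-using-webhooks +- Shopify webhook HMAC verification: + https://shopify.dev/docs/apps/build/webhooks/verify-deliveries +- Slack request signing: + https://api.slack.com/docs/verifying-requests-from-slack +- CloudEvents core and JSON event format: + https://github.com/cloudevents/spec +- JWT standard: + https://www.rfc-editor.org/info/rfc7519/ +- SSRF prevention guidance for webhook-style URLs: + https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html + +## Evidence Boundaries + +- These checks prove local parser, verifier, and normalization behavior against + committed vectors; they do not certify live provider behavior. +- Provider APIs used for reconciliation are tested with fake local servers in + release gates; live-provider acceptance calls are intentionally excluded. +- Unsupported provider recovery is recorded as explicit gap evidence rather + than hidden as success. +- The conformance matrix must be rechecked before release evidence if the last + official-doc verification date is older than 90 days. 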
diff --git a/docs/provider-proof-manifest.json b/docs/provider-proof-manifest.json new file mode 100644 index 0000000..220a3b8 --- /dev/null +++ b/docs/provider-proof-manifest.json @@ -0,0 +1,90 @@ +{ + "schema_version": "provider-proof-v1", + "project": "webhookery", + "no_live_provider_calls": true, + "max_age_days": 90, + "generated_private_evidence_policy": "completed live-provider proof artifacts stay outside public source control; committed samples must be redacted", + "proofs": [ + { + "provider": "stripe", + "status": "manual_external", + "checked_date": "2026-06-04", + "expires_after": "2026-09-02", + "operator_guide": "docs/providers/stripe.md", + "proof_guide": "docs/live-provider-proof/stripe.md", + "redaction_policy": "docs/live-provider-proof/stripe-redaction-policy.md", + "sample_report": "docs/live-provider-proof/samples/stripe-incident-report.redacted.md", + "official_sources": [ + "https://docs.stripe.com/webhooks", + "https://docs.stripe.com/webhooks/signature" + ], + "scope_checked": [ + "Stripe-Signature raw-body verification", + "timestamp replay window context", + "test-mode CLI forwarding", + "provider retry and manual resend boundaries" + ], + "non_claims": [ + "not provider certification", + "no provider-side completeness guarantee", + "no exactly-once delivery claim" + ] + }, + { + "provider": "github", + "status": "manual_external", + "checked_date": "2026-06-04", + "expires_after": "2026-09-02", + "operator_guide": "docs/providers/github.md", + "proof_guide": "docs/live-provider-proof/github.md", + "redaction_policy": "docs/live-provider-proof/stripe-redaction-policy.md", + "sample_report": "docs/live-provider-proof/samples/github-incident-report.redacted.md", + "official_sources": [ + "https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries", + "https://docs.github.com/en/webhooks/webhook-events-and-payloads", + "https://docs.github.com/en/webhooks/testing-and-troubleshooting-webhooks/redelivering-webhooks", + 
"https://docs.github.com/en/webhooks/using-webhooks/best-practices-for-using-webhooks" + ], + "scope_checked": [ + "X-Hub-Signature-256 raw-body verification", + "X-GitHub-Delivery identity", + "repository ping and push proof flow", + "manual redelivery boundaries" + ], + "non_claims": [ + "not provider certification", + "no provider-side completeness guarantee", + "no exactly-once delivery claim" + ] + }, + { + "provider": "shopify", + "status": "manual_external", + "checked_date": "2026-06-04", + "expires_after": "2026-09-02", + "operator_guide": "docs/providers/shopify.md", + "proof_guide": "docs/live-provider-proof/shopify.md", + "redaction_policy": "docs/live-provider-proof/stripe-redaction-policy.md", + "sample_report": "docs/live-provider-proof/samples/shopify-incident-report.redacted.md", + "official_sources": [ + "https://shopify.dev/docs/apps/build/webhooks", + "https://shopify.dev/docs/apps/build/webhooks/delivery-structure", + "https://shopify.dev/docs/apps/build/webhooks/verify-deliveries", + "https://shopify.dev/docs/apps/build/webhooks/troubleshoot" + ], + "scope_checked": [ + "X-Shopify-Hmac-SHA256 raw-body verification", + "X-Shopify-Webhook-Id delivery identity", + "X-Shopify-Topic event type metadata", + "development-store proof flow", + "retry and topic-specific recovery boundaries" + ], + "non_claims": [ + "not provider certification", + "no provider-side completeness guarantee", + "no exactly-once delivery claim", + "no universal Shopify recovery claim" + ] + } + ] +} diff --git a/docs/providers/github.md b/docs/providers/github.md new file mode 100644 index 0000000..82f13c6 --- /dev/null +++ b/docs/providers/github.md @@ -0,0 +1,230 @@ +# GitHub Operator Guide + +This guide is for operators configuring GitHub as a Webhookery source. It +covers the implemented Webhookery behavior and the GitHub behavior that was +checked against official docs on 2026-06-04. 
+ +## Scope + +Implemented Webhookery behavior: + +- `X-Hub-Signature-256` verification using HMAC-SHA256 over the exact raw body. +- Event identity from `X-GitHub-Delivery`. +- Event type from `X-GitHub-Event`. +- Duplicate delivery IDs remain visible as evidence; redelivery can be + separated from Webhookery replay. +- Replay creates new Webhookery delivery work linked to the original evidence. + +This guide does not claim GitHub provider certification, provider-side event +completeness, ordering, exactly-once delivery, or downstream business success. +In short: this is not provider certification. + +Official sources checked: + +- https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries +- https://docs.github.com/en/webhooks/webhook-events-and-payloads +- https://docs.github.com/en/webhooks/testing-and-troubleshooting-webhooks/redelivering-webhooks +- https://docs.github.com/en/webhooks/using-webhooks/best-practices-for-using-webhooks + +## Setup + +Use a disposable repository first. Do not send organization production events +or private customer payloads to proof environments. + +1. Start Webhookery and set a control-plane API key: + + ```bash + cp .env.example .env + docker compose up --build + export WEBHOOKERY_API_KEY=dev-bootstrap-key + ``` + +2. Generate a high-entropy webhook secret and keep it in a local shell or + secret manager. Do not put secrets in payload URLs. + + ```bash + export WEBHOOKERY_GITHUB_WEBHOOK_SECRET='replace-with-random-local-secret' + ``` + +3. Create a GitHub source: + + ```bash + go run ./cmd/whcp sources create \ + --name github-test-repo \ + --provider github \ + --secret "$WEBHOOKERY_GITHUB_WEBHOOK_SECRET" \ + --api-key "$WEBHOOKERY_API_KEY" + ``` + +4. Save the returned source ID outside commits: + + ```bash + export WEBHOOKERY_GITHUB_SOURCE_ID=src_replace_me + ``` + +5. 
Configure the repository webhook in GitHub: + + - Payload URL: + `https://webhookery.example.test/v1/ingest/github/${WEBHOOKERY_GITHUB_SOURCE_ID}` + - Content type: `application/json` + - Secret: the value of `WEBHOOKERY_GITHUB_WEBHOOK_SECRET` + - Events: start with `ping` and `push` + - SSL verification: enabled for public HTTPS endpoints + +For local-only proof where GitHub cannot reach `localhost`, use a temporary +webhook proxy that forwards to +`http://localhost:8080/v1/ingest/github/${WEBHOOKERY_GITHUB_SOURCE_ID}`. +Treat proxy URLs as temporary test infrastructure and remove the webhook after +the proof. + +## Secret Handling + +- Use a unique secret per GitHub webhook. +- Store it in Webhookery through the source secret APIs, not in docs or issue + comments. +- Rotate the source secret if it appears in a terminal recording, screenshot, + CI log, support artifact, or shell history you cannot control. +- Do not put API keys or tokens in the webhook payload URL. + +## Signature Verification + +GitHub sends `X-Hub-Signature-256` when a webhook secret is configured. The +signature starts with `sha256=` and is computed with the webhook secret and +payload body. Webhookery verifies the exact raw body and uses constant-time +comparison through the provider verification path. + +The older `X-Hub-Signature` SHA-1 header is compatibility evidence only. Use +`X-Hub-Signature-256` for GitHub sources. + +## Delivery Identity And Dedupe + +GitHub sends `X-GitHub-Delivery` as the delivery GUID and `X-GitHub-Event` as +the event name. Webhookery uses those values for normalized provider metadata. + +GitHub's redelivery flow reuses the original `X-GitHub-Delivery` value. In +Webhookery, that means a manual GitHub redelivery should be visible as a +duplicate receipt for the same provider delivery identity. Do not treat the +duplicate as erased evidence. 
+ +## Redelivery Behavior + +GitHub documentation says failed deliveries can be manually redelivered from +the past three days and that GitHub does not automatically redeliver failed +deliveries. Repository redelivery through the web UI requires repository admin +access. REST API redelivery or reconciliation requires a GitHub token with the +permissions required by GitHub for that endpoint. + +No GitHub token is required for the basic repository webhook ping or push proof +unless you explicitly test REST API redelivery or Webhookery reconciliation. + +## Replay And Evidence Workflow + +Create a disposable receiver and route before sending the test event: + +```bash +export WEBHOOKERY_RECEIVER_URL='https://receiver.example.test/fail-first' + +go run ./cmd/whcp endpoints validate-url \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp endpoints create \ + --name github-proof-receiver \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ENDPOINT_ID=end_replace_me + +go run ./cmd/whcp routes create \ + --name github-push-proof \ + --source-id "$WEBHOOKERY_GITHUB_SOURCE_ID" \ + --endpoint-id "$WEBHOOKERY_ENDPOINT_ID" \ + --event-types ping,push \ + --state draft \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ROUTE_ID=rte_replace_me + +go run ./cmd/whcp routes activate \ + --route-id "$WEBHOOKERY_ROUTE_ID" \ + --reason "github proof route activation" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +After GitHub sends `ping` or `push`, inspect and replay: + +```bash +go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_EVENT_ID=evt_replace_me + +go run ./cmd/whcp events timeline \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --format markdown \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp replay-jobs dry-run \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "github proof receiver fixed before 
replay" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp replay-jobs create \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "github proof receiver fixed before replay" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## Incident Packet Example + +Generate a report and evidence export after replay: + +```bash +go run ./cmd/whcp incidents create \ + --title "GitHub test repository webhook failed then replayed" \ + --reason "github proof investigation" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_INCIDENT_ID=inc_replace_me + +go run ./cmd/whcp incidents add-event \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --reason "attach GitHub test delivery" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents generate-report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "github proof report" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --format markdown \ + --output github-incident-report.private.md \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +Keep completed live reports and bundles private. Commit only sanitized samples +such as `docs/live-provider-proof/samples/github-incident-report.redacted.md`. + +## Cleanup + +1. Delete the test repository webhook in GitHub. +2. Rotate or delete the Webhookery source secret. +3. Disable or delete disposable routes and endpoints with operator reasons. +4. Delete temporary webhook proxy channels. +5. Remove private reports and bundles from shared machines. + +## Non-Claims + +This guide does not prove: + +- downstream business processing succeeded; +- GitHub will redeliver every provider-side event forever; +- global event ordering; +- exactly-once delivery; +- compliance or legal evidentiary certification; or +- safe use of production customer data in public evidence. 
diff --git a/docs/providers/shopify.md b/docs/providers/shopify.md new file mode 100644 index 0000000..f8a2c2d --- /dev/null +++ b/docs/providers/shopify.md @@ -0,0 +1,262 @@ +# Shopify Operator Guide + +This guide is for operators configuring Shopify as a Webhookery source. It +covers the implemented Webhookery behavior and the Shopify behavior that was +checked against official docs on 2026-06-04. + +## Scope + +Implemented Webhookery behavior: + +- `X-Shopify-Hmac-SHA256` base64 HMAC-SHA256 verification over the exact raw + body. +- Delivery identity from `X-Shopify-Webhook-Id`. +- Event type from `X-Shopify-Topic`. +- Shop domain metadata from Shopify delivery headers when present. +- Duplicate deliveries remain visible as evidence; dedupe can suppress + processing but must not erase receipts or raw payload metadata. +- Replay creates new Webhookery delivery work linked to the original evidence. + +This guide does not claim Shopify provider certification, provider-side event +completeness, ordering, exactly-once delivery, or downstream business success. +In short: this is not provider certification. + +Official sources checked: + +- https://shopify.dev/docs/apps/build/webhooks +- https://shopify.dev/docs/apps/build/webhooks/delivery-structure +- https://shopify.dev/docs/apps/build/webhooks/verify-deliveries +- https://shopify.dev/docs/apps/build/webhooks/troubleshoot + +## Setup + +Use a development store or test app first. Do not use production shop data for +proof runs. + +1. Start Webhookery and set a control-plane API key: + + ```bash + cp .env.example .env + docker compose up --build + export WEBHOOKERY_API_KEY=dev-bootstrap-key + ``` + +2. Store the Shopify app client secret in a local shell or secret manager. Do + not commit it or paste it into issues. + + ```bash + export WEBHOOKERY_SHOPIFY_CLIENT_SECRET='replace-with-app-client-secret' + ``` + +3. Create a Shopify source: + + ```bash + go run ./cmd/whcp sources create \ + --name shopify-development-store \ + --provider shopify \ + --secret "$WEBHOOKERY_SHOPIFY_CLIENT_SECRET" \ + --api-key "$WEBHOOKERY_API_KEY" + ``` + +4. 
Save the returned source ID outside commits: + + ```bash + export WEBHOOKERY_SHOPIFY_SOURCE_ID=src_replace_me + ``` + +5. Configure a Shopify webhook subscription for a development-store topic such + as `products/create`: + + ```text + https://webhookery.example.test/v1/ingest/shopify/${WEBHOOKERY_SHOPIFY_SOURCE_ID} + ``` + +Use HTTPS for public Shopify deliveries. During local development, use your +normal Shopify development tunnel or another temporary webhook proxy, and +remove it after the proof. + +## Signature Verification + +Shopify HTTPS deliveries include a base64 HMAC signature in +`X-Shopify-Hmac-SHA256`, generated with the app client secret and raw request +body. Webhookery verifies the raw body before treating the event as trusted. + +Keep these operating rules: + +- Preserve exact raw request bytes until verification completes. +- Use the app client secret that Shopify uses for the app sending the webhook. +- Reject or quarantine missing, malformed, or wrong-secret signatures. +- Verify before trusting topic, shop domain, or payload content. + +## Event ID And Type Extraction + +Webhookery normalizes: + +| Shopify header | Webhookery use | +|----------------|----------------| +| `X-Shopify-Webhook-Id` | provider delivery identity and dedupe key | +| `X-Shopify-Topic` | event type, for example `products/create` | +| `X-Shopify-Shop-Domain` | shop metadata | +| `X-Shopify-Event-Id` | correlation metadata when present | + +If the same merchant action produces multiple deliveries, Shopify can provide +separate webhook IDs with a shared event ID. Keep both values visible in +evidence where present. + +## Duplicate Handling + +Shopify says apps can receive the same webhook more than once, for example +after a timeout or retry. Webhookery records duplicate receipts instead of +overwriting evidence. Downstream handlers should still be idempotent. + +## Retry Expectations + +Shopify expects a `200` series response for successful HTTPS delivery. 
Current +official docs say failed deliveries are retried up to 8 times; troubleshooting +docs describe this as an 8-attempt retry pattern over a four-hour period. + +Webhookery's downstream delivery retries are separate from Shopify's provider +retries. Webhookery may have durably captured the Shopify delivery even when a +customer receiver later fails. + +## Manual Recovery Limitations + +Do not claim universal Shopify recovery. Some gaps can be investigated by +checking Shopify delivery logs or by querying Shopify resource APIs for a +specific topic, but that is topic-specific reconciliation evidence. It is not +signed webhook evidence and it is not proof that every provider-side event can +be recovered. + +## Route By Topic + +Create a route for the Shopify topic you subscribed to: + +```bash +export WEBHOOKERY_RECEIVER_URL='https://receiver.example.test/fail-first' + +go run ./cmd/whcp endpoints validate-url \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp endpoints create \ + --name shopify-proof-receiver \ + --url "$WEBHOOKERY_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ENDPOINT_ID=end_replace_me + +go run ./cmd/whcp routes create \ + --name shopify-products-proof \ + --source-id "$WEBHOOKERY_SHOPIFY_SOURCE_ID" \ + --endpoint-id "$WEBHOOKERY_ENDPOINT_ID" \ + --event-types products/create \ + --state draft \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ROUTE_ID=rte_replace_me + +go run ./cmd/whcp routes activate \ + --route-id "$WEBHOOKERY_ROUTE_ID" \ + --reason "shopify proof route activation" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## Development-Store Proof + +Trigger the subscribed topic in a development store. For example, create a +product to trigger `products/create`. 
+ +```bash +go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_EVENT_ID=evt_replace_me +go run ./cmd/whcp events timeline \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --format markdown \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +Expected evidence: + +- provider verification is valid; +- topic and webhook ID metadata are present; +- route matching uses the configured topic; +- downstream failure and replay evidence remain separate from Shopify retry + evidence. + +## Replay Workflow + +After the receiver is fixed, replay with an explicit reason: + +```bash +go run ./cmd/whcp replay-jobs dry-run \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "shopify proof receiver fixed before replay" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp replay-jobs create \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "shopify proof receiver fixed before replay" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +If the event reached DLQ, release the DLQ entry instead: + +```bash +go run ./cmd/whcp dead-letter list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_DLQ_ID=dlq_replace_me +go run ./cmd/whcp dead-letter release \ + --entry-id "$WEBHOOKERY_DLQ_ID" \ + --reason-code receiver_fixed \ + --reason "shopify proof receiver recovered" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## Incident Packet Example + +Generate a report and evidence export after replay: + +```bash +go run ./cmd/whcp incidents create \ + --title "Shopify development-store webhook failed then replayed" \ + --reason "shopify proof investigation" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_INCIDENT_ID=inc_replace_me + +go run ./cmd/whcp incidents add-event \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --reason "attach Shopify development-store delivery" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp 
incidents generate-report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "shopify proof report" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --format markdown \ + --output shopify-incident-report.private.md \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +Keep completed live reports and bundles private. Commit only sanitized samples +such as `docs/live-provider-proof/samples/shopify-incident-report.redacted.md`. + +## Non-Claims + +This guide does not prove: + +- downstream business processing succeeded; +- Shopify will redeliver every provider-side event forever; +- universal recovery for every topic; +- global event ordering; +- exactly-once delivery; +- compliance or legal evidentiary certification; or +- safe use of production shop data in public evidence. + diff --git a/docs/providers/stripe.md b/docs/providers/stripe.md new file mode 100644 index 0000000..cae1c12 --- /dev/null +++ b/docs/providers/stripe.md @@ -0,0 +1,276 @@ +# Stripe Operator Guide + +This guide is for operators configuring Stripe as a Webhookery source. It +covers the implemented Webhookery behavior and the Stripe behavior that was +checked against official docs on 2026-06-04. + +## Scope + +Implemented Webhookery behavior: + +- `Stripe-Signature` `v1` HMAC-SHA256 verification over + `timestamp.raw_body`. +- Five-minute timestamp tolerance for replay protection. +- Event identity from JSON `id` and event type from JSON `type`. +- Duplicate events remain visible as evidence; dedupe can suppress processing + but must not erase receipts or raw payload metadata. +- Replay creates new delivery work linked to the original event or delivery. + +This guide does not claim Stripe provider certification, provider-side event +completeness, ordering, exactly-once delivery, or downstream business success. +In short: this is not provider certification. 
+ +Official sources checked: + +- https://docs.stripe.com/webhooks +- https://docs.stripe.com/webhooks/signature + +## Setup + +Use a disposable Stripe sandbox or test-mode account first. Do not use live +customer data for proof runs. + +1. Start Webhookery and set a control-plane API key: + + ```bash + cp .env.example .env + docker compose up --build + export WEBHOOKERY_API_KEY=dev-bootstrap-key + ``` + +2. Create a Stripe source with a temporary placeholder. Stripe CLI signing + secrets are printed after `stripe listen` starts, so rotate the source + secret after the listener prints the real test secret. + + ```bash + go run ./cmd/whcp sources create \ + --name stripe-test-mode \ + --provider stripe \ + --secret temporary-local-placeholder \ + --api-key "$WEBHOOKERY_API_KEY" + ``` + +3. Save the returned source ID outside commits: + + ```bash + export WEBHOOKERY_STRIPE_SOURCE_ID=src_replace_me + ``` + +4. Start a Stripe CLI listener in another shell and forward only the event + types needed for the proof: + + ```bash + stripe listen \ + --events payment_intent.succeeded,payment_intent.payment_failed \ + --forward-to "http://localhost:8080/v1/ingest/stripe/${WEBHOOKERY_STRIPE_SOURCE_ID}" + ``` + +5. Copy the webhook signing secret from the `stripe listen` output into a local + shell variable. Do not paste it into docs, issues, screenshots, or commits. + + ```bash + export WEBHOOKERY_STRIPE_WEBHOOK_SECRET='replace-with-local-listener-secret' + go run ./cmd/whcp sources rotate-secret \ + --source-id "$WEBHOOKERY_STRIPE_SOURCE_ID" \ + --secret "$WEBHOOKERY_STRIPE_WEBHOOK_SECRET" \ + --reason "stripe test-mode listener secret" \ + --api-key "$WEBHOOKERY_API_KEY" + ``` + +For a public HTTPS endpoint created in Stripe Workbench, use the same source +ID in the endpoint URL, then rotate the source to the endpoint-specific signing +secret shown by Stripe. + +## Routing And Downstream Failure + +Create a disposable receiver that can first return `500` and later return +`204`. 
Use an HTTPS endpoint for public Stripe deliveries; local CLI forwarding +can use `http://localhost`. + +```bash +export WEBHOOKERY_FAILING_RECEIVER_URL='https://receiver.example.test/fail-first' + +go run ./cmd/whcp endpoints validate-url \ + --url "$WEBHOOKERY_FAILING_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp endpoints create \ + --name stripe-proof-receiver \ + --url "$WEBHOOKERY_FAILING_RECEIVER_URL" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ENDPOINT_ID=end_replace_me + +go run ./cmd/whcp routes create \ + --name stripe-payment-proof \ + --source-id "$WEBHOOKERY_STRIPE_SOURCE_ID" \ + --endpoint-id "$WEBHOOKERY_ENDPOINT_ID" \ + --event-types payment_intent.succeeded,payment_intent.payment_failed \ + --state draft \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_ROUTE_ID=rte_replace_me + +go run ./cmd/whcp routes activate \ + --route-id "$WEBHOOKERY_ROUTE_ID" \ + --reason "stripe proof route activation" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## Local And Live Test Modes + +Local deterministic test: + +- Run `examples/webhook-evidence-demo/run.sh`. +- Inspect `examples/webhook-evidence-demo/output/incident-report.md`. +- This uses synthetic Stripe-style vectors and does not contact Stripe. + +Stripe test-mode proof: + +1. Keep the Stripe CLI listener running. +2. Trigger a test event: + + ```bash + stripe trigger payment_intent.succeeded + ``` + +3. Find the captured event and inspect the timeline: + + ```bash + go run ./cmd/whcp events list --api-key "$WEBHOOKERY_API_KEY" + export WEBHOOKERY_EVENT_ID=evt_replace_me + go run ./cmd/whcp events timeline \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --format markdown \ + --api-key "$WEBHOOKERY_API_KEY" + ``` + +The timeline should show durable capture, provider verification, route +matching, delivery attempts, and any retry or DLQ transition. 
+ +## Signature Verification + +Stripe includes a timestamp in `Stripe-Signature`; Webhookery verifies the +signed timestamp and raw body before treating the event as trusted. Keep these +operating rules: + +- Preserve exact raw request bytes until verification completes. +- Use the endpoint-specific signing secret for the exact listener or Stripe + Workbench endpoint. +- Reject or quarantine missing, malformed, stale, or wrong-secret signatures. +- Keep system clocks synchronized; timestamp verification depends on current + time. + +## Duplicate Handling + +Stripe can retry deliveries and manual resends can create additional inbound +attempts. Webhookery records duplicate evidence rather than hiding it. Use the +event timeline and incident report to distinguish: + +- original capture evidence; +- duplicate receipts; +- dedupe suppression, when processing is suppressed; +- downstream retry attempts; and +- operator-initiated Webhookery replay. + +## Retry Expectations + +Stripe live-mode delivery retries can continue for up to three days with +exponential backoff. Stripe sandbox deliveries have a shorter retry pattern. +Webhookery's own downstream delivery retries are separate evidence: they record +attempts from Webhookery to your configured receiver after the provider event +has already been durably captured. + +Do not collapse Stripe retry evidence and Webhookery replay evidence into one +claim. They happen on different sides of the control plane. + +## Manual Recovery Limitations + +Stripe manual resend is useful for proof and recovery, but it is not a +provider-side completeness guarantee. If the provider cannot redeliver an +older event, Webhookery can only report the gap unless reconciliation against +Stripe APIs is explicitly configured and records provider API evidence. + +Recovered provider API evidence is not the same as signed webhook evidence. 
+ +## Replay Workflow + +After the receiver is fixed, replay with an explicit reason: + +```bash +go run ./cmd/whcp replay-jobs dry-run \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "stripe proof receiver fixed before replay" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp replay-jobs create \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --config-mode original \ + --reason-code receiver_fixed \ + --reason "stripe proof receiver fixed before replay" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +If the event reached DLQ, release the DLQ entry instead: + +```bash +go run ./cmd/whcp dead-letter list --api-key "$WEBHOOKERY_API_KEY" +export WEBHOOKERY_DLQ_ID=dlq_replace_me +go run ./cmd/whcp dead-letter release \ + --entry-id "$WEBHOOKERY_DLQ_ID" \ + --reason-code receiver_fixed \ + --reason "stripe proof receiver recovered" \ + --api-key "$WEBHOOKERY_API_KEY" +``` + +## Incident Packet Example + +Generate a report and evidence export after replay: + +```bash +go run ./cmd/whcp incidents create \ + --title "Stripe test-mode payment webhook failed then replayed" \ + --reason "stripe proof investigation" \ + --api-key "$WEBHOOKERY_API_KEY" + +export WEBHOOKERY_INCIDENT_ID=inc_replace_me + +go run ./cmd/whcp incidents add-event \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --event-id "$WEBHOOKERY_EVENT_ID" \ + --reason "attach Stripe test event" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents generate-report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "stripe proof report" \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents report \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --format markdown \ + --output stripe-incident-report.private.md \ + --api-key "$WEBHOOKERY_API_KEY" + +go run ./cmd/whcp incidents export \ + --incident-id "$WEBHOOKERY_INCIDENT_ID" \ + --reason "stripe proof evidence export" \ + --output stripe-incident-evidence.private.tar.gz \ + 
--api-key "$WEBHOOKERY_API_KEY" +``` + +Keep completed live reports and bundles private. Commit only sanitized samples +such as `docs/live-provider-proof/samples/stripe-incident-report.redacted.md`. + +## Non-Claims + +This guide does not prove: + +- downstream business processing succeeded; +- Stripe will redeliver every provider-side event forever; +- global event ordering; +- exactly-once delivery; +- compliance or legal evidentiary certification; or +- safe use of production customer data in public evidence. diff --git a/docs/reference/api-contract-matrix.md b/docs/reference/api-contract-matrix.md new file mode 100644 index 0000000..6d965a5 --- /dev/null +++ b/docs/reference/api-contract-matrix.md @@ -0,0 +1,222 @@ +# Webhookery API Contract Matrix + +Generated from `openapi.yaml`. Do not edit operation rows manually; run `make openapi-reference-generate`. + +Total operations: `214`. + +| Method | Path | Operation ID | Tag | Auth | Parameters | Request | Responses | +| --- | --- | --- | --- | --- | --- | --- | --- | +| `GET` | `/healthz` | `getHealthz` | System | none | - | - | 200 | +| `GET` | `/metrics` | `getMetrics` | System | none | - | - | 200 | +| `GET` | `/openapi.yaml` | `getOpenapiYaml` | System | none | - | - | 200 | +| `GET` | `/readyz` | `getReadyz` | System | none | - | - | 200, 503 | +| `GET` | `/v1/access-policies` | `getAccessPolicies` | Auth And Identity | bearerAuth | - | - | 200 | +| `POST` | `/v1/access-policies` | `postAccessPolicies` | Auth And Identity | bearerAuth | - | application/json | 201 | +| `PATCH` | `/v1/access-policies/{policy_id}` | `patchAccessPoliciesPolicyId` | Auth And Identity | bearerAuth | path:policy_id | application/json | 200 | +| `DELETE` | `/v1/access-policies/{policy_id}` | `deleteAccessPoliciesPolicyId` | Auth And Identity | bearerAuth | path:policy_id | application/json | 200 | +| `GET` | `/v1/adapters` | `getAdapters` | Sources And Providers | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/adapters` | 
`postAdapters` | Sources And Providers | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/adapters/{adapter_id}` | `getAdaptersAdapterId` | Sources And Providers | bearerAuth | path:adapter_id | - | 200 | +| `GET` | `/v1/adapters/{adapter_id}/versions` | `getAdaptersAdapterIdVersions` | Sources And Providers | bearerAuth | path:adapter_id, ref:Limit | - | 200 | +| `POST` | `/v1/adapters/{adapter_id}/versions` | `postAdaptersAdapterIdVersions` | Sources And Providers | bearerAuth | path:adapter_id | application/json | 201 | +| `POST` | `/v1/adapters/{adapter_id}/versions/{version_id}/test-vectors` | `postAdaptersAdapterIdVersionsVersionIdTestVectors` | Sources And Providers | bearerAuth | path:adapter_id, path:version_id | application/json | 201 | +| `POST` | `/v1/adapters/{adapter_id}/versions/{version_id}:transition` | `postAdaptersAdapterIdVersionsVersionIdTransition` | Sources And Providers | bearerAuth | path:adapter_id, path:version_id | application/json | 200 | +| `GET` | `/v1/admin/retention-policies` | `getAdminRetentionPolicies` | Audit And Retention | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/admin/retention-policies` | `postAdminRetentionPolicies` | Audit And Retention | bearerAuth | - | application/json | 201 | +| `PATCH` | `/v1/admin/retention-policies/{policy_id}` | `patchAdminRetentionPoliciesPolicyId` | Audit And Retention | bearerAuth | path:policy_id | application/json | 200 | +| `GET` | `/v1/alert-firings` | `getAlertFirings` | Operations | bearerAuth | ref:Limit, query:state | - | 200 | +| `GET` | `/v1/alert-firings/{firing_id}` | `getAlertFiringsFiringId` | Operations | bearerAuth | path:firing_id | - | 200 | +| `POST` | `/v1/alert-firings/{firing_id}:acknowledge` | `postAlertFiringsFiringIdAcknowledge` | Operations | bearerAuth | path:firing_id | application/json | 200 | +| `GET` | `/v1/alerts` | `getAlerts` | Operations | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/alerts` | `postAlerts` | Operations | bearerAuth | 
- | application/json | 201, 403 | +| `GET` | `/v1/alerts/{alert_id}` | `getAlertsAlertId` | Operations | bearerAuth | path:alert_id | - | 200 | +| `PATCH` | `/v1/alerts/{alert_id}` | `patchAlertsAlertId` | Operations | bearerAuth | path:alert_id | application/json | 200 | +| `DELETE` | `/v1/alerts/{alert_id}` | `deleteAlertsAlertId` | Operations | bearerAuth | path:alert_id | application/json | 200 | +| `GET` | `/v1/api-keys` | `getApiKeys` | API Keys | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/api-keys` | `postApiKeys` | API Keys | bearerAuth | - | application/json | 201, 403 | +| `POST` | `/v1/api-keys/{api_key_id}:revoke` | `postApiKeysApiKeyIdRevoke` | API Keys | bearerAuth | path:api_key_id | application/json | 200 | +| `GET` | `/v1/audit-chain/anchors` | `getAuditChainAnchors` | Audit And Retention | bearerAuth | ref:Limit | - | 200 | +| `GET` | `/v1/audit-chain/anchors/{anchor_id}` | `getAuditChainAnchorsAnchorId` | Audit And Retention | bearerAuth | path:anchor_id | - | 200, 404 | +| `GET` | `/v1/audit-chain/head` | `getAuditChainHead` | Audit And Retention | bearerAuth | - | - | 200 | +| `POST` | `/v1/audit-chain:anchor` | `postAuditChainAnchor` | Audit And Retention | bearerAuth | - | application/json | 201, 403 | +| `POST` | `/v1/audit-chain:verify` | `postAuditChainVerify` | Audit And Retention | bearerAuth | - | application/json | 200 | +| `GET` | `/v1/audit-events` | `getAuditEvents` | Audit And Retention | bearerAuth | - | - | 200 | +| `POST` | `/v1/audit-events:export` | `postAuditEventsExport` | Audit And Retention | bearerAuth | - | application/json | 202, 403 | +| `GET` | `/v1/audit-exports` | `getAuditExports` | Audit And Retention | bearerAuth | ref:Limit | - | 200 | +| `GET` | `/v1/audit-exports/{export_id}` | `getAuditExportsExportId` | Audit And Retention | bearerAuth | path:export_id | - | 200, 404 | +| `GET` | `/v1/audit-exports/{export_id}:download` | `getAuditExportsExportIdDownload` | Audit And Retention | bearerAuth | 
path:export_id | - | 200, 403, 410 | +| `POST` | `/v1/auth/logout` | `postAuthLogout` | Auth And Identity | bearerAuth | - | - | 204 | +| `GET` | `/v1/auth/oidc/callback` | `getAuthOidcCallback` | Auth And Identity | none | query:state, query:code | - | 200 | +| `GET` | `/v1/auth/oidc/login` | `getAuthOidcLogin` | Auth And Identity | none | query:tenant_id, query:provider_id, query:redirect_after | - | 302 | +| `GET` | `/v1/auth/session` | `getAuthSession` | Auth And Identity | bearerAuth | - | - | 200 | +| `GET` | `/v1/auth/sessions` | `getAuthSessions` | Auth And Identity | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/auth/sessions/{session_id}:revoke` | `postAuthSessionsSessionIdRevoke` | Auth And Identity | bearerAuth | path:session_id | application/json | 200 | +| `POST` | `/v1/authz:explain` | `postAuthzExplain` | Auth And Identity | bearerAuth | - | application/json | 200 | +| `GET` | `/v1/dead-letter` | `getDeadLetter` | Delivery And Replay | bearerAuth | - | - | 200 | +| `POST` | `/v1/dead-letter/{entry_id}:release` | `postDeadLetterEntryIdRelease` | Delivery And Replay | bearerAuth | path:entry_id | application/json | 202 | +| `POST` | `/v1/dead-letter:bulk-release` | `postDeadLetterBulkRelease` | Delivery And Replay | bearerAuth | - | application/json | 202 | +| `GET` | `/v1/deliveries` | `getDeliveries` | Delivery And Replay | bearerAuth | - | - | 200 | +| `GET` | `/v1/deliveries/{delivery_id}/attempts` | `getDeliveriesDeliveryIdAttempts` | Delivery And Replay | bearerAuth | path:delivery_id | - | 200 | +| `POST` | `/v1/deliveries/{delivery_id}:cancel` | `postDeliveriesDeliveryIdCancel` | Delivery And Replay | bearerAuth | path:delivery_id | application/json | 200 | +| `POST` | `/v1/deliveries/{delivery_id}:retry` | `postDeliveriesDeliveryIdRetry` | Delivery And Replay | bearerAuth | path:delivery_id | application/json | 202 | +| `GET` | `/v1/delivery-attempts/{attempt_id}` | `getDeliveryAttemptsAttemptId` | Delivery And Replay | bearerAuth | 
path:attempt_id | - | 200 | +| `GET` | `/v1/endpoint-health` | `getEndpointHealth` | Operations | bearerAuth | - | - | 200 | +| `GET` | `/v1/endpoints` | `getEndpoints` | Endpoints And Routing | bearerAuth | - | - | 200 | +| `POST` | `/v1/endpoints` | `postEndpoints` | Endpoints And Routing | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/endpoints/{endpoint_id}` | `getEndpointsEndpointId` | Endpoints And Routing | bearerAuth | path:endpoint_id | - | 200, 404 | +| `PATCH` | `/v1/endpoints/{endpoint_id}` | `patchEndpointsEndpointId` | Endpoints And Routing | bearerAuth | path:endpoint_id | application/json | 200, 422 | +| `DELETE` | `/v1/endpoints/{endpoint_id}` | `deleteEndpointsEndpointId` | Endpoints And Routing | bearerAuth | path:endpoint_id | application/json | 200 | +| `POST` | `/v1/endpoints/{endpoint_id}/secrets:rotate` | `postEndpointsEndpointIdSecretsRotate` | Endpoints And Routing | bearerAuth | path:endpoint_id | application/json | 200 | +| `POST` | `/v1/endpoints/{endpoint_id}:test` | `postEndpointsEndpointIdTest` | Endpoints And Routing | bearerAuth | path:endpoint_id | application/json | 202 | +| `POST` | `/v1/endpoints:validate-url` | `postEndpointsValidateUrl` | Endpoints And Routing | bearerAuth | - | - | 200 | +| `GET` | `/v1/event-types` | `getEventTypes` | Schemas And Transformations | bearerAuth | - | - | 200 | +| `POST` | `/v1/event-types` | `postEventTypes` | Schemas And Transformations | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/event-types/{event_type}` | `getEventTypesEventType` | Schemas And Transformations | bearerAuth | path:event_type | - | 200, 404 | +| `PATCH` | `/v1/event-types/{event_type}` | `patchEventTypesEventType` | Schemas And Transformations | bearerAuth | path:event_type | application/json | 200 | +| `DELETE` | `/v1/event-types/{event_type}` | `deleteEventTypesEventType` | Schemas And Transformations | bearerAuth | path:event_type | application/json | 200 | +| `GET` | 
`/v1/event-types/{event_type}/schemas` | `getEventTypesEventTypeSchemas` | Schemas And Transformations | bearerAuth | path:event_type | - | 200 | +| `POST` | `/v1/event-types/{event_type}/schemas` | `postEventTypesEventTypeSchemas` | Schemas And Transformations | bearerAuth | path:event_type | application/json | 201 | +| `GET` | `/v1/event-types/{event_type}/schemas/{schema_version}` | `getEventTypesEventTypeSchemasSchemaVersion` | Schemas And Transformations | bearerAuth | path:event_type, path:schema_version | - | 200, 404 | +| `PATCH` | `/v1/event-types/{event_type}/schemas/{schema_version}` | `patchEventTypesEventTypeSchemasSchemaVersion` | Schemas And Transformations | bearerAuth | path:event_type, path:schema_version | application/json | 200 | +| `DELETE` | `/v1/event-types/{event_type}/schemas/{schema_version}` | `deleteEventTypesEventTypeSchemasSchemaVersion` | Schemas And Transformations | bearerAuth | path:event_type, path:schema_version | application/json | 200 | +| `POST` | `/v1/event-types/{event_type}/schemas/{schema_version}:check-compatibility` | `postEventTypesEventTypeSchemasSchemaVersionCheckCompatibility` | Schemas And Transformations | bearerAuth | path:event_type, path:schema_version | application/json | 200 | +| `POST` | `/v1/event-types/{event_type}/schemas/{schema_version}:validate` | `postEventTypesEventTypeSchemasSchemaVersionValidate` | Schemas And Transformations | bearerAuth | path:event_type, path:schema_version | application/json | 200 | +| `GET` | `/v1/events` | `getEvents` | Events And Ingestion | bearerAuth | query:limit, query:provider, query:external_id, query:delivery_id, query:status, query:verification, query:received_after, query:route_id | - | 200, 400 | +| `POST` | `/v1/events` | `postEvents` | Events And Ingestion | bearerAuth, producerMTLS | - | application/json | 202, 400, 401, 403 | +| `GET` | `/v1/events/{event_id}` | `getEventsEventId` | Events And Ingestion | bearerAuth | path:event_id | - | 200, 404 | +| `GET` | 
`/v1/events/{event_id}/normalized` | `getEventsEventIdNormalized` | Events And Ingestion | bearerAuth | path:event_id, query:include_data | - | 200, 403, 410 | +| `GET` | `/v1/events/{event_id}/raw` | `getEventsEventIdRaw` | Events And Ingestion | bearerAuth | path:event_id, query:reason | - | 200, 400, 403, 410 | +| `GET` | `/v1/events/{event_id}/timeline` | `getEventsEventIdTimeline` | Events And Ingestion | bearerAuth | path:event_id, ref:Limit | - | 200 | +| `GET` | `/v1/identity-providers` | `getIdentityProviders` | Auth And Identity | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/identity-providers` | `postIdentityProviders` | Auth And Identity | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/identity-providers/{provider_id}` | `getIdentityProvidersProviderId` | Auth And Identity | bearerAuth | path:provider_id | - | 200 | +| `PATCH` | `/v1/identity-providers/{provider_id}` | `patchIdentityProvidersProviderId` | Auth And Identity | bearerAuth | path:provider_id | application/json | 200 | +| `DELETE` | `/v1/identity-providers/{provider_id}` | `deleteIdentityProvidersProviderId` | Auth And Identity | bearerAuth | path:provider_id | application/json | 200 | +| `POST` | `/v1/identity-providers/{provider_id}:test` | `postIdentityProvidersProviderIdTest` | Auth And Identity | bearerAuth | path:provider_id | application/json | 200 | +| `GET` | `/v1/incidents` | `getIncidents` | Incidents | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/incidents` | `postIncidents` | Incidents | bearerAuth | - | application/json | 201, 403 | +| `GET` | `/v1/incidents/{incident_id}` | `getIncidentsIncidentId` | Incidents | bearerAuth | path:incident_id | - | 200, 404 | +| `POST` | `/v1/incidents/{incident_id}/events` | `postIncidentsIncidentIdEvents` | Incidents | bearerAuth | path:incident_id | application/json | 201, 404 | +| `DELETE` | `/v1/incidents/{incident_id}/events/{event_id}` | `deleteIncidentsIncidentIdEventsEventId` | Incidents | bearerAuth | 
path:incident_id, path:event_id | application/json | 200 | +| `POST` | `/v1/incidents/{incident_id}/evidence-export` | `postIncidentsIncidentIdEvidenceExport` | Incidents | bearerAuth | path:incident_id | application/json | 202 | +| `POST` | `/v1/incidents/{incident_id}/generate-report` | `postIncidentsIncidentIdGenerateReport` | Incidents | bearerAuth | path:incident_id | application/json | 201 | +| `GET` | `/v1/incidents/{incident_id}/report` | `getIncidentsIncidentIdReport` | Incidents | bearerAuth | path:incident_id, query:format | - | 200 | +| `POST` | `/v1/ingest/cloudevents/{source_id}` | `postIngestCloudeventsSourceId` | Events And Ingestion | none | - | - | 200, 431 | +| `POST` | `/v1/ingest/generic-jwt/{source_id}` | `postIngestGenericJwtSourceId` | Events And Ingestion | none | - | - | 200, 401, 431 | +| `POST` | `/v1/ingest/github/{source_id}` | `postIngestGithubSourceId` | Events And Ingestion | none | - | - | 200, 431 | +| `POST` | `/v1/ingest/shopify/{source_id}` | `postIngestShopifySourceId` | Events And Ingestion | none | - | - | 200, 431 | +| `POST` | `/v1/ingest/slack/{source_id}` | `postIngestSlackSourceId` | Events And Ingestion | none | - | - | 200, 431 | +| `POST` | `/v1/ingest/stripe/{source_id}` | `postIngestStripeSourceId` | Events And Ingestion | none | - | - | 200, 431 | +| `POST` | `/v1/ingest/{tenant_id}/{source_id}` | `postIngestTenantIdSourceId` | Events And Ingestion | none | path:tenant_id, path:source_id | application/json | 200, 401, 413, 431, 503 | +| `GET` | `/v1/notification-channels` | `getNotificationChannels` | Signal Egress | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/notification-channels` | `postNotificationChannels` | Signal Egress | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/notification-channels/{channel_id}` | `getNotificationChannelsChannelId` | Signal Egress | bearerAuth | path:channel_id | - | 200 | +| `PATCH` | `/v1/notification-channels/{channel_id}` | `patchNotificationChannelsChannelId` 
| Signal Egress | bearerAuth | path:channel_id | application/json | 200 | +| `DELETE` | `/v1/notification-channels/{channel_id}` | `deleteNotificationChannelsChannelId` | Signal Egress | bearerAuth | path:channel_id | application/json | 200 | +| `POST` | `/v1/notification-channels/{channel_id}:test` | `postNotificationChannelsChannelIdTest` | Signal Egress | bearerAuth | path:channel_id | application/json | 202 | +| `GET` | `/v1/notification-deliveries` | `getNotificationDeliveries` | Signal Egress | bearerAuth | ref:Limit, query:state | - | 200 | +| `GET` | `/v1/notification-deliveries/{delivery_id}/attempts` | `getNotificationDeliveriesDeliveryIdAttempts` | Signal Egress | bearerAuth | path:delivery_id, ref:Limit | - | 200 | +| `POST` | `/v1/notification-deliveries/{delivery_id}:retry` | `postNotificationDeliveriesDeliveryIdRetry` | Signal Egress | bearerAuth | path:delivery_id | application/json | 200 | +| `POST` | `/v1/oauth/token` | `postOauthToken` | Producer Trust | basicAuth | - | application/x-www-form-urlencoded | 200, 400, 401 | +| `GET` | `/v1/ops/config` | `getOpsConfig` | Operations | bearerAuth | - | - | 200 | +| `GET` | `/v1/ops/metrics` | `getOpsMetrics` | Operations | bearerAuth | - | - | 200 | +| `GET` | `/v1/ops/metrics/rollups` | `getOpsMetricsRollups` | Operations | bearerAuth | ref:Limit, query:metric_name | - | 200, 400 | +| `GET` | `/v1/ops/queues` | `getOpsQueues` | Operations | bearerAuth | - | - | 200 | +| `GET` | `/v1/ops/storage` | `getOpsStorage` | Operations | bearerAuth | - | - | 200 | +| `GET` | `/v1/ops/workers` | `getOpsWorkers` | Operations | bearerAuth | ref:Limit | - | 200 | +| `GET` | `/v1/ops/workers/{worker_id}` | `getOpsWorkersWorkerId` | Operations | bearerAuth | path:worker_id | - | 200, 404 | +| `GET` | `/v1/producer-clients` | `getProducerClients` | Producer Trust | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/producer-clients` | `postProducerClients` | Producer Trust | bearerAuth | - | application/json | 201, 
403 | +| `GET` | `/v1/producer-clients/{client_id}` | `getProducerClientsClientId` | Producer Trust | bearerAuth | path:client_id | - | 200 | +| `PATCH` | `/v1/producer-clients/{client_id}` | `patchProducerClientsClientId` | Producer Trust | bearerAuth | path:client_id | application/json | 200 | +| `DELETE` | `/v1/producer-clients/{client_id}` | `deleteProducerClientsClientId` | Producer Trust | bearerAuth | path:client_id | application/json | 200 | +| `POST` | `/v1/producer-clients/{client_id}/secrets:rotate` | `postProducerClientsClientIdSecretsRotate` | Producer Trust | bearerAuth | path:client_id | application/json | 200 | +| `GET` | `/v1/producer-mtls-identities` | `getProducerMtlsIdentities` | Producer Trust | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/producer-mtls-identities` | `postProducerMtlsIdentities` | Producer Trust | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/producer-mtls-identities/{identity_id}` | `getProducerMtlsIdentitiesIdentityId` | Producer Trust | bearerAuth | path:identity_id | - | 200 | +| `PATCH` | `/v1/producer-mtls-identities/{identity_id}` | `patchProducerMtlsIdentitiesIdentityId` | Producer Trust | bearerAuth | path:identity_id | application/json | 200 | +| `DELETE` | `/v1/producer-mtls-identities/{identity_id}` | `deleteProducerMtlsIdentitiesIdentityId` | Producer Trust | bearerAuth | path:identity_id | application/json | 200 | +| `POST` | `/v1/producer-mtls-identities/{identity_id}:verify` | `postProducerMtlsIdentitiesIdentityIdVerify` | Producer Trust | bearerAuth | path:identity_id | application/json | 200 | +| `GET` | `/v1/provider-connections` | `getProviderConnections` | Sources And Providers | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/provider-connections` | `postProviderConnections` | Sources And Providers | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/provider-connections/{connection_id}` | `getProviderConnectionsConnectionId` | Sources And Providers | bearerAuth | 
path:connection_id | - | 200 | +| `POST` | `/v1/provider-connections/{connection_id}:revoke` | `postProviderConnectionsConnectionIdRevoke` | Sources And Providers | bearerAuth | path:connection_id | application/json | 200 | +| `POST` | `/v1/provider-connections/{connection_id}:verify` | `postProviderConnectionsConnectionIdVerify` | Sources And Providers | bearerAuth | path:connection_id | application/json | 200 | +| `GET` | `/v1/quarantine` | `getQuarantine` | Delivery And Replay | bearerAuth | - | - | 200 | +| `POST` | `/v1/quarantine/{entry_id}:approve` | `postQuarantineEntryIdApprove` | Delivery And Replay | bearerAuth | path:entry_id | - | 200 | +| `POST` | `/v1/quarantine/{entry_id}:reject` | `postQuarantineEntryIdReject` | Delivery And Replay | bearerAuth | path:entry_id | - | 200 | +| `GET` | `/v1/reconciliation-jobs` | `getReconciliationJobs` | Reconciliation | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/reconciliation-jobs` | `postReconciliationJobs` | Reconciliation | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/reconciliation-jobs/{job_id}` | `getReconciliationJobsJobId` | Reconciliation | bearerAuth | path:job_id | - | 200 | +| `GET` | `/v1/reconciliation-jobs/{job_id}/items` | `getReconciliationJobsJobIdItems` | Reconciliation | bearerAuth | path:job_id, ref:Limit | - | 200 | +| `POST` | `/v1/reconciliation-jobs/{job_id}:cancel` | `postReconciliationJobsJobIdCancel` | Reconciliation | bearerAuth | path:job_id | application/json | 200 | +| `POST` | `/v1/reconciliation-jobs:dry-run` | `postReconciliationJobsDryRun` | Reconciliation | bearerAuth | - | application/json | 200 | +| `GET` | `/v1/replay-approval-policies` | `getReplayApprovalPolicies` | Delivery And Replay | bearerAuth | - | - | 200 | +| `POST` | `/v1/replay-approval-policies` | `postReplayApprovalPolicies` | Delivery And Replay | bearerAuth | - | application/json | 201 | +| `DELETE` | `/v1/replay-approval-policies/{policy_id}` | `deleteReplayApprovalPoliciesPolicyId` | 
Delivery And Replay | bearerAuth | path:policy_id | application/json | 200 | +| `GET` | `/v1/replay-jobs` | `getReplayJobs` | Delivery And Replay | bearerAuth | - | - | 200 | +| `POST` | `/v1/replay-jobs` | `postReplayJobs` | Delivery And Replay | bearerAuth | - | application/json | 202 | +| `POST` | `/v1/replay-jobs/preview` | `postReplayJobsPreview` | Delivery And Replay | bearerAuth | - | application/json | 200 | +| `POST` | `/v1/replay-jobs/{replay_job_id}:approve` | `postReplayJobsReplayJobIdApprove` | Delivery And Replay | bearerAuth | path:replay_job_id | application/json | 200 | +| `POST` | `/v1/replay-jobs/{replay_job_id}:cancel` | `postReplayJobsReplayJobIdCancel` | Delivery And Replay | bearerAuth | path:replay_job_id | application/json | 200 | +| `POST` | `/v1/replay-jobs/{replay_job_id}:pause` | `postReplayJobsReplayJobIdPause` | Delivery And Replay | bearerAuth | path:replay_job_id | application/json | 200 | +| `POST` | `/v1/replay-jobs/{replay_job_id}:resume` | `postReplayJobsReplayJobIdResume` | Delivery And Replay | bearerAuth | path:replay_job_id | application/json | 200 | +| `POST` | `/v1/replay-jobs:dry-run` | `postReplayJobsDryRun` | Delivery And Replay | bearerAuth | - | application/json | 200 | +| `GET` | `/v1/retry-policies` | `getRetryPolicies` | Endpoints And Routing | bearerAuth | - | - | 200 | +| `POST` | `/v1/retry-policies` | `postRetryPolicies` | Endpoints And Routing | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/retry-policies/{retry_policy_id}` | `getRetryPoliciesRetryPolicyId` | Endpoints And Routing | bearerAuth | path:retry_policy_id | - | 200, 404 | +| `PATCH` | `/v1/retry-policies/{retry_policy_id}` | `patchRetryPoliciesRetryPolicyId` | Endpoints And Routing | bearerAuth | path:retry_policy_id | application/json | 200 | +| `DELETE` | `/v1/retry-policies/{retry_policy_id}` | `deleteRetryPoliciesRetryPolicyId` | Endpoints And Routing | bearerAuth | path:retry_policy_id | application/json | 200 | +| `GET` | 
`/v1/role-bindings` | `getRoleBindings` | Auth And Identity | bearerAuth | - | - | 200 | +| `POST` | `/v1/role-bindings` | `postRoleBindings` | Auth And Identity | bearerAuth | - | application/json | 201 | +| `PATCH` | `/v1/role-bindings/{binding_id}` | `patchRoleBindingsBindingId` | Auth And Identity | bearerAuth | path:binding_id | application/json | 200 | +| `DELETE` | `/v1/role-bindings/{binding_id}` | `deleteRoleBindingsBindingId` | Auth And Identity | bearerAuth | path:binding_id | application/json | 200 | +| `GET` | `/v1/routes` | `getRoutes` | Endpoints And Routing | bearerAuth | - | - | 200 | +| `POST` | `/v1/routes` | `postRoutes` | Endpoints And Routing | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/routes/{route_id}` | `getRoutesRouteId` | Endpoints And Routing | bearerAuth | path:route_id | - | 200, 404 | +| `PATCH` | `/v1/routes/{route_id}` | `patchRoutesRouteId` | Endpoints And Routing | bearerAuth | path:route_id | application/json | 200 | +| `DELETE` | `/v1/routes/{route_id}` | `deleteRoutesRouteId` | Endpoints And Routing | bearerAuth | path:route_id | application/json | 200 | +| `GET` | `/v1/routes/{route_id}/versions` | `getRoutesRouteIdVersions` | Endpoints And Routing | bearerAuth | path:route_id, ref:Limit | - | 200 | +| `POST` | `/v1/routes/{route_id}:activate` | `postRoutesRouteIdActivate` | Endpoints And Routing | bearerAuth | path:route_id | - | 200 | +| `POST` | `/v1/routes/{route_id}:dry-run` | `postRoutesRouteIdDryRun` | Endpoints And Routing | bearerAuth | path:route_id | - | 200 | +| `GET` | `/v1/scim-tokens` | `getScimTokens` | Auth And Identity | bearerAuth | - | - | 200 | +| `POST` | `/v1/scim-tokens` | `postScimTokens` | Auth And Identity | bearerAuth | - | application/json | 201 | +| `DELETE` | `/v1/scim-tokens/{token_id}` | `deleteScimTokensTokenId` | Auth And Identity | bearerAuth | path:token_id | application/json | 200 | +| `GET` | `/v1/scim/v2/Groups` | `getScimV2Groups` | Auth And Identity | bearerAuth | - | - 
| 200 | +| `POST` | `/v1/scim/v2/Groups` | `postScimV2Groups` | Auth And Identity | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/scim/v2/Groups/{group_id}` | `getScimV2GroupsGroupId` | Auth And Identity | bearerAuth | path:group_id | - | 200 | +| `PUT` | `/v1/scim/v2/Groups/{group_id}` | `putScimV2GroupsGroupId` | Auth And Identity | bearerAuth | path:group_id | application/json | 200 | +| `PATCH` | `/v1/scim/v2/Groups/{group_id}` | `patchScimV2GroupsGroupId` | Auth And Identity | bearerAuth | path:group_id | application/json | 200 | +| `DELETE` | `/v1/scim/v2/Groups/{group_id}` | `deleteScimV2GroupsGroupId` | Auth And Identity | bearerAuth | path:group_id | - | 200 | +| `GET` | `/v1/scim/v2/Users` | `getScimV2Users` | Auth And Identity | bearerAuth | - | - | 200 | +| `POST` | `/v1/scim/v2/Users` | `postScimV2Users` | Auth And Identity | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/scim/v2/Users/{user_id}` | `getScimV2UsersUserId` | Auth And Identity | bearerAuth | path:user_id | - | 200 | +| `PUT` | `/v1/scim/v2/Users/{user_id}` | `putScimV2UsersUserId` | Auth And Identity | bearerAuth | path:user_id | application/json | 200 | +| `PATCH` | `/v1/scim/v2/Users/{user_id}` | `patchScimV2UsersUserId` | Auth And Identity | bearerAuth | path:user_id | application/json | 200 | +| `DELETE` | `/v1/scim/v2/Users/{user_id}` | `deleteScimV2UsersUserId` | Auth And Identity | bearerAuth | path:user_id | - | 200 | +| `GET` | `/v1/siem-deliveries` | `getSiemDeliveries` | Signal Egress | bearerAuth | ref:Limit, query:state | - | 200 | +| `GET` | `/v1/siem-deliveries/{delivery_id}/attempts` | `getSiemDeliveriesDeliveryIdAttempts` | Signal Egress | bearerAuth | path:delivery_id, ref:Limit | - | 200 | +| `POST` | `/v1/siem-deliveries/{delivery_id}:retry` | `postSiemDeliveriesDeliveryIdRetry` | Signal Egress | bearerAuth | path:delivery_id | application/json | 200 | +| `GET` | `/v1/siem-sinks` | `getSiemSinks` | Signal Egress | bearerAuth | ref:Limit | - | 200 
| +| `POST` | `/v1/siem-sinks` | `postSiemSinks` | Signal Egress | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/siem-sinks/{sink_id}` | `getSiemSinksSinkId` | Signal Egress | bearerAuth | path:sink_id | - | 200 | +| `PATCH` | `/v1/siem-sinks/{sink_id}` | `patchSiemSinksSinkId` | Signal Egress | bearerAuth | path:sink_id | application/json | 200 | +| `DELETE` | `/v1/siem-sinks/{sink_id}` | `deleteSiemSinksSinkId` | Signal Egress | bearerAuth | path:sink_id | application/json | 200 | +| `POST` | `/v1/siem-sinks/{sink_id}:test` | `postSiemSinksSinkIdTest` | Signal Egress | bearerAuth | path:sink_id | application/json | 202 | +| `GET` | `/v1/sources` | `getSources` | Sources And Providers | bearerAuth | - | - | 200 | +| `POST` | `/v1/sources` | `postSources` | Sources And Providers | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/sources/{source_id}` | `getSourcesSourceId` | Sources And Providers | bearerAuth | path:source_id | - | 200, 404 | +| `PATCH` | `/v1/sources/{source_id}` | `patchSourcesSourceId` | Sources And Providers | bearerAuth | path:source_id | application/json | 200 | +| `DELETE` | `/v1/sources/{source_id}` | `deleteSourcesSourceId` | Sources And Providers | bearerAuth | path:source_id | application/json | 200 | +| `POST` | `/v1/sources/{source_id}/secrets:rotate` | `postSourcesSourceIdSecretsRotate` | Sources And Providers | bearerAuth | path:source_id | application/json | 200 | +| `GET` | `/v1/subscriptions` | `getSubscriptions` | Endpoints And Routing | bearerAuth | - | - | 200 | +| `POST` | `/v1/subscriptions` | `postSubscriptions` | Endpoints And Routing | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/subscriptions/{subscription_id}` | `getSubscriptionsSubscriptionId` | Endpoints And Routing | bearerAuth | path:subscription_id | - | 200, 404 | +| `PATCH` | `/v1/subscriptions/{subscription_id}` | `patchSubscriptionsSubscriptionId` | Endpoints And Routing | bearerAuth | path:subscription_id | application/json | 
200 | +| `DELETE` | `/v1/subscriptions/{subscription_id}` | `deleteSubscriptionsSubscriptionId` | Endpoints And Routing | bearerAuth | path:subscription_id | application/json | 200 | +| `GET` | `/v1/transformations` | `getTransformations` | Schemas And Transformations | bearerAuth | ref:Limit | - | 200 | +| `POST` | `/v1/transformations` | `postTransformations` | Schemas And Transformations | bearerAuth | - | application/json | 201 | +| `GET` | `/v1/transformations/{transformation_id}` | `getTransformationsTransformationId` | Schemas And Transformations | bearerAuth | path:transformation_id | - | 200 | +| `GET` | `/v1/transformations/{transformation_id}/versions` | `getTransformationsTransformationIdVersions` | Schemas And Transformations | bearerAuth | path:transformation_id, ref:Limit | - | 200 | +| `POST` | `/v1/transformations/{transformation_id}/versions` | `postTransformationsTransformationIdVersions` | Schemas And Transformations | bearerAuth | path:transformation_id | application/json | 201 | +| `POST` | `/v1/transformations/{transformation_id}/versions/{version_id}:activate` | `postTransformationsTransformationIdVersionsVersionIdActivate` | Schemas And Transformations | bearerAuth | path:transformation_id, path:version_id | application/json | 200 | diff --git a/docs/reference/openapi.md b/docs/reference/openapi.md new file mode 100644 index 0000000..fe98fb0 --- /dev/null +++ b/docs/reference/openapi.md @@ -0,0 +1,32 @@ +# OpenAPI Reference + +`openapi.yaml` is the canonical REST API contract for Webhookery API version `0.1.0`. + +Self-hosted webhook evidence and delivery control plane. 
+ +- Rendered HTML reference: [`docs/openapi/index.html`](../openapi/index.html) +- API contract matrix: [`docs/reference/api-contract-matrix.md`](api-contract-matrix.md) +- Total operations: `214` + +## Operations By Tag + +| Tag | Operations | +| --- | ---: | +| API Keys | 3 | +| Audit And Retention | 13 | +| Auth And Identity | 36 | +| Delivery And Replay | 22 | +| Endpoints And Routing | 26 | +| Events And Ingestion | 13 | +| Incidents | 8 | +| Operations | 16 | +| Producer Trust | 13 | +| Reconciliation | 6 | +| Schemas And Transformations | 18 | +| Signal Egress | 18 | +| Sources And Providers | 18 | +| System | 4 | + +## Maintenance + +When `openapi.yaml` changes, run `make openapi-reference-generate` and commit the regenerated reference artifacts with the contract change. `make openapi-reference-check` verifies that the generated files are current. diff --git a/docs/reference/release-evidence-index.md b/docs/reference/release-evidence-index.md new file mode 100644 index 0000000..3028a04 --- /dev/null +++ b/docs/reference/release-evidence-index.md @@ -0,0 +1,104 @@ +# Release Evidence Index + +This reference maps public release evidence for Webhookery release candidates. +It complements `docs/release-evidence-template.md`, which remains the canonical +checklist for completing a release evidence packet. + +Release evidence supports reproducible engineering and operator review. It is +not exactly-once delivery proof, provider-side event completeness proof, +compliance certification, legal evidentiary certification, external +timestamping, managed-service availability, or live-provider acceptance. + +## Current Public Release Candidate + +The current public release candidate is +[`v0.1.0-rc1`](https://github.com/aatuh/webhookery/releases/tag/v0.1.0-rc1). +It was published on 2026-05-27 from source commit +`51b455378b307914c71fff8b2e0bba81b9c6d435`. + +Current release metadata is tracked in `release/current.json`. 
+ +| Artifact | Location | Verification Use | +| --- | --- | --- | +| Release notes | `docs/releases/v0.1.0-rc1.md` | Human-readable scope, limitations, and validation commands for the release candidate. | +| Release evidence archive | GitHub Release asset `webhookery-v0.1.0-rc1-release-evidence.zip` | Public release evidence archive generated by the release workflow. | +| Release evidence template | `docs/release-evidence-template.md` | Canonical fields and gate expectations for every tagged release. | +| Release evidence sample | `docs/release-evidence-sample.md` | Reader-facing example of expected public evidence shape. | +| OpenAPI source | `openapi.yaml` | Canonical API contract at the release source commit. | +| SDK OpenAPI copy | `sdk/openapi.yaml` | SDK-ready derived copy; `make sdk-check` verifies alignment. | +| Provider conformance matrix | `docs/provider-conformance.md` | Local deterministic provider vector evidence and dated official-source context. | +| Provider proof manifest | `docs/provider-proof-manifest.json` | Freshness metadata for external/manual live-provider proof guides. | +| Pilot checklist | `docs/releases/v0.2.0-pilot.md` | Next pilot-readiness checklist and remaining external/manual proof boundaries. | + +## Release Asset Set For New Tags + +The current release workflow prepares and uploads a fuller asset set for new +`v*` tags through `scripts/release_assets.sh`. + +| Artifact | Created By | Verification Use | +| --- | --- | --- | +| `webhookery_<tag>_linux_amd64.tar.gz` | `scripts/release_assets.sh` | Operator install archive for Linux amd64. | +| `webhookery_<tag>_linux_arm64.tar.gz` | `scripts/release_assets.sh` | Operator install archive for Linux arm64. | +| `webhookery_<tag>_darwin_amd64.tar.gz` | `scripts/release_assets.sh` | Operator install archive for macOS amd64. | +| `webhookery_<tag>_darwin_arm64.tar.gz` | `scripts/release_assets.sh` | Operator install archive for macOS arm64. 
| +| `webhookery_<tag>_windows_amd64.zip` | `scripts/release_assets.sh` | Operator install archive for Windows amd64. | +| `SHA256SUMS` | `scripts/release_assets.sh` | Verify every file in the release asset set. | +| `openapi.yaml` | `scripts/release_assets.sh` | Preserve the exact public API contract shipped with the release. | +| `openapi.sha256` | `scripts/release_assets.sh` | Verify the shipped OpenAPI contract. | +| `migrations.sha256` | `scripts/release_assets.sh` | Verify migration files used for release review. | +| `release-notes.md` | `scripts/release_assets.sh` | Preserve tag-specific release notes when available. | +| `release-check-summary.txt` | `scripts/release_assets.sh` | Record the completed release gate family and non-claims. | +| `coverage.out` | `make coverage-check`, when available | Preserve the local coverage profile used by the release gate. | +| `webhookery-release-manifest.json` | `scripts/release_assets.sh` | List release files, hashes, sizes, source commit, and non-claims. | +| `webhookery-release-provenance.json` | `scripts/release_assets.sh` | Record project release provenance metadata. It is not a SLSA level claim. | +| `webhookery-release-provenance.intoto.jsonl` | `scripts/release_assets.sh` | In-toto-shaped metadata for release artifact review. It is not certification. | +| `source.spdx.json` | release workflow, when available | Source SBOM from the release workflow. | +| `image.spdx.json` | release workflow, when available | Image SBOM from the release workflow. | +| `release-evidence.md` | release workflow, when available | Human release evidence summary. | +| `perf-smoke/` | release workflow, when available | Sanitized local performance smoke evidence. 
| + +## Public Verification + +To inspect the current public release asset from a clean checkout: + +```sh +mkdir -p dist/v0.1.0-rc1 +gh release download v0.1.0-rc1 \ + --repo aatuh/webhookery \ + --pattern 'webhookery-v0.1.0-rc1-release-evidence.zip' \ + --dir dist/v0.1.0-rc1 +sha256sum dist/v0.1.0-rc1/webhookery-v0.1.0-rc1-release-evidence.zip +``` + +Use GitHub's published asset digest as the external comparison point. Keep any +completed live-provider proof, customer evidence, raw payloads, raw signatures, +provider credentials, private keys, bearer tokens, database URLs with +passwords, and private customer data outside public source control. + +## Release Workflow Evidence + +The release workflow records: + +- `make release-acceptance` +- `make provider-conformance-check` +- `make provider-proof-check` +- `make perf-smoke` with local PostgreSQL service +- `make rc-check` with local PostgreSQL service +- Docker image build and push to GHCR +- Cosign keyless image signing +- source and image SBOM generation +- Trivy HIGH/CRITICAL image scan + +Branch protection status, external review status, accepted-risk decisions, +completed live-provider proof, and customer pilot outcomes are manual evidence +items. Record them in the release evidence packet when they exist. + +To smoke-test the release asset script locally without cross-building every +platform: + +```sh +tmp_dir="$(mktemp -d)" +WEBHOOKERY_RELEASE_ASSET_PLATFORMS=linux/amd64 \ + scripts/release_assets.sh v0.0.0-local "$tmp_dir" "$(git rev-parse HEAD)" +(cd "$tmp_dir" && sha256sum -c SHA256SUMS) +``` diff --git a/docs/reference/release-validation.md b/docs/reference/release-validation.md new file mode 100644 index 0000000..02bc323 --- /dev/null +++ b/docs/reference/release-validation.md @@ -0,0 +1,80 @@ +# Release Validation + +Webhookery release validation is evidence-first. 
Passing local checks is +necessary, but it does not turn a release candidate into a broad production +guarantee, provider certification, compliance certification, exactly-once +delivery proof, or legal evidentiary certification. + +## Local Gates + +Run these from a clean checkout before preparing release evidence: + +```sh +make docs-check +make coverage-check +make release-acceptance +make rc-check +make finalize +``` + +For DB-backed release-candidate evidence, run with a disposable PostgreSQL +database: + +```sh +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make live-postgres-check +``` + +For destructive restore evidence, use a disposable or explicitly approved +database: + +```sh +WEBHOOKERY_RESTORE_DRILL_DATABASE_URL=postgres://... make restore-drill +``` + +## Public Metadata Gates + +These checks keep public repository metadata aligned with implementation +artifacts: + +```sh +make openapi-reference-check +make meta-files-check +make release-assets-check +make static-site-check +``` + +`make openapi-reference-check` verifies that `docs/openapi/index.html`, +`docs/reference/openapi.md`, and `docs/reference/api-contract-matrix.md` match +the canonical `openapi.yaml`. + +`make release-assets-check` smoke-tests `scripts/release_assets.sh` with one +local Linux amd64 archive and verifies the generated checksums, manifest, and +provenance metadata. 
+ +## Evidence To Record + +Each tagged release evidence packet should record: + +- tag, source commit, release workflow run, and image digest when an image is + published; +- `make release-acceptance`, `make rc-check`, and `make finalize` output; +- `make coverage-check` output and `coverage.out`; +- DB-backed `make rc-check` and `make live-postgres-check` output when a + disposable database is available; +- restore drill output or an accepted-risk decision when skipped; +- OpenAPI and migration checksum summaries; +- source and image SBOM references when generated; +- release archives, `SHA256SUMS`, release manifest, and release provenance + metadata from `scripts/release_assets.sh`; +- Trivy, govulncheck, gosec, CodeQL, and Scorecard status; +- provider conformance and provider proof metadata status; +- branch protection or repository ruleset status; +- external review status or accepted-risk decision; +- live-provider proof status, if available and sanitized. + +## Current Release Candidate + +`release/current.json` points to the current public release candidate and the +next pilot-readiness checklist. GitHub Releases remains the external source of +truth for published tags and assets. diff --git a/docs/reference/source-of-truth.md b/docs/reference/source-of-truth.md new file mode 100644 index 0000000..08dcec1 --- /dev/null +++ b/docs/reference/source-of-truth.md @@ -0,0 +1,29 @@ +# Source Of Truth + +This page summarizes the public repository evidence surfaces for Webhookery. +It exists to make the GitHub-facing metadata easier to audit without changing +the implementation source of truth. + +| Area | Source Of Truth | Notes | +| --- | --- | --- | +| Product entry point | `README.md` | Short positioning, quickstart, badges, and links. | +| Implemented API contract | `openapi.yaml` | Canonical REST contract. `sdk/openapi.yaml` and rendered docs are derived artifacts. 
| +| Rendered API docs | `docs/openapi/index.html` | Generated from `openapi.yaml` by `make openapi-reference-generate`. | +| API operation matrix | `docs/reference/api-contract-matrix.md` | Generated from `openapi.yaml`; useful for review and badge count checks. | +| Database schema | `migrations/` | Migration history and evidence-authority schema. | +| Release metadata | `release/current.json` | Pointer to the current public release candidate and pilot checklist. GitHub Releases remains the external source of truth. | +| Release evidence | `docs/reference/release-evidence-index.md` and `docs/release-evidence-template.md` | Public artifact map and release evidence requirements. | +| Release validation | `docs/reference/release-validation.md` | Project-owned release validation path and expected evidence. | +| Security policy | `SECURITY.md` | Vulnerability reporting and sensitive-data handling. | +| Governance | `GOVERNANCE.md`, `CONTRIBUTING.md`, `CODE_OF_CONDUCT.md`, `CODEOWNERS` | Contribution, maintainer, conduct, and review ownership policy. | +| Public workflows | `.github/workflows/` | CI, security, integration, fuzz, CodeQL, Scorecard, release, and Pages publication workflows. | +| Dependency updates | `.github/dependabot.yml` | Weekly checks for Go modules, GitHub Actions, Docker, TypeScript package metadata, and Terraform profile dependencies. | +| Static site | `site/` and `.github/workflows/site-pages.yml` | Product page source and GitHub Pages publication workflow. | +| Provider proof metadata | `docs/provider-proof-manifest.json` | Freshness metadata for manual live-provider proof guides; no live provider calls are committed. | + +## Boundaries + +The public metadata does not prove live provider acceptance, branch protection, +private vulnerability reporting settings, completed external review, or +customer pilot outcomes by itself. Those items must be recorded in release or +pilot evidence when available. 
diff --git a/docs/release-evidence-sample.md b/docs/release-evidence-sample.md new file mode 100644 index 0000000..217c6c0 --- /dev/null +++ b/docs/release-evidence-sample.md @@ -0,0 +1,65 @@ +# Release Evidence Sample + +This document shows the public shape of a completed release evidence packet. +The canonical template is `docs/release-evidence-template.md`; this sample is a +reader aid, not a parallel checklist. + +Do not store secrets, real raw payloads, provider credentials, customer data, +private keys, bearer tokens, raw signatures, or database URLs with passwords in +release evidence. + +## Example Public Summary + +```text +Release: v0.1.0-rc1 +Commit: 2e72d451a2c16684e6abc8860556c6aca2742406 +Image: ghcr.io/aatuh/webhookery +Image digest: sha256:898426fb4a1de70d5a8ff220f171383d081c546ea9418c2f1ca2ca6ff0a7c54b +Release workflow: pass +CI workflow: pass +Integration workflow: pass +Security workflow: pass +Source SBOM: attached +Image SBOM: attached +Trivy HIGH/CRITICAL scan: pass +``` + +## Expected Evidence Files + +| File or artifact | Purpose | +| --- | --- | +| `release-evidence/release-evidence.md` | Human summary of tag, commit, image digest, checks, hashes, non-claims, and known gaps. | +| `source.spdx.json` | Source SBOM generated by the release workflow. | +| `image.spdx.json` | Image SBOM generated from the pushed container image. | +| `tmp/perf-smoke/perf-smoke.json` | Sanitized local performance smoke output. | +| `tmp/perf-smoke/perf-smoke.md` | Human-readable local performance smoke output. 
| + +## Required Non-Claims + +Every public release evidence packet must preserve these non-claims: + +- no exactly-once delivery +- no provider-side event completeness guarantee +- no compliance certification +- no legal evidentiary certification +- no external timestamping guarantee +- no live-provider acceptance unless a separate scoped review records it + +## How To Reproduce Locally + +For local non-live checks: + +```bash +make release-acceptance +make rc-check +``` + +For DB-backed checks with a disposable database: + +```bash +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make live-postgres-check +``` + +For full release confidence, use `docs/production-rc-checklist.md` and record +skipped checks as blockers or accepted risks. diff --git a/docs/release-evidence-template.md b/docs/release-evidence-template.md index 664fcbd..cab165f 100644 --- a/docs/release-evidence-template.md +++ b/docs/release-evidence-template.md @@ -1,8 +1,29 @@ # Webhookery Release Evidence Template -Use this template for each tagged release. Keep one evidence file per -tag/commit, attach generated SBOM and scan artifacts, and record skipped checks -as failures or explicit accepted-risk exceptions. +This is the canonical release evidence template. Root `RELEASE_EVIDENCE.md` +points here and should not grow a parallel checklist. + +Use one completed copy per tagged release. Keep generated SBOM and scan +artifacts with that copy, and record skipped checks as failures or explicit +accepted-risk exceptions. Store evidence outside source control unless it is +sanitized for public review. + +Do not include real API keys, webhook secrets, bearer tokens, session tokens, +private keys, provider credentials, database URLs with real credentials, raw +signatures, raw payload bodies, or customer data. + +## Result Rules + +- `pass`: command or review completed and evidence is attached or linked. 
+- `fail`: command or review failed; release is blocked unless an accepted-risk + exception below has owner, expiry, and mitigation. +- `blocked`: required environment, dependency, or permission is unavailable. +- `skipped`: only allowed with an accepted-risk exception. + +Local release-candidate gates use fake providers and receivers. They must not +require live Stripe, GitHub, Shopify, Slack, AWS, Vault, SIEM, PagerDuty, or +customer receiver credentials unless a separate commercial engagement records +the live third-party provider scope and risk. ## Release Identity @@ -16,6 +37,14 @@ as failures or explicit accepted-risk exceptions. - OpenAPI checksum: - SDK OpenAPI checksum: - Migration checksum summary: +- Stability policy reviewed: +- Performance smoke output: +- Provider conformance output: +- Provider proof metadata output: +- Failure drill output: +- External review status: +- Accepted risk status: +- Branch protection status: - Release workflow URL: - CI workflow URL: - Integration workflow URL: @@ -30,7 +59,11 @@ as failures or explicit accepted-risk exceptions. | `make finalize` | | | | `make release-acceptance` | | | | `make rc-check` | | | -| `make postgres-integration-test` with `WEBHOOKERY_TEST_DATABASE_URL` | | | +| stability policy compatibility review | | | +| `make provider-conformance-check` | | | +| `make provider-proof-check` | | | +| `make perf-smoke` with `WEBHOOKERY_TEST_DATABASE_URL` | | | +| `make live-postgres-check` with `WEBHOOKERY_TEST_DATABASE_URL` | | | | DB-backed `make rc-check` with `WEBHOOKERY_TEST_DATABASE_URL` | | | | backup/restore drill with explicit restore database | | | | `docker build -t webhookery:local .` | | | @@ -38,6 +71,7 @@ as failures or explicit accepted-risk exceptions. 
| `/readyz` readiness smoke | | | | `/openapi.yaml` or `/openapi.json` contract smoke | | | | `whcp doctor production` redacted production preflight | | | +| `whcp doctor pilot --no-network` redacted pilot preflight | | | | provider ingest to signed delivery smoke with fake receiver | | | | invalid provider signature rejection/quarantine smoke | | | | replay original/current config smoke | | | @@ -45,6 +79,10 @@ as failures or explicit accepted-risk exceptions. | audit-chain verification smoke | | | | reconciliation gap evidence smoke with fake providers | | | | alert notification and SIEM egress smoke with fake receivers | | | +| receiver timeout storm drill | | | +| object-store read/write failure drill | | | +| migration checksum failure drill | | | +| audit-chain/export tamper detection drill | | | | log and metrics secret scan | | | | `govulncheck` | | | | `gosec` | | | @@ -69,6 +107,32 @@ enforcement. Broad production readiness requires CI, integration, and security checks to be required before merge. If GitHub account or repository settings block private branch protection, record that as a release blocker. +Status: + +- Required checks: +- Required reviews: +- Force-push protection: +- Admin bypass: +- Evidence URL: + +## External Review + +Use `docs/external-review-scope.md`, +`docs/external-review-findings-template.md`, and +`docs/external-review-accepted-risks.md`. + +| Item | Status | Evidence | +|------|--------|----------| +| external review scope approved | | | +| external review completed | | | +| critical/high findings fixed | | | +| accepted risks copied with owner/expiry/mitigation | | | +| production-maturity language reviewed against findings | | | + +Broad production-maturity language is blocked unless external review findings +are fixed or explicitly accepted with owner, expiry, mitigation, and release +decision. 
+ ## Smoke Outputs Attach or link sanitized artifacts: @@ -76,6 +140,11 @@ Attach or link sanitized artifacts: - `openapi.yaml` or `openapi.json`, - readiness response, - production doctor response with secrets redacted, +- pilot doctor response with secrets redacted, +- provider conformance output and manifest, +- provider proof output and manifest, +- performance smoke JSON/Markdown output, +- failure drill output, - provider ingest response with raw payload omitted, - outbound delivery attempt metadata with request body omitted unless the evidence package is explicitly body-inclusive, @@ -89,9 +158,10 @@ Attach or link sanitized artifacts: ## Non-Claims -This release evidence does not claim exactly-once delivery, provider-side event -completeness, recovery of every provider-side event, compliance certification, -legal evidentiary certification, external timestamping, managed-service -availability, or live third-party provider acceptance. Acceptance tests must -use local fake providers and receivers unless a separate commercial engagement -explicitly records live-provider scope and risk. +The canonical non-claims are in `docs/security-promise.md`. For this release +evidence: no exactly-once delivery, no provider-side event completeness, +no recovery of every provider-side event, no compliance certification, no legal +evidentiary certification, no external timestamping, no managed-service +availability, and no live third-party provider acceptance. Acceptance tests +must use local fake providers and receivers unless a separate commercial +engagement explicitly records live-provider scope and risk. diff --git a/docs/releases/v0.1.0-rc1.md b/docs/releases/v0.1.0-rc1.md new file mode 100644 index 0000000..4275ad3 --- /dev/null +++ b/docs/releases/v0.1.0-rc1.md @@ -0,0 +1,120 @@ +# Webhookery v0.1.0-rc1 Release Notes + +Status: release candidate for controlled, single-region, self-hosted +evaluation. 
+ +Date: 2026-05-27 + +This release packages Webhookery as self-hosted webhook evidence infrastructure: +durable capture before inbound success, provider-aware verification, signed +delivery, replay, reconciliation evidence, retention, audit-chain verification, +and operator-facing release evidence. + +## Who This Release Is For + +Use this release candidate if you need to evaluate whether Webhookery can help +with webhook evidence, debugging, replay, and self-hosted operational review. +It is most relevant for platform, SRE, security, and integration teams that need +to prove what happened to webhook events. + +Do not treat this release candidate as a managed service, compliance +certification, or provider completeness guarantee. + +## Implemented Core Behavior + +- Durable inbound capture before returning success. +- Raw body and header evidence preservation. +- Provider signature verification and local conformance vectors. +- Tenant-scoped sources, endpoints, routes, subscriptions, events, deliveries, + attempts, replay, DLQ, quarantine, retention, audit, and export APIs. +- Signed outbound delivery with retry, DLQ, replay, and payload evidence. +- Versioned configuration evidence for reproducible route, retry, adapter, + transformation, and replay decisions. +- Audit hash-chain verification and release evidence export foundations. +- Provider reconciliation and gap evidence where provider APIs and credentials + permit it. +- Redacted production doctor, performance smoke, provider conformance checks, + backup/restore scripts, deployment profiles, and observability examples. + +## Release Evidence + +The canonical evidence template is `docs/release-evidence-template.md`. 
+ +For this release candidate, release evidence should include: + +- commit SHA and tag +- `make release-acceptance` output +- `make rc-check` output +- DB-backed `make rc-check` output when `WEBHOOKERY_TEST_DATABASE_URL` is + available +- provider conformance output +- performance smoke output +- Docker image digest +- source and image SBOMs +- Trivy HIGH/CRITICAL image scan result +- branch protection or repository ruleset status +- external review status or accepted-risk record + +The GitHub release workflow generates a release evidence artifact with SBOMs, +image digest, local fake-provider/fake-receiver evidence, and non-claim +language. It does not perform live Stripe, GitHub, Shopify, Slack, AWS, Vault, +or customer receiver calls. + +## Upgrade And Rollback Notes + +- Read `docs/stability.md` before relying on API, CLI, migration, or support + windows. +- Run migrations only against a backed-up database. +- Run the restore drill from `docs/operations.md` before promoting a deployment + that changes persistence or evidence storage behavior. +- Rollback across applied migrations may require restoring from backup; do not + assume automatic down-migration safety for production data. + +## Known Limitations + +- Single-region self-hosted operation is the supported release-candidate + posture. +- Operators own PostgreSQL durability, object storage durability, backups, + network policy, TLS, alert routing, and incident response. +- Provider reconciliation cannot prove provider-side event completeness. +- Local release acceptance uses fake/local providers and receivers only. +- Performance smoke output is a local sizing signal, not an SLA. +- Commercial support and license exceptions require a separate written + agreement. 
+ +## Non-Claims + +Webhookery v0.1.0-rc1 does not claim: + +- exactly-once delivery +- provider-side event completeness +- downstream business success +- compliance certification +- external timestamping +- legal evidentiary certification +- hosted-service availability +- multi-region active-active operation + +## Commercial Evaluation + +Commercial license exceptions and paid evaluation packages are described in +`COMMERCIAL.md`. The commercial path does not change the technical non-claims +above unless a written agreement explicitly narrows scope for a specific +engagement. + +## Validation Commands + +Run these from a clean checkout: + +```bash +make docs-check +make release-acceptance +make rc-check +make finalize +``` + +For DB-backed release-candidate checks: + +```bash +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check +``` diff --git a/docs/releases/v0.2.0-pilot.md b/docs/releases/v0.2.0-pilot.md new file mode 100644 index 0000000..2eacea6 --- /dev/null +++ b/docs/releases/v0.2.0-pilot.md @@ -0,0 +1,104 @@ +# Webhookery v0.2.0 Pilot Readiness Checklist + +Status: pilot-readiness checklist, not a tagged release. + +Date: 2026-06-04 + +This document records the launch checklist for the next pilot-stage package: +Webhookery as a self-hosted webhook evidence gateway for controlled, +single-region evaluation. It does not claim broad production readiness, +provider certification, compliance certification, exactly-once delivery, or +provider-side event completeness. + +## Target Pilot Story + +A payment or integration webhook failed. Webhookery shows what arrived, whether +the provider signature was valid, which route matched, which delivery attempts +happened, why delivery failed, who replayed it, what changed, and which +verifiable evidence remains. 
+ +## Checklist + +| Item | Result | Evidence | +|------|--------|----------| +| README updated | ready | `README.md` centers audit-grade capture, replay, and evidence; links the evaluator path and live-provider proof guides. | +| Evidence demo works | repo-ready; release-run pending | `examples/webhook-evidence-demo/run.sh` and committed sample output exist. Run again before tagging. | +| Incident packet works | repo-ready; release-run pending | Incident APIs, CLI commands, OpenAPI additions, migrations, and demo packet output exist. Run incident regression checks before tagging. | +| Stripe live proof guide exists | ready; external/manual | `docs/live-provider-proof/stripe.md`, `docs/providers/stripe.md`, and the redacted sample exist. Completed live proof remains private. | +| GitHub live proof guide exists | ready; external/manual | `docs/live-provider-proof/github.md`, `docs/providers/github.md`, and the redacted sample exist. Completed live proof remains private. | +| Shopify follow-up proof guide exists | ready; external/manual | `docs/live-provider-proof/shopify.md`, `docs/providers/shopify.md`, and the redacted sample exist. | +| Pilot topology exists | ready | `docs/pilot-topology.md` defines the single-region, PostgreSQL-first pilot posture. | +| Pilot evidence checklist exists | ready | `docs/pilot-evidence-template.md` defines sanitized pilot evidence capture. | +| Commercial evaluation docs updated | ready | `COMMERCIAL.md`, `docs/commercial-evaluation.md`, `docs/production-readiness-review.md`, and `docs/support-packages.md`. | +| Provider conformance current | ready as of current metadata | `docs/provider-conformance.md` was last globally checked on 2026-05-27; proof metadata for Stripe, GitHub, and Shopify was checked on 2026-06-04. | +| Provider proof freshness check exists | ready | `make provider-proof-check` validates `docs/provider-proof-manifest.json` and redacted sample safety. 
| +| Pilot readiness command exists | ready | `go run ./cmd/whcp doctor pilot --no-network` reports `blocker`, `warning`, and `ok` findings without contacting network services. | +| Security promise reviewed | pending release review | Re-read `docs/security-promise.md` before publishing a v0.2 pilot tag. | +| Comparison docs current | pending external-publish review | Comparison docs carry 2026-05-27 verification dates; recheck if used for public launch copy after 90 days or before external publication. | +| `make docs-check` passes | ready on 2026-06-04 | Run result for this checklist update: passed. | +| `make release-acceptance` passes | ready on 2026-06-04 | Run result for this checklist update: passed. | +| `make rc-check` passes | pending tag run | Required before tagging v0.2. Record output in release evidence. | +| `make finalize` passes | pending tag run | Required before tagging v0.2. Record output in release evidence. | +| Release evidence generated | pending tag run | Use `docs/release-evidence-template.md` and attach sanitized generated artifacts outside source control. | + +## Required Commands Before Tagging + +Run from a clean checkout: + +```bash +make docs-check +make release-acceptance +make rc-check +make finalize +``` + +For DB-backed release-candidate checks: + +```bash +WEBHOOKERY_TEST_DATABASE_URL=postgres://... make rc-check +``` + +For provider proof metadata: + +```bash +make provider-conformance-check +make provider-proof-check +go run ./cmd/whcp doctor pilot --no-network +``` + +Live-provider proof guides are manual and external. Do not run them in CI, do +not commit completed live evidence bundles, and do not include provider +secrets, raw payload bodies, raw signatures, customer data, or production +database URLs in public artifacts. + +## Known Limitations + +- v0.2 pilot readiness remains single-region and self-hosted. +- PostgreSQL remains the evidence authority for accepted events. 
+- Redis, Kafka, NATS, object storage, and external queues are not evidence + authorities. +- Live-provider proof guides are manual and sanitized; they are not provider + certification. +- Provider reconciliation cannot prove provider-side event completeness. +- Replay is at-least-once and can create duplicate downstream side effects. +- Raw payload access remains elevated and audited. +- Customer-controlled receiver URLs remain hostile input and must pass SSRF + validation at configuration and delivery time. +- Commercial support, license exceptions, and production-readiness reviews + require separate written scope. +- External security review status must be recorded before using broader + production-readiness language. + +## Launch Blockers + +Do not tag or externally announce v0.2 pilot readiness until these are recorded +in release evidence: + +- `make rc-check` output. +- `make finalize` output. +- DB-backed `make rc-check` output when a disposable PostgreSQL URL is + available. +- Current provider conformance and provider proof check output. +- Release evidence bundle or checklist copy. +- Branch protection or repository ruleset status. +- External review status or accepted-risk decision. diff --git a/docs/roadmap-intake-policy.md b/docs/roadmap-intake-policy.md new file mode 100644 index 0000000..1cb8b6f --- /dev/null +++ b/docs/roadmap-intake-policy.md @@ -0,0 +1,102 @@ +# Roadmap Intake Policy + +Webhookery roadmap decisions should come from repeated evidence, product fit, +security risk, and commercial value. Do not add broad platform features simply +because they appear in a single pilot conversation. 
+ +## Intake Categories + +Classify each request as one of: + +- docs gap +- bug +- evaluator friction +- missing provider compatibility +- production hardening +- paid custom integration +- commercial packaging +- general roadmap candidate +- enterprise/future +- out of scope + +## Decision Rules + +### Docs Gap + +Use when behavior exists but the evaluator could not find or trust it. + +Action: update the canonical doc and link secondary docs. + +### Bug + +Use when implemented behavior fails its documented promise. + +Action: reproduce, add a regression test, fix, run `make finalize`, and create +a Conventional Commit. + +### Evaluator Friction + +Use when the product works but setup, quickstart, demo, release evidence, or +commercial path is unclear. + +Action: improve the evaluator path before adding features. + +### Missing Provider Compatibility + +Use when a provider-specific behavior is needed for a real evaluation. + +Action: verify official provider docs, define test vectors, and avoid claiming +generic provider completeness. + +### Paid Custom Integration + +Use when the request is valuable for one customer but not yet general product +scope. + +Action: define a written scope, acceptance criteria, support boundary, and +commercial terms before implementation. + +### General Roadmap Candidate + +Use when the request appears across multiple evaluators or closes a clear +production-respectable core gap. + +Action: create a focused backlog with evidence, non-goals, tests, and release +impact. + +### Enterprise/Future + +Use for broader capabilities such as marketplace plugins, hosted service, +multi-region coordination, SAML, HSM/PKCS#11, vendor-specific notification +apps, or compliance certification. + +Action: keep labeled as future unless repeated paid demand and architecture +evidence justify a separate phase. 
+ +## Required Evidence + +Before promoting an item to general roadmap, capture: + +- affected buyer segment +- repeated user evidence or signed customer scope +- current workaround +- failure or opportunity cost +- security and tenant-isolation impact +- documentation impact +- release evidence impact + +## Non-Negotiable Boundaries + +Roadmap items must not weaken: + +- durable capture before inbound success +- at-least-once delivery language +- tenant isolation +- provider-specific verification +- raw payload permission gates +- SSRF-safe endpoint handling +- audit/replay evidence +- secret and PII redaction +- no exactly-once claim +- no provider-side completeness guarantee +- no compliance certification claim diff --git a/docs/schema-migrations.md b/docs/schema-migrations.md new file mode 100644 index 0000000..703fe72 --- /dev/null +++ b/docs/schema-migrations.md @@ -0,0 +1,99 @@ +# Schema And Migration Operations + +This is the DB reviewer and operator overview for Webhookery PostgreSQL +migrations. The exact schema lives in `migrations/`; this document explains how +to review and operate it. + +## Implemented Migration Runner + +`go run ./cmd/whcp migrate up` uses `internal/adapters/postgres/migrate.go`. +The current runner: + +- reads `*.up.sql` files from the selected migration directory; +- sorts filenames lexically, so numeric prefixes define order; +- applies each file in its own PostgreSQL transaction; +- records the migration filename stem and SHA-256 checksum in + `schema_migrations`; +- skips a migration only when the same version and checksum already exist. + +Do not edit an already-applied migration file. If a migration checksum changes +for a version that may have reached any shared environment, treat it as a +release blocker and add a new forward migration instead. + +The CLI does not implement `migrate down`. The checked-in `.down.sql` files are +review and compatibility artifacts, not a production rollback workflow. 
+ +## Migration Ordering + +Current migration files are ordered from `001_init` through +`029_replay_approval_policies`. Review new files by filename, not commit order. + +| Range | Schema area | +|-------|-------------| +| `001` | Core tenants, sources, endpoints, raw payload metadata, events, receipts, deliveries, replay, DLQ, quarantine, audit events, outbox, and worker leases. | +| `002`-`003` | Endpoint secrets, subscriptions, routes, schemas, and replay scope. | +| `004`-`005` | Users, memberships, API keys, idempotency, config versions, dedupe records, and replay items. | +| `006` | Retention, evidence exports, and raw payload storage lifecycle metadata. | +| `007`-`010` | Reproducible route/subscription/retry configuration, secret versions, delivery payload hashes, and replay payload hashes. | +| `011`-`012` | Provider reconciliation evidence and audit-chain heads, entries, and anchors. | +| `013`-`017` | Retry jitter evidence, legal hold, replay approval, endpoint mTLS, and generic JWT adapter metadata. | +| `018`-`021` | Metrics rollups, alerts, notification delivery, and SIEM delivery evidence. | +| `022` | Enterprise identity, SCIM, role bindings, access policies, and authz decision logs. | +| `023`-`025` | Adapter registry governance, producer trust, producer access tokens, and producer mTLS identities. | +| `026`-`029` | Incidents, replay reason codes, replay approval expiry, and replay approval policies. | + +For every new migration, document whether it changes evidence capture, +authorization, raw payload retention, replay, exports, or outbound delivery +behavior in the release evidence. + +## Evidence-Authority Tables + +PostgreSQL is the metadata and audit authority even when raw bodies are stored +in S3-compatible object storage. 
These table groups are evidence-critical: + +| Group | Tables | +|-------|--------| +| Durable capture | `events`, `raw_payloads`, `provider_receipts`, `dedupe_records`, `outbox` | +| Delivery and replay | `deliveries`, `delivery_attempts`, `delivery_payloads`, `replay_jobs`, `replay_items`, `replay_receipts`, `dead_letter_entries`, `quarantine_entries` | +| Audit and export | `audit_events`, `audit_chain_heads`, `audit_chain_entries`, `audit_chain_anchors`, `evidence_exports`, `evidence_export_items` | +| Reproducible configuration | `sources`, `endpoints`, `endpoint_secrets`, `source_secret_versions`, `subscriptions`, `subscription_versions`, `routes`, `route_versions`, `retry_policies`, `config_versions`, `provider_adapters`, `adapter_versions`, `transformations`, `transformation_versions` | +| Provider reconciliation | `provider_connections`, `reconciliation_jobs`, `provider_api_evidence`, `reconciliation_items` | +| Authorization and identity | `tenants`, `users`, `memberships`, `api_keys`, `identity_providers`, `external_identities`, `auth_sessions`, `scim_tokens`, `scim_users`, `scim_groups`, `role_bindings`, `access_policy_rules`, `authz_decision_logs` | +| Operations signals | `retention_policies`, `retention_runs`, `retention_run_items`, `metrics_rollups`, `alert_rules`, `alert_firings`, `notification_channels`, `notification_deliveries`, `notification_delivery_attempts`, `siem_sinks`, `siem_deliveries`, `siem_delivery_attempts` | +| Producer trust | `producer_clients`, `producer_client_secrets`, `producer_access_tokens`, `producer_mtls_identities` | + +Treat destructive changes to these groups as data-safety changes. They need a +backup/restore drill, release evidence, and explicit compatibility notes. + +## Restore And Rollback Stance + +Rollback is restore-first. Do not assume an image rollback reverses schema +changes or preserves compatibility with newer evidence rows. + +Before applying migrations to important data: + +1. 
Back up PostgreSQL with `scripts/backup_postgres.sh`. +2. Back up S3-compatible raw body storage separately when + `WEBHOOKERY_RAW_STORAGE_MODE=s3`. +3. Restore into a disposable database with `scripts/restore_postgres.sh`. +4. Run `go run ./cmd/whcp migrate up` against the restored database. +5. Verify `/readyz`, event timelines, audit-chain verification, evidence export + verification, storage status, and queue status. + +When a migration fails, preserve the failed database state for analysis. Do not +retry by editing the already-applied migration. Add a new forward migration or +restore from a verified backup into a controlled target. + +## Compatibility Review Checklist + +Before merging a schema change, answer: + +- Does the migration preserve tenant predicates for every scoped resource? +- Does it alter raw payload, delivery payload, audit, export, retention, or + replay evidence? +- Does it require API, worker, scheduler, or migration Job rollout ordering? +- Does it require object-storage backup or restore coordination? +- Does it add nullable columns, defaults, or backfills that can run on existing + rows without blocking live traffic? +- Does release evidence record the migration checksum summary and restore drill + result? diff --git a/docs/security-promise.md b/docs/security-promise.md new file mode 100644 index 0000000..e8df3df --- /dev/null +++ b/docs/security-promise.md @@ -0,0 +1,58 @@ +# Security Promise And Non-Claims + +This is the canonical Webhookery security-promise and non-claims reference. +Other docs should link here instead of repeating full caveat lists. + +## Promise + +Webhookery is self-hosted webhook evidence and delivery infrastructure. Its +trust promise is deliberately narrow: + +- Do not return inbound success before durable capture according to the + configured storage mode. +- Preserve raw request evidence needed for provider verification. 
+- Make loss boundaries, duplicates, retries, replay, retention, and audit + evidence visible. +- Keep recovery and replay linked to original evidence without mutating + original history. +- Treat customer-controlled outbound URLs as hostile input. +- Keep secrets, raw payload bodies, bearer/session tokens, provider + credentials, private keys, and unnecessary PII out of logs, metrics, errors, + UI responses, docs, release evidence, and support artifacts. + +Inbound success means durable capture and verification metadata were recorded. +It does not mean downstream business processing succeeded. + +## Non-Claims + +Webhookery makes: + +- no exactly-once delivery claim +- no provider-side event completeness guarantee +- no recovery guarantee for every provider-side event +- no multi-region active-active coordination claim +- no external timestamping claim +- no FIPS/NIST/CMVP certification claim +- no compliance certification claim +- no legal evidentiary certification claim +- no managed-service availability claim +- no live third-party provider acceptance claim for local release gates +- no claim that Redis, NATS, Kafka, or object storage is the authority for + accepted event evidence + +Release evidence, support, commercial agreements, trademarks, and governance +docs may narrow or clarify scope for a specific engagement, but they must not +silently broaden these claims. + +## Documentation Rule + +When adding docs, examples, release evidence, or support text: + +- Link to this document for the full non-claim list. +- Use placeholders for secrets and credentials. +- Do not include raw signatures, raw payload bodies, customer data, private + keys, real database URLs, provider credentials, bearer tokens, or session + tokens. +- Verify provider-specific statements against official upstream docs before + changing provider semantics. Use `docs/documentation-maintenance.md` for the + freshness record. 
diff --git a/docs/security-review-package.md b/docs/security-review-package.md index 6401e0f..8a00a28 100644 --- a/docs/security-review-package.md +++ b/docs/security-review-package.md @@ -18,6 +18,12 @@ not a substitute for source inspection. - Local runtime: `docker-compose.yml`. - Deployment image: `Dockerfile`. - Operations and recovery runbook: `docs/operations.md`. +- Day-2 operations guide: `docs/day-2-operations.md`. +- Provider conformance matrix: `docs/provider-conformance.md`. +- Performance envelope: `docs/performance-envelope.md`. +- External review scope template: `docs/external-review-scope.md`. +- External review findings template: `docs/external-review-findings-template.md`. +- Accepted risk registry: `docs/external-review-accepted-risks.md`. - Release-candidate harness: `scripts/rc_acceptance.sh`. ## Threat Model Focus @@ -59,7 +65,13 @@ Include these artifacts in the review package: - `openapi.yaml` - `sdk/openapi.yaml` - `docs/operations.md` +- `docs/day-2-operations.md` +- `docs/provider-conformance.md` +- `docs/performance-envelope.md` - `docs/release-evidence-template.md` +- `docs/external-review-scope.md` +- `docs/external-review-findings-template.md` +- `docs/external-review-accepted-risks.md` - latest exact-tag release evidence file - `migrations/` - provider verification test vectors, if present @@ -68,7 +80,10 @@ Include these artifacts in the review package: - SBOMs, Trivy output, `govulncheck`, and `gosec` outputs from the release workflow - sanitized `make rc-check` output +- sanitized `make perf-smoke` output +- sanitized `make provider-conformance-check` output - sanitized backup/restore drill output +- branch protection status or accepted-risk record ## Review Exit Criteria @@ -76,7 +91,5 @@ Broad production-readiness language should wait until review findings are fixed or recorded as accepted risks with owner, severity, expiry, and mitigation in the release evidence. 
-This repository still makes no exactly-once delivery claim, no provider-side -event completeness guarantee, no compliance certification claim, no legal -evidentiary certification claim, no external timestamping claim, and no -managed-service availability claim. +Review exit criteria use `docs/security-promise.md` as the canonical +security-promise and non-claims reference. diff --git a/docs/stability.md b/docs/stability.md new file mode 100644 index 0000000..20c2718 --- /dev/null +++ b/docs/stability.md @@ -0,0 +1,113 @@ +# Stability And Compatibility Policy + +This policy defines the compatibility promise for self-hosted Webhookery +releases. It is intentionally conservative until the project has broader +deployment history, performance evidence, and external review evidence. + +## Release Stage + +Current public positioning is release-candidate/early-GA for controlled +single-region self-hosted deployments. + +Do not describe a release as broadly production mature unless the release +evidence package includes passing RC gates, DB-backed checks, restore drill +evidence, provider conformance evidence, performance smoke output, and current +security-review status. + +## Versioning + +Webhookery uses semantic version tags for release artifacts: + +| Version | Compatibility expectation | +|---------|---------------------------| +| `0.x` | Public contract is useful but may change with clear release notes and migration guidance. Breaking changes must be called out before promotion. | +| `1.x` | Stable REST, CLI, migration, and deployment behavior for documented production-core workflows. Breaking changes require deprecation or a major version. | +| Patch | Bug, security, documentation, and compatibility fixes only. | +| Minor | Backward-compatible features, additive API fields, additive tables/columns, new checks, and new docs. | +| Major | Intentional breaking changes to APIs, CLI behavior, persistence compatibility, or deployment contract. 
| + +`openapi.yaml` is the canonical REST contract. `sdk/openapi.yaml` must remain +an exact copy for SDK consumers. + +## API Compatibility + +Backward-compatible REST changes include: + +- adding optional request fields; +- adding response fields that clients can ignore; +- adding enum values only when existing clients can safely treat them as + unknown; +- adding endpoints under the existing versioned path; +- clarifying problem-details messages without changing machine-readable codes. + +Breaking REST changes include: + +- removing or renaming endpoints, fields, headers, scopes, or problem codes; +- changing required request fields or response types; +- narrowing authorization in a way that breaks documented core workflows + without migration guidance; +- changing default retention, replay, delivery, or capture semantics. + +High-risk API behavior must keep examples in `openapi.yaml` and must pass +`make openapi-check` and `make sdk-check`. + +## CLI Compatibility + +The `whcp` CLI is operator-facing. Keep command names, required flags, exit +codes, and output safety stable for documented workflows. New JSON fields are +allowed. Removing commands or changing destructive-action guards is breaking. + +CLI output must not print secrets, database passwords, bearer/session tokens, +webhook secrets, private keys, raw signatures, raw payload bodies, or customer +data unless the command is explicitly an elevated body export and the operator +selected an output file. + +## Persistence And Migrations + +PostgreSQL is the evidence and metadata authority. Migration compatibility is +restore-first: + +- never edit a migration that may have reached a shared environment; +- add forward migrations for schema changes; +- record migration checksum summaries in release evidence; +- run restore drills for changes that affect evidence, retention, export, + audit chain, replay, delivery, secret custody, or authorization data. + +Rollback is not only image rollback. 
If the database has advanced, use the +restore workflow in `docs/schema-migrations.md` and `docs/operations.md`. + +## Support Windows + +Until a `1.0` release policy replaces this section: + +- the current release tag receives security and critical data-safety fixes; +- the immediately previous minor release may receive fixes when migration risk + is lower than upgrade risk; +- unsupported versions should be upgraded before production promotion or + external security review. + +Commercial support windows may be longer by written agreement. Public docs must +not imply an SLA unless it is contracted. + +## Deprecation Rules + +Deprecations must identify: + +- the affected API, CLI command, config variable, migration behavior, or + deployment profile; +- the replacement path; +- the first release where warnings appear; +- the earliest release where removal can happen; +- migration, restore, or compatibility risks. + +Security fixes can remove unsafe behavior faster, but the release evidence must +record the reason and operator impact. + +## Non-Claims + +This policy does not claim exactly-once delivery, provider-side event +completeness, managed-service availability, multi-region active-active +operation, compliance certification, legal evidentiary certification, external +timestamping, or recovery of every provider-side event. + +Use `docs/security-promise.md` as the canonical non-claims reference. diff --git a/docs/support-packages.md b/docs/support-packages.md new file mode 100644 index 0000000..afedc07 --- /dev/null +++ b/docs/support-packages.md @@ -0,0 +1,57 @@ +# Support Packages + +Webhookery community support and commercial support have different boundaries. + +Community support happens through public repository channels and has no SLA. +Commercial support requires a written agreement. + +## Support Options + +| Option | Starting range | Boundary | +| --- | ---: | --- | +| Community support | No charge | Public, best effort, no SLA, no private data. 
| +| Commercial Evaluation | EUR 490-1,000 | Fit review and next-step recommendation. | +| Release Evidence Package | EUR 2,500-5,000 | Evidence artifact review and accepted-risk summary. | +| Production Readiness Review | EUR 7,500-12,500 | Deployment and operations readiness review. | +| Commercial License + Support | EUR 9,900-24,900 per year | Written license exception and contracted support terms. | +| Custom Integration / Provider Adapter | Fixed scope or EUR 150-250/hour | Implementation work with agreed acceptance criteria. | + +## What Support Can Cover + +- deployment review +- release evidence interpretation +- upgrade planning +- backup and restore drills +- provider adapter review +- incident triage guidance +- observability and alert review +- security review package preparation + +## What Support Does Not Imply + +Support does not imply: + +- exactly-once delivery +- provider-side event completeness +- downstream business-processing success +- compliance certification +- legal evidentiary certification +- hosted-service availability +- production SLA without a written SLA + +## Safe Support Requests + +Good support requests include: + +- Webhookery version or commit +- deployment mode +- sanitized configuration summary +- exact command run +- expected outcome +- actual outcome +- relevant redacted logs +- whether the issue blocks capture, delivery, replay, retention, or audit + +Do not include API keys, bearer tokens, webhook secrets, raw signatures, private +keys, provider credentials, customer payloads, PII, database URLs with +passwords, or exploit payloads. diff --git a/docs/use-cases/github-automation-webhooks.md b/docs/use-cases/github-automation-webhooks.md new file mode 100644 index 0000000..73c37ff --- /dev/null +++ b/docs/use-cases/github-automation-webhooks.md @@ -0,0 +1,45 @@ +# GitHub Automation Webhooks + +Audience: platform teams and maintainers investigating repository automation +that did not run after a GitHub webhook delivery.
+ +## Problem + +A repository event was expected to trigger automation, but the downstream +receiver did not complete the work. The operator needs to find the delivery, +verify signature evidence, inspect dedupe and delivery attempts, replay when +safe, and retain a report for maintainers. + +## Workflow + +Use a test repository for live proof, or the local evidence demo when no live +provider access is available. + +```bash +whcp events search --provider github --delivery-id gh_del_... --api-key "$WEBHOOKERY_API_KEY" +whcp events timeline --event-id evt_... --format table --api-key "$WEBHOOKERY_API_KEY" +whcp replay-jobs create --event-id evt_... --config-mode original --reason-code operator_requested --reason "rerun automation after receiver fix" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents create --title "GitHub automation webhook investigation" --reason "automation did not run" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents add-event --incident-id inc_... --event-id evt_... --reason "automation receiver failure" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents export --incident-id inc_... --reason "maintainer evidence handoff" --output github-automation-evidence.tar.gz --api-key "$WEBHOOKERY_API_KEY" +``` + +## Evidence Output + +Expected evidence includes: + +- provider and delivery identity metadata; +- signature verification result; +- dedupe visibility for repeated deliveries; +- delivery attempt and replay timeline entries; +- replay reason code and operator reason; and +- incident evidence bundle verification output. + +Use `docs/live-provider-proof/github.md` for a sanitized live test-repository +proof path and `docs/providers/github.md` for setup details. + +## Non-Claims + +This workflow does not certify GitHub delivery behavior, does not prove +provider-side completeness, and does not guarantee that downstream automation +is idempotent. 
diff --git a/docs/use-cases/internal-integration-replay.md b/docs/use-cases/internal-integration-replay.md new file mode 100644 index 0000000..a3d494f --- /dev/null +++ b/docs/use-cases/internal-integration-replay.md @@ -0,0 +1,52 @@ +# Internal Integration Replay + +Audience: platform teams using Webhookery for controlled internal producers +and receivers where replay must be governed and auditable. + +## Problem + +An internal producer sent an event to Webhookery, but a downstream receiver +failed. The operator needs to prove durable capture, inspect delivery +attempts, preview or run replay with a reason, and preserve evidence for an +incident review. + +## Workflow + +Find affected events first: + +```bash +whcp events search --status dlq --since 24h --api-key "$WEBHOOKERY_API_KEY" +whcp events timeline --event-id evt_... --format json --api-key "$WEBHOOKERY_API_KEY" +``` + +Run replay only after confirming receiver readiness and idempotency: + +```bash +whcp replay-jobs create --event-id evt_... --config-mode original --rate-limit-per-minute 30 --reason-code receiver_fixed --reason "receiver restored after outage" --api-key "$WEBHOOKERY_API_KEY" +``` + +Create the incident packet: + +```bash +whcp incidents create --title "Internal integration replay" --reason "receiver outage investigation" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents add-event --incident-id inc_... --event-id evt_... --reason "DLQ replay candidate" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents export --incident-id inc_... --reason "internal incident review" --output internal-replay-evidence.tar.gz --api-key "$WEBHOOKERY_API_KEY" +go run ./cmd/whcp audit verify-bundle --file internal-replay-evidence.tar.gz +``` + +## Evidence Output + +Expected evidence includes: + +- durable capture metadata and hashes; +- delivery attempts and failure classes; +- DLQ state; +- replay reason code, free-text reason, and mode; +- audit-chain references; and +- evidence bundle verification result. 
+ +## Non-Claims + +Replay is at-least-once work and can duplicate side effects. Webhookery records +the reason and evidence trail; receiver idempotency remains the operator's +responsibility. diff --git a/docs/use-cases/shopify-order-webhooks.md b/docs/use-cases/shopify-order-webhooks.md new file mode 100644 index 0000000..4c11f85 --- /dev/null +++ b/docs/use-cases/shopify-order-webhooks.md @@ -0,0 +1,43 @@ +# Shopify Order Webhooks + +Audience: ecommerce platform teams investigating order-related webhooks in a +development store or controlled pilot. + +## Problem + +An order-related webhook was expected to update internal systems, but the +receiver failed or the result is unclear. The operator needs topic metadata, +verification evidence, delivery history, replay governance, and a sanitized +evidence packet. + +## Workflow + +For a local walkthrough, run the evidence demo. For development-store proof, +follow `docs/live-provider-proof/shopify.md`. + +```bash +whcp events search --provider shopify --route-id rte_... --since 24h --api-key "$WEBHOOKERY_API_KEY" +whcp events timeline --event-id evt_... --format markdown --api-key "$WEBHOOKERY_API_KEY" +whcp replay-jobs create --event-id evt_... --config-mode original --reason-code support_investigation --reason "review order webhook replay" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents create --title "Shopify order webhook investigation" --reason "support investigation" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents add-event --incident-id inc_... --event-id evt_... --reason "order receiver failure" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents generate-report --incident-id inc_... 
--reason "support handoff" --api-key "$WEBHOOKERY_API_KEY" +``` + +## Evidence Output + +Expected evidence includes: + +- provider metadata and event type or topic when captured; +- HMAC verification result; +- route and delivery attempt evidence; +- replay reason and mode; +- retention/raw-payload access state; and +- incident report references and non-claims. + +Use `docs/providers/shopify.md` for setup and operator context. + +## Non-Claims + +This workflow does not claim universal topic-specific recovery, provider-side +completeness, exactly-once delivery, or compliance certification. diff --git a/docs/use-cases/stripe-payment-investigation.md b/docs/use-cases/stripe-payment-investigation.md new file mode 100644 index 0000000..d3e9bb7 --- /dev/null +++ b/docs/use-cases/stripe-payment-investigation.md @@ -0,0 +1,53 @@ +# Stripe Payment Investigation + +Audience: support engineers, SREs, and platform teams investigating a payment +webhook that arrived but did not produce the expected downstream business +result. + +## Problem + +A customer or internal team says payment-related state is wrong. The operator +needs to answer whether Webhookery received the event, verified it, stored +evidence, attempted downstream delivery, moved it to DLQ, replayed it, and +generated a support-safe report. + +## Workflow + +Start with the local evidence demo: + +```bash +docker compose up -d postgres +export WEBHOOKERY_TEST_DATABASE_URL='postgres://webhookery:change-me@localhost:5432/webhookery?sslmode=disable' +examples/webhook-evidence-demo/run.sh +``` + +For an already-running environment, use the investigation surfaces: + +```bash +whcp events search --provider stripe --external-id evt_... --api-key "$WEBHOOKERY_API_KEY" +whcp events timeline --event-id evt_... 
--format markdown --api-key "$WEBHOOKERY_API_KEY" +whcp incidents create --title "Stripe payment webhook failed" --reason "support investigation" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents add-event --incident-id inc_... --event-id evt_... --reason "failed downstream delivery" --api-key "$WEBHOOKERY_API_KEY" +whcp incidents generate-report --incident-id inc_... --reason "support handoff" --api-key "$WEBHOOKERY_API_KEY" +``` + +## Evidence Output + +Expected evidence includes: + +- event identity and provider metadata; +- verification result and raw payload hash; +- delivery attempt timeline; +- DLQ or retry state when present; +- replay reason and replay result when replay is used; +- incident report snapshot; and +- evidence bundle manifest and verification command. + +For live test-mode proof, use `docs/live-provider-proof/stripe.md`. For setup +and operator context, use `docs/providers/stripe.md`. + +## Non-Claims + +This workflow does not prove downstream business processing succeeded, does +not prove provider-side event completeness, and does not claim exactly-once +delivery. diff --git a/docs/why-webhookery.md b/docs/why-webhookery.md new file mode 100644 index 0000000..4402d09 --- /dev/null +++ b/docs/why-webhookery.md @@ -0,0 +1,100 @@ +# Why Webhookery + +Webhookery is for teams that need evidence about incoming webhooks, not another +generic webhook sender. + +The core question is: + +> When an integration fails, can you prove what arrived, what verified, what +> was stored, what delivery attempted, what was replayed, and what evidence +> remains? + +## Webhooks Fail In Boring Ways + +Most webhook incidents are not novel architecture problems. 
They are ordinary +failure boundaries that become expensive because evidence is scattered: + +- the provider sent an event but the receiver was down; +- the receiver returned success before completing business work; +- a duplicate was processed twice or hidden by a dedupe shortcut; +- a retry happened hours later and changed state again; +- an operator replayed an event without a durable reason trail; or +- raw logs expired before the support or security review started. + +Webhookery keeps those boundaries explicit. Inbound success means durable +capture, not downstream business success. + +## Provider Retry Is Not Your Processing Guarantee + +Provider retry behavior helps, but it is not a substitute for your own +evidence model. Webhookery stores local capture, verification, dedupe, routing, +delivery, retry, DLQ, replay, retention, and audit evidence so an operator can +investigate from local records. + +Provider-specific behavior belongs in the provider guides: + +- `docs/providers/stripe.md` +- `docs/providers/github.md` +- `docs/providers/shopify.md` +- `docs/provider-conformance.md` + +Those docs are conformance and operator evidence, not provider certification. + +## Logs Are Not Evidence + +Logs are useful for operations, but they are not enough for audit-grade webhook +debugging. Webhookery records: + +- raw payload references and hashes; +- exact provider verification result; +- dedupe result; +- route, subscription, retry, and transformation version references; +- delivery attempts and failure classes; +- replay reason codes and free-text reasons; +- raw-payload access audit events; and +- hash-chain verification evidence. + +Normal event APIs and reports return references and hashes rather than raw +payload bodies by default. + +## Replay Without History Is Dangerous + +Replay can repair an incident, but it can also create duplicate side effects. 
+Webhookery requires replay reason evidence, records the replay mode, links new +work back to the original event or delivery, and preserves the original +history. Replay is at-least-once work, not exactly-once delivery. + +Use these surfaces during investigation: + +```bash +whcp events search --status dlq --since 24h --api-key "$WEBHOOKERY_API_KEY" +whcp events timeline --event-id evt_... --format markdown --api-key "$WEBHOOKERY_API_KEY" +whcp incidents generate-report --incident-id inc_... --reason "support handoff" --api-key "$WEBHOOKERY_API_KEY" +whcp audit verify-bundle --file evidence.tar.gz +``` + +## What Webhookery Is Good For + +Webhookery is a fit when you need: + +- self-hosted inbound provider webhook capture; +- durable evidence before inbound success; +- provider-aware verification and dedupe evidence; +- delivery attempts, retry, DLQ, and replay history; +- auditable raw-payload access; +- evidence bundles and incident reports; and +- a PostgreSQL-first control plane for controlled self-hosted pilots. + +## When Not To Use Webhookery + +Do not use Webhookery when you primarily need: + +- a hosted webhook sender; +- a generic event bus or workflow engine; +- a marketplace of integrations; +- multi-region active-active guarantees; +- provider certification or provider-side completeness guarantees; +- exactly-once delivery; or +- legal/compliance evidentiary certification. + +For the full product promise and non-claims, use `docs/security-promise.md`. diff --git a/examples/webhook-evidence-demo/README.md b/examples/webhook-evidence-demo/README.md new file mode 100644 index 0000000..9e4d773 --- /dev/null +++ b/examples/webhook-evidence-demo/README.md @@ -0,0 +1,92 @@ +# Webhook Evidence Demo + +This example is a deterministic local evidence demo for evaluators. It uses the +same fake-provider and fake-receiver paths as the release-candidate E2E tests. +It does not call Stripe, GitHub, Shopify, Slack, AWS, Vault, or customer +receivers. 
+ +The demo proves the core Webhookery story: + +1. A Stripe-style event is accepted only after durable capture. +2. The exact raw bytes are verified and preserved. +3. A route creates signed outbound delivery work. +4. A downstream receiver failure records delivery-attempt and DLQ evidence. +5. Replay creates new work after receiver recovery without mutating the + original event. +6. A first-class incident links the failed event to a human-readable report. +7. The demo exports and verifies an incident evidence bundle locally. +8. Invalid signatures are quarantined and never routed. +9. Retention, audit export, and audit-chain verification preserve evidence. + +## Prerequisites + +- Go matching `go.mod` +- PostgreSQL reachable through `WEBHOOKERY_TEST_DATABASE_URL` +- A disposable database; the tests create and clean their own records but should + not be run against production data + +Example local database: + +```bash +docker compose up -d postgres +export WEBHOOKERY_TEST_DATABASE_URL='postgres://webhookery:change-me@localhost:5432/webhookery?sslmode=disable' +``` + +## Run + +From the repository root: + +```bash +examples/webhook-evidence-demo/run.sh +``` + +Expected result: + +```text +demo: running local webhook evidence demo +demo: failed payment webhook incident packet +demo: provider ingest to signed delivery +demo: invalid signature quarantine +demo: retry, DLQ release, and replay modes +demo: retention, export, and audit-chain permission gates +demo: scenario result: downstream failure recorded before replay +demo: scenario result: replay delivery succeeded after receiver recovery +demo: output: .../examples/webhook-evidence-demo/output +demo: completed +``` + +If `WEBHOOKERY_TEST_DATABASE_URL` is not set, the script exits with setup +instructions instead of silently skipping the evidence path. 
+ +The script writes a sanitized local packet to +`examples/webhook-evidence-demo/output/` by default: + +```text +incident-report.md +incident-report.json +evidence-manifest.json +verify-output.json +README.md +evidence.tar.gz +``` + +`verify-output.json` contains the local bundle verification result. A successful +demo has `result.valid: true`. To choose a different output directory, set +`WEBHOOKERY_DEMO_OUTPUT_DIR`; the path must stay inside the repository so the +script cannot overwrite arbitrary operator files. + +## Fixtures + +- `fixtures/stripe-invoice-paid.json` is the synthetic provider payload used in + examples and screenshots. +- `fixtures/invalid-stripe-signature-notes.md` explains the invalid-signature + scenario without storing real provider secrets. + +## Safety + +Do not replace the fixture data with real customer events. Demo output is +generated with synthetic IDs and redaction checks for the human-readable files, +but screenshots, videos, release evidence, support requests, and issues must +still be reviewed before sharing. They must not include API keys, bearer tokens, +webhook secrets, raw provider signatures, private keys, database URLs with +passwords, raw customer payloads, or customer PII. diff --git a/examples/webhook-evidence-demo/fixtures/invalid-stripe-signature-notes.md b/examples/webhook-evidence-demo/fixtures/invalid-stripe-signature-notes.md new file mode 100644 index 0000000..0360eeb --- /dev/null +++ b/examples/webhook-evidence-demo/fixtures/invalid-stripe-signature-notes.md @@ -0,0 +1,15 @@ +# Invalid Signature Scenario + +The release-candidate E2E path sends a Stripe-style payload with a signature +computed from the wrong synthetic secret. + +Expected behavior: + +- Webhookery stores rejection/quarantine evidence where feasible. +- The event is not accepted as trusted. +- No route creates side-effecting delivery work. +- The failure reason is visible without exposing the signing secret or raw + signature value. 
+ +This fixture intentionally does not contain a real Stripe signing secret or a +real signature header. diff --git a/examples/webhook-evidence-demo/fixtures/stripe-invoice-paid.json b/examples/webhook-evidence-demo/fixtures/stripe-invoice-paid.json new file mode 100644 index 0000000..1d2852a --- /dev/null +++ b/examples/webhook-evidence-demo/fixtures/stripe-invoice-paid.json @@ -0,0 +1,13 @@ +{ + "id": "evt_demo_invoice_paid", + "type": "invoice.paid", + "account": "acct_demo", + "data": { + "object": { + "id": "in_demo", + "customer": "cus_demo", + "amount_paid": 4200, + "currency": "eur" + } + } +} diff --git a/examples/webhook-evidence-demo/run.sh b/examples/webhook-evidence-demo/run.sh new file mode 100755 index 0000000..4c43d75 --- /dev/null +++ b/examples/webhook-evidence-demo/run.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env sh +set -eu + +repo_root="$(CDPATH= cd -- "$(dirname -- "$0")/../.." && pwd -P)" +cd "$repo_root" + +say() { + printf '%s\n' "demo: $*" +} + +if [ -z "${WEBHOOKERY_TEST_DATABASE_URL:-}" ]; then + printf '%s\n' "demo: WEBHOOKERY_TEST_DATABASE_URL is required" >&2 + printf '%s\n' "demo: start local postgres with: docker compose up -d postgres" >&2 + printf '%s\n' "demo: then export WEBHOOKERY_TEST_DATABASE_URL='postgres://webhookery:change-me@localhost:5432/webhookery?sslmode=disable'" >&2 + exit 2 +fi + +say "running local webhook evidence demo" +output_dir="${WEBHOOKERY_DEMO_OUTPUT_DIR:-examples/webhook-evidence-demo/output}" +case "$output_dir" in + /*) ;; + *) output_dir="$repo_root/$output_dir" ;; +esac +if [ "$output_dir" = "$repo_root" ]; then + printf '%s\n' "demo: WEBHOOKERY_DEMO_OUTPUT_DIR must not be the repository root" >&2 + exit 2 +fi +case "$output_dir/" in + "$repo_root"/*) ;; + *) + printf '%s\n' "demo: WEBHOOKERY_DEMO_OUTPUT_DIR must be inside the repository" >&2 + exit 2 + ;; +esac +mkdir -p "$output_dir" +output_dir="$(CDPATH= cd -- "$output_dir" && pwd -P)" +if [ "$output_dir" = "$repo_root" ]; then + printf '%s\n' "demo: 
WEBHOOKERY_DEMO_OUTPUT_DIR must not resolve to the repository root" >&2 + exit 2 +fi +case "$output_dir/" in + "$repo_root"/*) ;; + *) + printf '%s\n' "demo: WEBHOOKERY_DEMO_OUTPUT_DIR must resolve inside the repository" >&2 + exit 2 + ;; +esac +for file in incident-report.md incident-report.json evidence-manifest.json verify-output.json README.md evidence.tar.gz; do + rm -f "$output_dir/$file" +done + +say "failed payment webhook incident packet" +WEBHOOKERY_DEMO_OUTPUT_DIR="$output_dir" go test ./internal/e2e -run '^TestRCE2EFailedPaymentWebhookIncidentPacketDemo$' -count=1 + +say "provider ingest to signed delivery" +go test ./internal/e2e -run '^TestRCE2EProviderIngestToSignedDelivery$' -count=1 + +say "invalid signature quarantine" +go test ./internal/e2e -run '^TestRCE2EInvalidProviderSignatureQuarantinesWithoutRouting$' -count=1 + +say "retry, DLQ release, and replay modes" +go test ./internal/e2e -run '^TestRCE2ERetryExhaustionDLQReleaseAndReplayModes$' -count=1 + +say "retention, export, and audit-chain permission gates" +go test ./internal/e2e -run '^TestRCE2EEvidenceLifecycleRetentionExportAndPermissionGates$' -count=1 + +for file in incident-report.md incident-report.json evidence-manifest.json verify-output.json README.md evidence.tar.gz; do + if [ ! 
-s "$output_dir/$file" ]; then + printf '%s\n' "demo: expected output file was not generated: $output_dir/$file" >&2 + exit 1 + fi +done + +say "scenario result: downstream failure recorded before replay" +say "scenario result: replay delivery succeeded after receiver recovery" +say "output: $output_dir" +say "completed" diff --git a/go.mod b/go.mod index b98b3bd..3ed1831 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module webhookery -go 1.25.0 +go 1.25.11 require ( github.com/aatuh/api-toolkit/v2 v2.1.0 diff --git a/internal/adapters/deliveryhttp/client.go b/internal/adapters/deliveryhttp/client.go index a4e25e9..60e043f 100644 --- a/internal/adapters/deliveryhttp/client.go +++ b/internal/adapters/deliveryhttp/client.go @@ -32,9 +32,16 @@ type Result struct { FailureClass string } -func HTTPClient(timeout time.Duration) *http.Client { +var errUnsafeCustomTransport = errors.New("custom HTTP transport cannot enforce pinned egress") + +func HTTPClient(timeout time.Duration, resolvers ...ssrf.Resolver) *http.Client { + var resolver ssrf.Resolver + if len(resolvers) > 0 { + resolver = resolvers[0] + } return &http.Client{ - Timeout: timeout, + Timeout: timeout, + Transport: ssrf.NewPinnedTransport(nil, resolver, ssrf.DefaultPolicy()), CheckRedirect: func(req *http.Request, via []*http.Request) error { return http.ErrUseLastResponse }, @@ -77,11 +84,16 @@ func (c Client) Deliver(ctx context.Context, rawURL string, body []byte) (Result } httpClient, err := c.httpClient() if err != nil { - return Result{FailureClass: "client_certificate_error"}, err + failureClass := "client_certificate_error" + if errors.Is(err, errUnsafeCustomTransport) { + failureClass = "client_configuration_error" + } + return Result{FailureClass: failureClass}, err } resp, err := httpClient.Do(req) if err != nil { - return Result{FailureClass: "network_error"}, err + failureClass, safeErr := safeDoError(err) + return Result{FailureClass: failureClass}, safeErr } defer func() { _ = 
resp.Body.Close() }() bodyBytes, err := readTruncated(resp.Body, 16<<10) @@ -97,11 +109,12 @@ func (c Client) Deliver(ctx context.Context, rawURL string, body []byte) (Result } func (c Client) httpClient() (*http.Client, error) { + base, err := safeHTTPClient(c.HTTP, 10*time.Second, c.SSRF.Resolver) + if err != nil { + return nil, err + } if len(c.MTLSClientCertPEM) == 0 && len(c.MTLSClientKeyPEM) == 0 { - if c.HTTP != nil { - return c.HTTP, nil - } - return HTTPClient(10 * time.Second), nil + return base, nil } if len(c.MTLSClientCertPEM) == 0 || len(c.MTLSClientKeyPEM) == 0 { return nil, errors.New("mTLS client certificate and key are required together") @@ -110,22 +123,11 @@ func (c Client) httpClient() (*http.Client, error) { if err != nil { return nil, err } - base := HTTPClient(10 * time.Second) - if c.HTTP != nil { - copy := *c.HTTP - base = &copy - if base.CheckRedirect == nil { - base.CheckRedirect = func(req *http.Request, via []*http.Request) error { - return http.ErrUseLastResponse - } - } - } - transport := http.DefaultTransport.(*http.Transport).Clone() - if base.Transport != nil { - if typed, ok := base.Transport.(*http.Transport); ok { - transport = typed.Clone() - } + transport, ok := base.Transport.(*http.Transport) + if !ok { + return nil, errUnsafeCustomTransport } + transport = ssrf.NewPinnedTransport(transport, c.SSRF.Resolver, ssrf.DefaultPolicy()) tlsConfig := &tls.Config{MinVersion: tls.VersionTLS12} if transport.TLSClientConfig != nil { tlsConfig = transport.TLSClientConfig.Clone() @@ -139,6 +141,33 @@ func (c Client) httpClient() (*http.Client, error) { return base, nil } +func safeHTTPClient(base *http.Client, timeout time.Duration, resolver ssrf.Resolver) (*http.Client, error) { + if base == nil { + return HTTPClient(timeout, resolver), nil + } + copy := *base + copy.CheckRedirect = func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + } + switch transport := copy.Transport.(type) { + case nil: + 
copy.Transport = ssrf.NewPinnedTransport(nil, resolver, ssrf.DefaultPolicy()) + case *http.Transport: + copy.Transport = ssrf.NewPinnedTransport(transport, resolver, ssrf.DefaultPolicy()) + default: + return nil, errUnsafeCustomTransport + } + return &copy, nil +} + +func safeDoError(err error) (string, error) { + var policyErr ssrf.PolicyError + if errors.As(err, &policyErr) { + return "policy_blocked", policyErr + } + return "network_error", errors.New("delivery network error") +} + func readTruncated(body io.Reader, max int64) ([]byte, error) { return io.ReadAll(io.LimitReader(body, max)) } diff --git a/internal/adapters/deliveryhttp/client_test.go b/internal/adapters/deliveryhttp/client_test.go index de7f2fe..27ea600 100644 --- a/internal/adapters/deliveryhttp/client_test.go +++ b/internal/adapters/deliveryhttp/client_test.go @@ -2,10 +2,12 @@ package deliveryhttp import ( "context" + "errors" "io" "net/http" "net/http/httptest" "net/netip" + "strings" "testing" "time" @@ -50,6 +52,80 @@ func TestClientDoesNotFollowRedirects(t *testing.T) { } } +func TestSafeHTTPClientOverridesPermissiveRedirectPolicy(t *testing.T) { + base := &http.Client{ + CheckRedirect: func(*http.Request, []*http.Request) error { + return nil + }, + } + client, err := safeHTTPClient(base, 2*time.Second, ssrf.StaticResolver{}) + if err != nil { + t.Fatal(err) + } + err = client.CheckRedirect(&http.Request{}, []*http.Request{{}}) + if err == nil { + t.Fatal("copied HTTP clients must not preserve permissive redirect policies") + } +} + +func TestHTTPClientUsesPinnedEgressTransport(t *testing.T) { + client := HTTPClient(2*time.Second, ssrf.StaticResolver{ + "customer.example.com": {netip.MustParseAddr("10.0.0.10")}, + }) + transport, ok := client.Transport.(*http.Transport) + if !ok { + t.Fatalf("expected pinned HTTP transport, got %T", client.Transport) + } + _, err := transport.DialContext(context.Background(), "tcp", "customer.example.com:443") + var policyErr ssrf.PolicyError + if 
!errors.As(err, &policyErr) { + t.Fatalf("expected dial-time SSRF policy error, got %v", err) + } +} + +func TestSafeDoErrorDoesNotLeakCustomerURLTokens(t *testing.T) { + failureClass, err := safeDoError(errors.New(`Post "https://customer.example/hook?token=secret-token": dial tcp 203.0.113.10:443: connect: refused`)) + if failureClass != "network_error" { + t.Fatalf("expected network_error, got %q", failureClass) + } + if err == nil || strings.Contains(err.Error(), "secret-token") || strings.Contains(err.Error(), "customer.example") { + t.Fatalf("network error leaked customer URL detail: %v", err) + } +} + +func TestBuildRequestPolicyBlockDoesNotLeakURLToken(t *testing.T) { + client := Client{ + Secret: []byte("secret"), + SSRF: ssrf.Validator{Resolver: ssrf.StaticResolver{ + "internal.example.com": {netip.MustParseAddr("10.0.0.10")}, + }}, + } + _, err := client.BuildRequest(context.Background(), "https://internal.example.com/hook?token=secret-token", []byte("{}")) + if err == nil { + t.Fatal("expected blocked endpoint URL") + } + if strings.Contains(err.Error(), "secret-token") || strings.Contains(err.Error(), "internal.example.com") { + t.Fatalf("blocked endpoint error leaked URL detail: %v", err) + } +} + +func TestSafeDoErrorHandlesTimeoutStormWithoutLeakingReceiver(t *testing.T) { + timeoutErrors := []error{ + context.DeadlineExceeded, + errors.New(`Post "https://receiver.example/hook?token=secret-token": context deadline exceeded`), + errors.New(`Post "https://receiver.example/hook": net/http: request canceled while waiting for connection`), + } + for _, timeoutErr := range timeoutErrors { + failureClass, err := safeDoError(timeoutErr) + if failureClass != "network_error" { + t.Fatalf("expected network_error, got %q", failureClass) + } + if err == nil || strings.Contains(err.Error(), "secret-token") || strings.Contains(err.Error(), "receiver.example") { + t.Fatalf("timeout storm error leaked receiver detail: %v", err) + } + } +} + func 
TestClientRejectsInvalidMTLSCertificatePair(t *testing.T) { client := Client{ Secret: []byte("secret"), @@ -77,3 +153,52 @@ func TestTruncateResponseBody(t *testing.T) { t.Fatalf("unexpected truncated body: %q", body) } } + +func TestClassifyDeliveryHTTPStatuses(t *testing.T) { + tests := []struct { + status int + want string + }{ + {status: http.StatusOK, want: "success"}, + {status: http.StatusAccepted, want: "success"}, + {status: http.StatusFound, want: "redirect_blocked"}, + {status: http.StatusRequestTimeout, want: "temporary_http"}, + {status: http.StatusTooManyRequests, want: "temporary_http"}, + {status: http.StatusInternalServerError, want: "temporary_http"}, + {status: http.StatusBadRequest, want: "permanent_http"}, + {status: http.StatusUnauthorized, want: "permanent_http"}, + } + for _, tt := range tests { + t.Run(http.StatusText(tt.status), func(t *testing.T) { + if got := classify(tt.status); got != tt.want { + t.Fatalf("classify(%d)=%q want %q", tt.status, got, tt.want) + } + }) + } +} + +func TestClientRejectsUnsafeCustomHTTPTransport(t *testing.T) { + client := Client{ + HTTP: &http.Client{Transport: roundTripFunc(func(*http.Request) (*http.Response, error) { + t.Fatal("unsafe custom transport must not be used") + return nil, nil + })}, + Secret: []byte("secret"), + SSRF: ssrf.Validator{Resolver: ssrf.StaticResolver{ + "example.com": {netip.MustParseAddr("93.184.216.34")}, + }}, + } + result, err := client.Deliver(context.Background(), "https://example.com/webhook", []byte("{}")) + if err == nil { + t.Fatal("expected unsafe custom transport rejection") + } + if result.FailureClass != "client_configuration_error" { + t.Fatalf("expected client_configuration_error, got %+v", result) + } +} + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req) +} diff --git a/internal/adapters/httpapi/openapi_contract_test.go 
b/internal/adapters/httpapi/openapi_contract_test.go new file mode 100644 index 0000000..041191c --- /dev/null +++ b/internal/adapters/httpapi/openapi_contract_test.go @@ -0,0 +1,224 @@ +package httpapi + +import ( + "net/http" + "os" + "sort" + "strings" + "testing" + + "github.com/go-chi/chi/v5" + "go.yaml.in/yaml/v3" +) + +var openAPIMethods = map[string]struct{}{ + "get": {}, + "post": {}, + "put": {}, + "patch": {}, + "delete": {}, +} + +type openAPIContract struct { + Paths map[string]map[string]openAPIOperation `yaml:"paths"` +} + +type openAPIOperation struct { + RequestBody *openAPIRequestBody `yaml:"requestBody"` + Responses map[string]openAPIResponse `yaml:"responses"` +} + +type openAPIRequestBody struct { + Content map[string]openAPIMediaType `yaml:"content"` +} + +type openAPIResponse struct { + Content map[string]openAPIMediaType `yaml:"content"` +} + +type openAPIMediaType struct { + Example any `yaml:"example"` + Examples map[string]any `yaml:"examples"` +} + +func TestRouteTableMatchesOpenAPI(t *testing.T) { + doc := loadOpenAPIContract(t) + routerRoutes := registeredRouteMethods(t) + openAPIRoutes := openAPIRouteMethods(doc) + + var missing []string + for route, methods := range routerRoutes { + for method := range methods { + if !openAPIRoutes[route][method] { + missing = append(missing, strings.ToUpper(method)+" "+route) + } + } + } + if len(missing) > 0 { + sort.Strings(missing) + t.Fatalf("registered routes missing from openapi.yaml:\n%s", strings.Join(missing, "\n")) + } + + var undocumented []string + for route, methods := range openAPIRoutes { + for method := range methods { + if routerRoutes[route][method] { + continue + } + if documentedProviderAliasIsCovered(route, method, routerRoutes) { + continue + } + undocumented = append(undocumented, strings.ToUpper(method)+" "+route) + } + } + if len(undocumented) > 0 { + sort.Strings(undocumented) + t.Fatalf("openapi.yaml paths not registered by router:\n%s", strings.Join(undocumented, "\n")) + 
} +} + +func TestHighRiskOpenAPIOperationsHaveExamples(t *testing.T) { + doc := loadOpenAPIContract(t) + checks := []struct { + name string + path string + method string + request bool + response bool + }{ + {name: "ingest", path: "/v1/ingest/{tenant_id}/{source_id}", method: "post", request: true, response: true}, + {name: "raw read", path: "/v1/events/{event_id}/raw", method: "get", response: true}, + {name: "replay", path: "/v1/replay-jobs", method: "post", request: true, response: true}, + {name: "export", path: "/v1/audit-events:export", method: "post", request: true, response: true}, + {name: "auth", path: "/v1/auth/session", method: "get", response: true}, + {name: "alert", path: "/v1/alerts", method: "post", request: true, response: true}, + {name: "notification", path: "/v1/notification-channels", method: "post", request: true, response: true}, + {name: "siem", path: "/v1/siem-sinks", method: "post", request: true, response: true}, + {name: "producer token", path: "/v1/oauth/token", method: "post", request: true, response: true}, + } + + for _, check := range checks { + t.Run(check.name, func(t *testing.T) { + op := openAPIOperationFor(t, doc, check.path, check.method) + if check.request && !requestBodyHasExample(op) { + t.Fatalf("%s %s must include a request example", strings.ToUpper(check.method), check.path) + } + if check.response && !successResponseHasExample(op) { + t.Fatalf("%s %s must include a success response example", strings.ToUpper(check.method), check.path) + } + }) + } +} + +func loadOpenAPIContract(t *testing.T) openAPIContract { + t.Helper() + body, err := os.ReadFile("../../../openapi.yaml") + if err != nil { + t.Fatal(err) + } + var doc openAPIContract + if err := yaml.Unmarshal(body, &doc); err != nil { + t.Fatal(err) + } + return doc +} + +func registeredRouteMethods(t *testing.T) map[string]map[string]bool { + t.Helper() + routes, ok := NewServer(ServerConfig{}).Routes().(chi.Routes) + if !ok { + t.Fatal("server routes do not expose 
chi route metadata") + } + out := map[string]map[string]bool{} + if err := chi.Walk(routes, func(method, route string, _ http.Handler, _ ...func(http.Handler) http.Handler) error { + method = strings.ToLower(method) + if _, ok := openAPIMethods[method]; !ok { + return nil + } + addRouteMethod(out, route, method) + return nil + }); err != nil { + t.Fatal(err) + } + return out +} + +func openAPIRouteMethods(doc openAPIContract) map[string]map[string]bool { + out := map[string]map[string]bool{} + for path, pathItem := range doc.Paths { + for method := range pathItem { + method = strings.ToLower(method) + if _, ok := openAPIMethods[method]; ok { + addRouteMethod(out, path, method) + } + } + } + return out +} + +func addRouteMethod(routes map[string]map[string]bool, path, method string) { + if routes[path] == nil { + routes[path] = map[string]bool{} + } + routes[path][method] = true +} + +func documentedProviderAliasIsCovered(route, method string, routerRoutes map[string]map[string]bool) bool { + if !routerRoutes["/v1/ingest/{tenant_id}/{source_id}"][method] { + return false + } + switch route { + case "/v1/ingest/stripe/{source_id}", + "/v1/ingest/github/{source_id}", + "/v1/ingest/shopify/{source_id}", + "/v1/ingest/slack/{source_id}", + "/v1/ingest/cloudevents/{source_id}", + "/v1/ingest/generic-jwt/{source_id}": + return true + default: + return false + } +} + +func openAPIOperationFor(t *testing.T, doc openAPIContract, path, method string) openAPIOperation { + t.Helper() + pathItem, ok := doc.Paths[path] + if !ok { + t.Fatalf("openapi path missing: %s", path) + } + op, ok := pathItem[strings.ToLower(method)] + if !ok { + t.Fatalf("openapi operation missing: %s %s", strings.ToUpper(method), path) + } + return op +} + +func requestBodyHasExample(op openAPIOperation) bool { + if op.RequestBody == nil { + return false + } + for _, media := range op.RequestBody.Content { + if mediaHasExample(media) { + return true + } + } + return false +} + +func 
successResponseHasExample(op openAPIOperation) bool { + for status, response := range op.Responses { + if !strings.HasPrefix(status, "2") { + continue + } + for _, media := range response.Content { + if mediaHasExample(media) { + return true + } + } + } + return false +} + +func mediaHasExample(media openAPIMediaType) bool { + return media.Example != nil || len(media.Examples) > 0 +} diff --git a/internal/adapters/httpapi/server.go b/internal/adapters/httpapi/server.go index 24db459..b39e2f8 100644 --- a/internal/adapters/httpapi/server.go +++ b/internal/adapters/httpapi/server.go @@ -2,25 +2,13 @@ package httpapi import ( "context" - "crypto/subtle" - "encoding/base64" - "encoding/json" - "errors" - "fmt" - "io" "net/http" - "net/url" - "strconv" - "strings" + "net/netip" - "github.com/aatuh/api-toolkit/v2/httpx/identity" "github.com/go-chi/chi/v5" "webhookery/internal/adapters/httpui" "webhookery/internal/app" - "webhookery/internal/authz" - "webhookery/internal/domain" - "webhookery/internal/problem" ) const ( @@ -41,6 +29,7 @@ type ServerConfig struct { OpenAPI []byte EnableUI bool SessionCookieSecure bool + TrustedProxyCIDRs []netip.Prefix Health func(context.Context) error } @@ -61,8 +50,7 @@ func (s *Server) Routes() http.Handler { r.Get("/openapi.yaml", s.openapi) r.Route("/v1", func(r chi.Router) { - r.Post("/ingest/{tenant_id}/{source_id}", s.ingestGeneric) - r.Post("/ingest/{provider}/{source_id}", s.ingestProvider) + r.Post("/ingest/{tenant_id}/{source_id}", s.ingestGenericOrProvider) r.Post("/oauth/token", s.issueOAuthToken) r.With(s.requireProducerAuth).Post("/events", s.ingestProductEvent) r.Get("/auth/oidc/login", s.oidcLogin) @@ -183,6 +171,14 @@ func (s *Server) Routes() http.Handler { r.Get("/events/{event_id}/raw", s.getRawPayload) r.Get("/events/{event_id}/normalized", s.getNormalizedEvent) r.Get("/events/{event_id}/timeline", s.getEventTimeline) + r.Get("/incidents", s.listIncidents) + r.Post("/incidents", s.createIncident) + 
r.Get("/incidents/{incident_id}", s.getIncident) + r.Post("/incidents/{incident_id}/events", s.addIncidentEvent) + r.Delete("/incidents/{incident_id}/events/{event_id}", s.removeIncidentEvent) + r.Post("/incidents/{incident_id}/generate-report", s.generateIncidentReport) + r.Get("/incidents/{incident_id}/report", s.getIncidentReport) + r.Post("/incidents/{incident_id}/evidence-export", s.createIncidentEvidenceExport) r.Get("/transformations", s.listTransformations) r.Post("/transformations", s.createTransformation) r.Get("/transformations/{transformation_id}", s.getTransformation) @@ -195,12 +191,16 @@ func (s *Server) Routes() http.Handler { r.Post("/deliveries/{delivery_id}:cancel", s.cancelDelivery) r.Get("/delivery-attempts/{attempt_id}", s.getDeliveryAttempt) r.Post("/replay-jobs:dry-run", s.dryRunReplay) + r.Post("/replay-jobs/preview", s.dryRunReplay) r.Get("/replay-jobs", s.listReplayJobs) r.Post("/replay-jobs", s.createReplay) r.Post("/replay-jobs/{replay_job_id}:approve", s.approveReplayJob) r.Post("/replay-jobs/{replay_job_id}:pause", s.pauseReplayJob) r.Post("/replay-jobs/{replay_job_id}:resume", s.resumeReplayJob) r.Post("/replay-jobs/{replay_job_id}:cancel", s.cancelReplayJob) + r.Get("/replay-approval-policies", s.listReplayApprovalPolicies) + r.Post("/replay-approval-policies", s.createReplayApprovalPolicy) + r.Delete("/replay-approval-policies/{policy_id}", s.disableReplayApprovalPolicy) r.Post("/reconciliation-jobs:dry-run", s.dryRunReconciliation) r.Get("/reconciliation-jobs", s.listReconciliationJobs) r.Post("/reconciliation-jobs", s.createReconciliationJob) @@ -269,2633 +269,4 @@ func (s *Server) Routes() http.Handler { return r } -func (s *Server) health(w http.ResponseWriter, r *http.Request) { - writeJSON(w, http.StatusOK, map[string]any{"ok": true}) -} - -func (s *Server) ready(w http.ResponseWriter, r *http.Request) { - if s.cfg.Health != nil { - if err := s.cfg.Health(r.Context()); err != nil { - writeProblem(w, 
problem.New(http.StatusServiceUnavailable, "not_ready", "Not ready", "A required dependency is unavailable.", requestID(r), true)) - return - } - } - writeJSON(w, http.StatusOK, map[string]any{"ok": true}) -} - -func (s *Server) prometheusMetrics(w http.ResponseWriter, r *http.Request) { - metrics, err := s.cfg.Control.PublicOpsMetrics(r.Context()) - if err != nil { - writeProblem(w, problem.Internal(requestID(r))) - return - } - w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8") - _, _ = w.Write([]byte(formatPrometheus(metrics))) -} - -func (s *Server) openapi(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/yaml") - _, _ = w.Write(s.cfg.OpenAPI) -} - -func (s *Server) requireAuth(next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - requestID := requestID(r) - token := app.BearerToken(r.Header.Get("Authorization")) - authenticator := s.cfg.Auth - if token == "" && s.cfg.SessionAuth != nil { - if cookie, err := r.Cookie(sessionCookieName); err == nil { - token = cookie.Value - authenticator = s.cfg.SessionAuth - } - } - if authenticator == nil { - writeProblem(w, problem.Unauthorized(requestID)) - return - } - actor, err := authenticator.Authenticate(r.Context(), token) - if err != nil { - writeProblem(w, problem.Unauthorized(requestID)) - return - } - next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) - }) -} - -func (s *Server) requireProducerAuth(next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - requestID := requestID(r) - if r.TLS != nil && len(r.TLS.PeerCertificates) > 0 && s.cfg.ProducerMTLSAuth.Lookup != nil { - if len(r.TLS.VerifiedChains) == 0 { - writeProblem(w, problem.Unauthorized(requestID)) - return - } - actor, err := s.cfg.ProducerMTLSAuth.AuthenticateCertificate(r.Context(), r.TLS.PeerCertificates[0]) - if err != nil { - 
writeProblem(w, problem.Unauthorized(requestID)) - return - } - if !authz.Can(actor, "events:write", actor.TenantID) { - writeProblem(w, problem.Forbidden(requestID)) - return - } - next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) - return - } - token := app.BearerToken(r.Header.Get("Authorization")) - if s.cfg.ProducerAuth != nil { - actor, err := s.cfg.ProducerAuth.Authenticate(r.Context(), token) - if err == nil { - if !authz.Can(actor, "events:write", actor.TenantID) { - writeProblem(w, problem.Forbidden(requestID)) - return - } - next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) - return - } - if !errors.Is(err, app.ErrUnauthorized) { - writeProblem(w, problem.Internal(requestID)) - return - } - } - if s.cfg.Auth == nil { - writeProblem(w, problem.Unauthorized(requestID)) - return - } - actor, err := s.cfg.Auth.Authenticate(r.Context(), token) - if err != nil { - writeProblem(w, problem.Unauthorized(requestID)) - return - } - if !authz.Can(actor, "events:write", actor.TenantID) { - writeProblem(w, problem.Forbidden(requestID)) - return - } - next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) - }) -} - -func (s *Server) requireSCIMAuth(next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if s.cfg.Control == nil { - writeProblem(w, problem.Unauthorized(requestID(r))) - return - } - actor, err := s.cfg.Control.AuthenticateSCIMToken(r.Context(), app.BearerToken(r.Header.Get("Authorization"))) - if err != nil { - writeProblem(w, problem.Unauthorized(requestID(r))) - return - } - next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) - }) -} - -func (s *Server) createAPIKey(w http.ResponseWriter, r *http.Request) { - var req app.CreateAPIKeyRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateAPIKey(r.Context(), 
actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listAPIKeys(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListAPIKeys(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) revokeAPIKey(w http.ResponseWriter, r *http.Request) { - var req app.RevokeAPIKeyRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RevokeAPIKey(r.Context(), actorFrom(r), chi.URLParam(r, "api_key_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) issueOAuthToken(w http.ResponseWriter, r *http.Request) { - body, ok := readLimitedBody(w, r, 64<<10) - if !ok { - return - } - form, err := url.ParseQuery(string(body)) - if err != nil { - writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "Invalid form body.")) - return - } - if form.Get("grant_type") != "client_credentials" { - writeProblem(w, problem.BadRequest(requestID(r), "unsupported_grant_type", "Only client_credentials grant is supported.")) - return - } - if form.Get("client_secret") != "" { - writeProblem(w, problem.BadRequest(requestID(r), "invalid_request", "Client credentials must use HTTP Basic authentication.")) - return - } - clientID, clientSecret, basicOK := r.BasicAuth() - if !basicOK || strings.TrimSpace(clientID) == "" || clientSecret == "" { - writeProblem(w, problem.Unauthorized(requestID(r))) - return - } - result, err := s.cfg.Control.IssueProducerToken(r.Context(), clientID, clientSecret) - if err != nil { - s.writeError(w, r, err) - return - } - w.Header().Set("Cache-Control", "no-store") - w.Header().Set("Pragma", "no-cache") - writeJSON(w, http.StatusOK, result) -} - -func (s *Server) createProducerClient(w http.ResponseWriter, r *http.Request) { - var req 
app.CreateProducerClientRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateProducerClient(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listProducerClients(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListProducerClients(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getProducerClient(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetProducerClient(r.Context(), actorFrom(r), chi.URLParam(r, "client_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateProducerClient(w http.ResponseWriter, r *http.Request) { - var req app.UpdateProducerClientRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateProducerClient(r.Context(), actorFrom(r), chi.URLParam(r, "client_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteProducerClient(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteProducerClient(r.Context(), actorFrom(r), chi.URLParam(r, "client_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) rotateProducerClientSecret(w http.ResponseWriter, r *http.Request) { - var req app.RotateProducerClientSecretRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RotateProducerClientSecret(r.Context(), actorFrom(r), chi.URLParam(r, "client_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) 
createProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { - var req app.CreateProducerMTLSIdentityRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateProducerMTLSIdentity(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listProducerMTLSIdentities(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListProducerMTLSIdentities(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetProducerMTLSIdentity(r.Context(), actorFrom(r), chi.URLParam(r, "identity_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { - var req app.UpdateProducerMTLSIdentityRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateProducerMTLSIdentity(r.Context(), actorFrom(r), chi.URLParam(r, "identity_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteProducerMTLSIdentity(r.Context(), actorFrom(r), chi.URLParam(r, "identity_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) verifyProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { - var req app.VerifyProducerMTLSIdentityRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.VerifyProducerMTLSIdentity(r.Context(), actorFrom(r), chi.URLParam(r, 
"identity_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createSource(w http.ResponseWriter, r *http.Request) { - var req app.CreateSourceRequest - if !decodeJSON(w, r, &req) { - return - } - source, err := s.cfg.Control.CreateSource(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, publicSource(source)) -} - -func (s *Server) listSources(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListSources(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - out := make([]map[string]any, 0, len(items)) - for _, item := range items { - out = append(out, publicSource(item)) - } - writeJSON(w, http.StatusOK, page(out)) -} - -func (s *Server) getSource(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetSource(r.Context(), actorFrom(r), chi.URLParam(r, "source_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, publicSource(item)) -} - -func (s *Server) updateSource(w http.ResponseWriter, r *http.Request) { - var req app.UpdateSourceRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateSource(r.Context(), actorFrom(r), chi.URLParam(r, "source_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, publicSource(item)) -} - -func (s *Server) deleteSource(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteSource(r.Context(), actorFrom(r), chi.URLParam(r, "source_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, publicSource(item)) -} - -func (s *Server) createProviderConnection(w http.ResponseWriter, r *http.Request) { - var req app.CreateProviderConnectionRequest - if 
!decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateProviderConnection(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listProviderConnections(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListProviderConnections(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getProviderConnection(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetProviderConnection(r.Context(), actorFrom(r), chi.URLParam(r, "connection_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) verifyProviderConnection(w http.ResponseWriter, r *http.Request) { - var req app.ProviderConnectionStateRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.VerifyProviderConnection(r.Context(), actorFrom(r), chi.URLParam(r, "connection_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) revokeProviderConnection(w http.ResponseWriter, r *http.Request) { - var req app.ProviderConnectionStateRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RevokeProviderConnection(r.Context(), actorFrom(r), chi.URLParam(r, "connection_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) rotateSourceSecret(w http.ResponseWriter, r *http.Request) { - var req app.RotateSourceSecretRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RotateSourceSecret(r.Context(), actorFrom(r), chi.URLParam(r, "source_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createEndpoint(w 
http.ResponseWriter, r *http.Request) { - var req app.CreateEndpointRequest - if !decodeJSON(w, r, &req) { - return - } - endpoint, validation, err := s.cfg.Control.CreateEndpoint(r.Context(), actorFrom(r), req) - if err != nil { - if len(validation.BlockedReasons) > 0 { - writeJSON(w, http.StatusUnprocessableEntity, validation) - return - } - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, map[string]any{"endpoint": endpoint, "ssrf": validation}) -} - -func (s *Server) listEndpoints(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListEndpoints(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getEndpoint(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetEndpoint(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateEndpoint(w http.ResponseWriter, r *http.Request) { - var req app.UpdateEndpointRequest - if !decodeJSON(w, r, &req) { - return - } - item, validation, err := s.cfg.Control.UpdateEndpoint(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id"), req) - if err != nil { - if len(validation.BlockedReasons) > 0 { - writeJSON(w, http.StatusUnprocessableEntity, validation) - return - } - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteEndpoint(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteEndpoint(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) testEndpoint(w http.ResponseWriter, r *http.Request) { - var req app.TestEndpointRequest - if !decodeJSON(w, r, &req) { 
- return - } - item, err := s.cfg.Control.TestEndpoint(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusAccepted, item) -} - -func (s *Server) validateEndpointURL(w http.ResponseWriter, r *http.Request) { - var req struct { - URL string `json:"url"` - } - if !decodeJSON(w, r, &req) { - return - } - writeJSON(w, http.StatusOK, s.cfg.Control.ValidateEndpointURL(r.Context(), req.URL)) -} - -func (s *Server) rotateEndpointSecret(w http.ResponseWriter, r *http.Request) { - var req app.RotateEndpointSecretRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RotateEndpointSecret(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createSubscription(w http.ResponseWriter, r *http.Request) { - var req app.CreateSubscriptionRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateSubscription(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listSubscriptions(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListSubscriptions(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getSubscription(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetSubscription(r.Context(), actorFrom(r), chi.URLParam(r, "subscription_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateSubscription(w http.ResponseWriter, r *http.Request) { - var req app.UpdateSubscriptionRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateSubscription(r.Context(), 
actorFrom(r), chi.URLParam(r, "subscription_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteSubscription(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteSubscription(r.Context(), actorFrom(r), chi.URLParam(r, "subscription_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createRetryPolicy(w http.ResponseWriter, r *http.Request) { - var req app.CreateRetryPolicyRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateRetryPolicy(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listRetryPolicies(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListRetryPolicies(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getRetryPolicy(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetRetryPolicy(r.Context(), actorFrom(r), chi.URLParam(r, "retry_policy_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateRetryPolicy(w http.ResponseWriter, r *http.Request) { - var req app.UpdateRetryPolicyRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateRetryPolicy(r.Context(), actorFrom(r), chi.URLParam(r, "retry_policy_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteRetryPolicy(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := 
s.cfg.Control.DeleteRetryPolicy(r.Context(), actorFrom(r), chi.URLParam(r, "retry_policy_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createRoute(w http.ResponseWriter, r *http.Request) { - var req app.CreateRouteRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateRoute(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listRoutes(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListRoutes(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getRoute(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateRoute(w http.ResponseWriter, r *http.Request) { - var req app.UpdateRouteRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteRoute(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listRouteVersions(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListRouteVersions(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - 
writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) activateRoute(w http.ResponseWriter, r *http.Request) { - var req app.ActivateRouteRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.ActivateRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), req.Reason) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) dryRunRoute(w http.ResponseWriter, r *http.Request) { - var req struct { - EventID string `json:"event_id"` - } - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DryRunRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), req.EventID) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createEventType(w http.ResponseWriter, r *http.Request) { - var req app.CreateEventTypeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateEventType(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listEventTypes(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListEventTypes(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getEventType(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetEventType(r.Context(), actorFrom(r), chi.URLParam(r, "event_type")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateEventType(w http.ResponseWriter, r *http.Request) { - var req app.UpdateEventTypeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateEventType(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), req) - if err != nil { - s.writeError(w, r, err) - return - } - 
writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteEventType(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteEventType(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createEventSchema(w http.ResponseWriter, r *http.Request) { - var req app.CreateEventSchemaRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listEventSchemas(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListEventSchemas(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getEventSchema(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateEventSchema(w http.ResponseWriter, r *http.Request) { - var req app.UpdateEventSchemaRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteEventSchema(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := 
s.cfg.Control.DeleteEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) validateEventSchema(w http.ResponseWriter, r *http.Request) { - var req app.ValidateSchemaRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.ValidateEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) checkEventSchemaCompatibility(w http.ResponseWriter, r *http.Request) { - var req app.CheckSchemaCompatibilityRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CheckEventSchemaCompatibility(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listEvents(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListEvents(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getEvent(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetEvent(r.Context(), actorFrom(r), chi.URLParam(r, "event_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) getRawPayload(w http.ResponseWriter, r *http.Request) { - raw, err := s.cfg.Control.GetRawPayload(r.Context(), actorFrom(r), chi.URLParam(r, "event_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, map[string]any{ - "event_id": raw.EventID, - "raw_payload_hash": raw.SHA256, - "content_type": raw.ContentType, - "size_bytes": raw.SizeBytes, - 
"storage_backend": raw.StorageBackend, - "storage_status": raw.StorageStatus, - "body_base64": base64.StdEncoding.EncodeToString(raw.Body), - }) -} - -func (s *Server) getNormalizedEvent(w http.ResponseWriter, r *http.Request) { - includeData := strings.EqualFold(r.URL.Query().Get("include_data"), "true") - item, err := s.cfg.Control.GetNormalizedEvent(r.Context(), actorFrom(r), chi.URLParam(r, "event_id"), includeData) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) getEventTimeline(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListEventTimeline(r.Context(), actorFrom(r), chi.URLParam(r, "event_id"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) createTransformation(w http.ResponseWriter, r *http.Request) { - var req app.CreateTransformationRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateTransformation(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listTransformations(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListTransformations(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getTransformation(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetTransformation(r.Context(), actorFrom(r), chi.URLParam(r, "transformation_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createTransformationVersion(w http.ResponseWriter, r *http.Request) { - var req app.CreateTransformationVersionRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateTransformationVersion(r.Context(), 
actorFrom(r), chi.URLParam(r, "transformation_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listTransformationVersions(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListTransformationVersions(r.Context(), actorFrom(r), chi.URLParam(r, "transformation_id"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) activateTransformationVersion(w http.ResponseWriter, r *http.Request) { - var req app.ActivateTransformationVersionRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.ActivateTransformationVersion(r.Context(), actorFrom(r), chi.URLParam(r, "transformation_id"), chi.URLParam(r, "version_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) ingestProductEvent(w http.ResponseWriter, r *http.Request) { - body, ok := readLimitedBody(w, r, maxIngressBodyBytes) - if !ok { - return - } - sourceID := productSourceID(body) - if sourceID == "" { - writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "Product event body must include source_id.")) - return - } - actor := actorFrom(r) - if actor.SourceID != "" && actor.SourceID != sourceID { - writeProblem(w, problem.Forbidden(requestID(r))) - return - } - result, err := s.cfg.Ingest.Ingest(r.Context(), app.IngestRequest{ - TenantID: actor.TenantID, - SourceID: sourceID, - Provider: "internal", - RawBody: body, - Headers: headers(r.Header), - ContentType: r.Header.Get("Content-Type"), - RemoteIP: r.RemoteAddr, - }) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusAccepted, result) -} - -func productSourceID(raw []byte) string { - var req struct { - SourceID string `json:"source_id"` - } - if err := json.Unmarshal(raw, &req); err != nil { - return "" - } - return 
strings.TrimSpace(req.SourceID) -} - -func (s *Server) ingestGeneric(w http.ResponseWriter, r *http.Request) { - body, ok := readLimitedBody(w, r, maxIngressBodyBytes) - if !ok { - return - } - result, err := s.cfg.Ingest.Ingest(r.Context(), app.IngestRequest{ - TenantID: chi.URLParam(r, "tenant_id"), - SourceID: chi.URLParam(r, "source_id"), - Provider: "generic-hmac", - RawBody: body, - Headers: headers(r.Header), - ContentType: r.Header.Get("Content-Type"), - RemoteIP: r.RemoteAddr, - }) - s.writeIngestResult(w, r, result, err) -} - -func (s *Server) ingestProvider(w http.ResponseWriter, r *http.Request) { - body, ok := readLimitedBody(w, r, maxIngressBodyBytes) - if !ok { - return - } - providerName := chi.URLParam(r, "provider") - result, err := s.cfg.Ingest.IngestProviderPath(r.Context(), providerName, chi.URLParam(r, "source_id"), app.IngestRequest{ - Provider: providerName, - RawBody: body, - Headers: headers(r.Header), - ContentType: r.Header.Get("Content-Type"), - RemoteIP: r.RemoteAddr, - }) - if err == nil && result.Accepted && strings.EqualFold(providerName, "slack") { - if challenge := slackChallenge(body); challenge != "" { - writeJSON(w, http.StatusOK, map[string]string{"challenge": challenge}) - return - } - } - s.writeIngestResult(w, r, result, err) -} - -func slackChallenge(raw []byte) string { - var payload struct { - Type string `json:"type"` - Challenge string `json:"challenge"` - } - if err := json.Unmarshal(raw, &payload); err != nil { - return "" - } - if payload.Type != "url_verification" { - return "" - } - return strings.TrimSpace(payload.Challenge) -} - -func (s *Server) writeIngestResult(w http.ResponseWriter, r *http.Request, result app.IngestResult, err error) { - if err != nil { - s.writeError(w, r, err) - return - } - if !result.Accepted { - writeProblem(w, problem.New(http.StatusUnauthorized, "invalid_signature", "Invalid webhook signature", "Webhook evidence was captured, but the signature did not verify.", requestID(r), false)) 
- return - } - writeJSON(w, http.StatusOK, map[string]any{"received": true, "event_id": result.EventID, "duplicate": result.DedupeStatus != domain.DedupeUnique}) -} - -func (s *Server) listDeliveries(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListDeliveries(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) listDeliveryAttempts(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListDeliveryAttempts(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getDeliveryAttempt(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetDeliveryAttempt(r.Context(), actorFrom(r), chi.URLParam(r, "attempt_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) retryDelivery(w http.ResponseWriter, r *http.Request) { - var req struct { - Reason string `json:"reason"` - } - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RetryDelivery(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), req.Reason) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusAccepted, item) -} - -func (s *Server) cancelDelivery(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CancelDelivery(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) dryRunReplay(w http.ResponseWriter, r *http.Request) { - var req app.ReplayRequest - if !decodeJSON(w, r, &req) { - return - } - res, err := s.cfg.Control.DryRunReplay(r.Context(), actorFrom(r), req) - if 
err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, res) -} - -func (s *Server) createReplay(w http.ResponseWriter, r *http.Request) { - var req app.ReplayRequest - if !decodeJSON(w, r, &req) { - return - } - res, err := s.cfg.Control.CreateReplay(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusAccepted, res) -} - -func (s *Server) listReplayJobs(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListReplayJobs(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) approveReplayJob(w http.ResponseWriter, r *http.Request) { - s.changeReplayJobState(w, r, s.cfg.Control.ApproveReplayJob) -} - -func (s *Server) pauseReplayJob(w http.ResponseWriter, r *http.Request) { - s.changeReplayJobState(w, r, s.cfg.Control.PauseReplayJob) -} - -func (s *Server) resumeReplayJob(w http.ResponseWriter, r *http.Request) { - s.changeReplayJobState(w, r, s.cfg.Control.ResumeReplayJob) -} - -func (s *Server) cancelReplayJob(w http.ResponseWriter, r *http.Request) { - s.changeReplayJobState(w, r, s.cfg.Control.CancelReplayJob) -} - -func (s *Server) changeReplayJobState(w http.ResponseWriter, r *http.Request, fn func(context.Context, authz.Actor, string, app.StateChangeRequest) (app.ReplayJob, error)) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := fn(r.Context(), actorFrom(r), chi.URLParam(r, "replay_job_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) dryRunReconciliation(w http.ResponseWriter, r *http.Request) { - var req app.ReconciliationJobRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DryRunReconciliation(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - 
writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createReconciliationJob(w http.ResponseWriter, r *http.Request) { - var req app.ReconciliationJobRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateReconciliationJob(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listReconciliationJobs(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListReconciliationJobs(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getReconciliationJob(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetReconciliationJob(r.Context(), actorFrom(r), chi.URLParam(r, "job_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listReconciliationItems(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListReconciliationItems(r.Context(), actorFrom(r), chi.URLParam(r, "job_id"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) cancelReconciliationJob(w http.ResponseWriter, r *http.Request) { - var req app.ProviderConnectionStateRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CancelReconciliationJob(r.Context(), actorFrom(r), chi.URLParam(r, "job_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listDeadLetter(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListDeadLetter(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) releaseDeadLetter(w http.ResponseWriter, r *http.Request) { 
- var req app.DeadLetterReleaseRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.ReleaseDeadLetter(r.Context(), actorFrom(r), chi.URLParam(r, "entry_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusAccepted, item) -} - -func (s *Server) bulkReleaseDeadLetter(w http.ResponseWriter, r *http.Request) { - var req app.DeadLetterBulkReleaseRequest - if !decodeJSON(w, r, &req) { - return - } - items, err := s.cfg.Control.BulkReleaseDeadLetter(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusAccepted, map[string]any{"data": items}) -} - -func (s *Server) listQuarantine(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListQuarantine(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) approveQuarantine(w http.ResponseWriter, r *http.Request) { - var req app.QuarantineDecisionRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.ApproveQuarantine(r.Context(), actorFrom(r), chi.URLParam(r, "entry_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) rejectQuarantine(w http.ResponseWriter, r *http.Request) { - var req app.QuarantineDecisionRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RejectQuarantine(r.Context(), actorFrom(r), chi.URLParam(r, "entry_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listAuditEvents(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListAuditEvents(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getAuditChainHead(w 
http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetAuditChainHead(r.Context(), actorFrom(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) verifyAuditChain(w http.ResponseWriter, r *http.Request) { - var req app.AuditChainVerifyRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.VerifyAuditChain(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createAuditChainAnchor(w http.ResponseWriter, r *http.Request) { - var req app.AuditChainAnchorRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateAuditChainAnchor(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listAuditChainAnchors(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListAuditChainAnchors(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getAuditChainAnchor(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetAuditChainAnchor(r.Context(), actorFrom(r), chi.URLParam(r, "anchor_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createAuditExport(w http.ResponseWriter, r *http.Request) { - var req app.CreateAuditExportRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateAuditExport(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusAccepted, item) -} - -func (s *Server) listAuditExports(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListAuditExports(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - 
s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getAuditExport(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetAuditExport(r.Context(), actorFrom(r), chi.URLParam(r, "export_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) downloadAuditExport(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.DownloadAuditExport(r.Context(), actorFrom(r), chi.URLParam(r, "export_id")) - if err != nil { - s.writeError(w, r, err) - return - } - w.Header().Set("Content-Type", item.ContentType) - w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", item.Filename)) - w.Header().Set("X-Webhookery-Export-SHA256", item.Export.SHA256) - w.WriteHeader(http.StatusOK) - _, _ = w.Write(item.Body) -} - -func (s *Server) listRetentionPolicies(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListRetentionPolicies(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) createRetentionPolicy(w http.ResponseWriter, r *http.Request) { - var req app.CreateRetentionPolicyRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateRetentionPolicy(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) updateRetentionPolicy(w http.ResponseWriter, r *http.Request) { - var req app.UpdateRetentionPolicyRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateRetentionPolicy(r.Context(), actorFrom(r), chi.URLParam(r, "policy_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listEndpointHealth(w http.ResponseWriter, r *http.Request) { - items, err := 
s.cfg.Control.ListEndpointHealth(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) opsMetrics(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.OpsMetrics(r.Context(), actorFrom(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listMetricRollups(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListMetricRollups(r.Context(), actorFrom(r), r.URL.Query().Get("metric_name"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) listWorkers(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListWorkers(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getWorker(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetWorker(r.Context(), actorFrom(r), chi.URLParam(r, "worker_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listQueues(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListQueues(r.Context(), actorFrom(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) opsStorage(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.OpsStorage(r.Context(), actorFrom(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) opsConfig(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.OpsConfig(r.Context(), actorFrom(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) oidcLogin(w 
http.ResponseWriter, r *http.Request) { - result, err := s.cfg.Control.BeginOIDCLogin(r.Context(), r.URL.Query().Get("tenant_id"), r.URL.Query().Get("provider_id"), r.URL.Query().Get("redirect_after")) - if err != nil { - s.writeError(w, r, err) - return - } - s.setCookie(w, &http.Cookie{Name: "webhookery_oidc_state", Value: result.State, Path: "/v1/auth/oidc", MaxAge: 600, HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode}) - http.Redirect(w, r, result.AuthURL, http.StatusFound) -} - -func (s *Server) oidcCallback(w http.ResponseWriter, r *http.Request) { - state := r.URL.Query().Get("state") - cookie, err := r.Cookie("webhookery_oidc_state") - if err != nil || state == "" || subtle.ConstantTimeCompare([]byte(state), []byte(cookie.Value)) != 1 { - writeProblem(w, problem.Unauthorized(requestID(r))) - return - } - result, err := s.cfg.Control.CompleteOIDCCallback(r.Context(), state, r.URL.Query().Get("code"), r.UserAgent(), remoteAddr(r)) - if err != nil { - s.writeError(w, r, err) - return - } - s.setCookie(w, &http.Cookie{Name: sessionCookieName, Value: result.SessionToken, Path: "/", Expires: result.Session.ExpiresAt, HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode}) - s.setCookie(w, &http.Cookie{Name: "webhookery_oidc_state", Value: "", Path: "/v1/auth/oidc", MaxAge: -1, HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode}) - writeJSON(w, http.StatusOK, map[string]any{"session": result.Session, "actor": result.Actor}) -} - -func (s *Server) logout(w http.ResponseWriter, r *http.Request) { - cookie, err := r.Cookie(sessionCookieName) - if err != nil { - writeProblem(w, problem.Unauthorized(requestID(r))) - return - } - if err := s.cfg.Control.LogoutSession(r.Context(), actorFrom(r), cookie.Value); err != nil { - s.writeError(w, r, err) - return - } - s.setCookie(w, &http.Cookie{Name: sessionCookieName, Value: "", Path: "/", MaxAge: -1, HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode}) - 
w.WriteHeader(http.StatusNoContent) -} - -func (s *Server) currentSession(w http.ResponseWriter, r *http.Request) { - cookie, err := r.Cookie(sessionCookieName) - if err != nil { - writeProblem(w, problem.Unauthorized(requestID(r))) - return - } - item, err := s.cfg.Control.CurrentAuthSession(r.Context(), actorFrom(r), cookie.Value) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listAuthSessions(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListAuthSessions(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) revokeAuthSession(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RevokeAuthSessionByID(r.Context(), actorFrom(r), chi.URLParam(r, "session_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) setCookie(w http.ResponseWriter, cookie *http.Cookie) { - cookie.Secure = true - cookie.HttpOnly = true - if cookie.SameSite == http.SameSiteDefaultMode { - cookie.SameSite = http.SameSiteLaxMode - } - http.SetCookie(w, cookie) -} - -func (s *Server) createIdentityProvider(w http.ResponseWriter, r *http.Request) { - var req app.CreateIdentityProviderRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateIdentityProvider(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listIdentityProviders(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListIdentityProviders(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) 
getIdentityProvider(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetIdentityProvider(r.Context(), actorFrom(r), chi.URLParam(r, "provider_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateIdentityProvider(w http.ResponseWriter, r *http.Request) { - var req app.UpdateIdentityProviderRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateIdentityProvider(r.Context(), actorFrom(r), chi.URLParam(r, "provider_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) disableIdentityProvider(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DisableIdentityProvider(r.Context(), actorFrom(r), chi.URLParam(r, "provider_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) testIdentityProvider(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.TestIdentityProvider(r.Context(), actorFrom(r), chi.URLParam(r, "provider_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createSCIMToken(w http.ResponseWriter, r *http.Request) { - var req app.CreateSCIMTokenRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateSCIMToken(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listSCIMTokens(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListSCIMTokens(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) 
-} - -func (s *Server) revokeSCIMToken(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RevokeSCIMToken(r.Context(), actorFrom(r), chi.URLParam(r, "token_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) scimListUsers(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.SCIMListUsers(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, scimListResponse(items)) -} - -func (s *Server) scimCreateUser(w http.ResponseWriter, r *http.Request) { - var req app.SCIMUserRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.SCIMCreateUser(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) scimGetUser(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.SCIMGetUser(r.Context(), actorFrom(r), chi.URLParam(r, "user_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) scimReplaceUser(w http.ResponseWriter, r *http.Request) { - var req app.SCIMUserRequest - if !decodeJSON(w, r, &req) { - return - } - req.ID = chi.URLParam(r, "user_id") - item, err := s.cfg.Control.SCIMReplaceUser(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) scimPatchUser(w http.ResponseWriter, r *http.Request) { - var req app.SCIMPatchRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.SCIMPatchUser(r.Context(), actorFrom(r), chi.URLParam(r, "user_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) scimDeleteUser(w 
http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.SCIMDeactivateUser(r.Context(), actorFrom(r), chi.URLParam(r, "user_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) scimListGroups(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.SCIMListGroups(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, scimListResponse(items)) -} - -func (s *Server) scimCreateGroup(w http.ResponseWriter, r *http.Request) { - var req app.SCIMGroupRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.SCIMCreateGroup(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) scimGetGroup(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.SCIMGetGroup(r.Context(), actorFrom(r), chi.URLParam(r, "group_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) scimReplaceGroup(w http.ResponseWriter, r *http.Request) { - var req app.SCIMGroupRequest - if !decodeJSON(w, r, &req) { - return - } - req.ID = chi.URLParam(r, "group_id") - item, err := s.cfg.Control.SCIMReplaceGroup(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) scimPatchGroup(w http.ResponseWriter, r *http.Request) { - var req app.SCIMPatchRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.SCIMPatchGroup(r.Context(), actorFrom(r), chi.URLParam(r, "group_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) scimDeleteGroup(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.SCIMDeactivateGroup(r.Context(), actorFrom(r), 
chi.URLParam(r, "group_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createRoleBinding(w http.ResponseWriter, r *http.Request) { - var req app.CreateRoleBindingRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateRoleBinding(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listRoleBindings(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListRoleBindings(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) updateRoleBinding(w http.ResponseWriter, r *http.Request) { - var req app.UpdateRoleBindingRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateRoleBinding(r.Context(), actorFrom(r), chi.URLParam(r, "binding_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) disableRoleBinding(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DisableRoleBinding(r.Context(), actorFrom(r), chi.URLParam(r, "binding_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createAccessPolicyRule(w http.ResponseWriter, r *http.Request) { - var req app.CreateAccessPolicyRuleRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateAccessPolicyRule(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listAccessPolicyRules(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListAccessPolicyRules(r.Context(), actorFrom(r), 
queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) updateAccessPolicyRule(w http.ResponseWriter, r *http.Request) { - var req app.UpdateAccessPolicyRuleRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateAccessPolicyRule(r.Context(), actorFrom(r), chi.URLParam(r, "policy_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) disableAccessPolicyRule(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DisableAccessPolicyRule(r.Context(), actorFrom(r), chi.URLParam(r, "policy_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) authzExplain(w http.ResponseWriter, r *http.Request) { - var req app.AuthzExplainRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.ExplainAuthorization(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createProviderAdapter(w http.ResponseWriter, r *http.Request) { - var req app.CreateProviderAdapterRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateProviderAdapter(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listProviderAdapters(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListProviderAdapters(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getProviderAdapter(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetProviderAdapter(r.Context(), actorFrom(r), 
chi.URLParam(r, "adapter_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createAdapterVersion(w http.ResponseWriter, r *http.Request) { - var req app.CreateAdapterVersionRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateAdapterVersion(r.Context(), actorFrom(r), chi.URLParam(r, "adapter_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listAdapterVersions(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListAdapterVersions(r.Context(), actorFrom(r), chi.URLParam(r, "adapter_id"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) createAdapterTestVector(w http.ResponseWriter, r *http.Request) { - var req app.CreateAdapterTestVectorRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateAdapterTestVector(r.Context(), actorFrom(r), chi.URLParam(r, "adapter_id"), chi.URLParam(r, "version_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) transitionAdapterVersion(w http.ResponseWriter, r *http.Request) { - var req app.AdapterVersionTransitionRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.TransitionAdapterVersion(r.Context(), actorFrom(r), chi.URLParam(r, "adapter_id"), chi.URLParam(r, "version_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createAlertRule(w http.ResponseWriter, r *http.Request) { - var req app.CreateAlertRuleRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.CreateAlertRule(r.Context(), actorFrom(r), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} 
- -func (s *Server) listAlertRules(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListAlertRules(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getAlertRule(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetAlertRule(r.Context(), actorFrom(r), chi.URLParam(r, "alert_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateAlertRule(w http.ResponseWriter, r *http.Request) { - var req app.UpdateAlertRuleRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.UpdateAlertRule(r.Context(), actorFrom(r), chi.URLParam(r, "alert_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteAlertRule(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteAlertRule(r.Context(), actorFrom(r), chi.URLParam(r, "alert_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) listAlertFirings(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListAlertFirings(r.Context(), actorFrom(r), r.URL.Query().Get("state"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getAlertFiring(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetAlertFiring(r.Context(), actorFrom(r), chi.URLParam(r, "firing_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) acknowledgeAlertFiring(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, 
err := s.cfg.Control.AcknowledgeAlertFiring(r.Context(), actorFrom(r), chi.URLParam(r, "firing_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createNotificationChannel(w http.ResponseWriter, r *http.Request) { - var req app.CreateNotificationChannelRequest - if !decodeJSON(w, r, &req) { - return - } - item, result, err := s.cfg.Control.CreateNotificationChannel(r.Context(), actorFrom(r), req) - if err != nil { - if errors.Is(err, app.ErrInvalidInput) && len(result.BlockedReasons) > 0 { - writeProblem(w, problem.BadRequest(requestID(r), "notification_channel_url_blocked", strings.Join(result.BlockedReasons, ","))) - return - } - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listNotificationChannels(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListNotificationChannels(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getNotificationChannel(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetNotificationChannel(r.Context(), actorFrom(r), chi.URLParam(r, "channel_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateNotificationChannel(w http.ResponseWriter, r *http.Request) { - var req app.UpdateNotificationChannelRequest - if !decodeJSON(w, r, &req) { - return - } - item, result, err := s.cfg.Control.UpdateNotificationChannel(r.Context(), actorFrom(r), chi.URLParam(r, "channel_id"), req) - if err != nil { - if errors.Is(err, app.ErrInvalidInput) && len(result.BlockedReasons) > 0 { - writeProblem(w, problem.BadRequest(requestID(r), "notification_channel_url_blocked", strings.Join(result.BlockedReasons, ","))) - return - } - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - 
-func (s *Server) deleteNotificationChannel(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteNotificationChannel(r.Context(), actorFrom(r), chi.URLParam(r, "channel_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) testNotificationChannel(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.TestNotificationChannel(r.Context(), actorFrom(r), chi.URLParam(r, "channel_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusAccepted, item) -} - -func (s *Server) listNotificationDeliveries(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListNotificationDeliveries(r.Context(), actorFrom(r), r.URL.Query().Get("state"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) listNotificationDeliveryAttempts(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListNotificationDeliveryAttempts(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) retryNotificationDelivery(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RetryNotificationDelivery(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) createSIEMSink(w http.ResponseWriter, r *http.Request) { - var req app.CreateSIEMSinkRequest - if !decodeJSON(w, r, &req) { - return - } - item, result, err := 
s.cfg.Control.CreateSIEMSink(r.Context(), actorFrom(r), req) - if err != nil { - if errors.Is(err, app.ErrInvalidInput) && len(result.BlockedReasons) > 0 { - writeProblem(w, problem.BadRequest(requestID(r), "siem_sink_url_blocked", strings.Join(result.BlockedReasons, ","))) - return - } - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusCreated, item) -} - -func (s *Server) listSIEMSinks(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListSIEMSinks(r.Context(), actorFrom(r), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) getSIEMSink(w http.ResponseWriter, r *http.Request) { - item, err := s.cfg.Control.GetSIEMSink(r.Context(), actorFrom(r), chi.URLParam(r, "sink_id")) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) updateSIEMSink(w http.ResponseWriter, r *http.Request) { - var req app.UpdateSIEMSinkRequest - if !decodeJSON(w, r, &req) { - return - } - item, result, err := s.cfg.Control.UpdateSIEMSink(r.Context(), actorFrom(r), chi.URLParam(r, "sink_id"), req) - if err != nil { - if errors.Is(err, app.ErrInvalidInput) && len(result.BlockedReasons) > 0 { - writeProblem(w, problem.BadRequest(requestID(r), "siem_sink_url_blocked", strings.Join(result.BlockedReasons, ","))) - return - } - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) deleteSIEMSink(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.DeleteSIEMSink(r.Context(), actorFrom(r), chi.URLParam(r, "sink_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) testSIEMSink(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := 
s.cfg.Control.TestSIEMSink(r.Context(), actorFrom(r), chi.URLParam(r, "sink_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusAccepted, item) -} - -func (s *Server) listSIEMDeliveries(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListSIEMDeliveries(r.Context(), actorFrom(r), r.URL.Query().Get("state"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) listSIEMDeliveryAttempts(w http.ResponseWriter, r *http.Request) { - items, err := s.cfg.Control.ListSIEMDeliveryAttempts(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), queryLimit(r)) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, page(items)) -} - -func (s *Server) retrySIEMDelivery(w http.ResponseWriter, r *http.Request) { - var req app.StateChangeRequest - if !decodeJSON(w, r, &req) { - return - } - item, err := s.cfg.Control.RetrySIEMDelivery(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), req) - if err != nil { - s.writeError(w, r, err) - return - } - writeJSON(w, http.StatusOK, item) -} - -func (s *Server) writeError(w http.ResponseWriter, r *http.Request, err error) { - requestID := requestID(r) - switch { - case errors.Is(err, app.ErrUnauthorized): - writeProblem(w, problem.Unauthorized(requestID)) - case errors.Is(err, app.ErrForbidden): - writeProblem(w, problem.Forbidden(requestID)) - case errors.Is(err, app.ErrNotFound): - writeProblem(w, problem.New(http.StatusNotFound, "not_found", "Not found", "The requested resource was not found.", requestID, false)) - case errors.Is(err, app.ErrGone): - writeProblem(w, problem.New(http.StatusGone, "payload_expired", "Payload unavailable", "The requested payload body has expired or was removed by retention policy; metadata and hashes remain available.", requestID, false)) - case errors.Is(err, app.ErrInvalidInput): - writeProblem(w, 
problem.BadRequest(requestID, "validation_error", err.Error())) - default: - writeProblem(w, problem.Internal(requestID)) - } -} - type actorContextKey struct{} - -func actorFrom(r *http.Request) authz.Actor { - actor, _ := r.Context().Value(actorContextKey{}).(authz.Actor) - return actor -} - -func decodeJSON(w http.ResponseWriter, r *http.Request, dst any) bool { - dec := json.NewDecoder(io.LimitReader(r.Body, 1<<20)) - dec.DisallowUnknownFields() - if err := dec.Decode(dst); err != nil { - writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "Invalid JSON body.")) - return false - } - if dec.Decode(&struct{}{}) != io.EOF { - writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "JSON body must contain a single value.")) - return false - } - return true -} - -func readLimitedBody(w http.ResponseWriter, r *http.Request, max int64) ([]byte, bool) { - body, err := io.ReadAll(io.LimitReader(r.Body, max+1)) - if err != nil { - writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "Could not read request body.")) - return nil, false - } - if int64(len(body)) > max { - writeProblem(w, problem.New(http.StatusRequestEntityTooLarge, "payload_too_large", "Payload too large", "The webhook payload exceeds the configured limit.", requestID(r), false)) - return nil, false - } - return body, true -} - -func rejectOversizedHeaders(next http.Handler) http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if !requestHeadersWithinLimits(w, r) { - return - } - next.ServeHTTP(w, r) - }) -} - -func requestHeadersWithinLimits(w http.ResponseWriter, r *http.Request) bool { - var pairs int - var total int - for name, values := range r.Header { - if len(values) == 0 { - pairs++ - total += len(name) - if pairs > maxHeaderPairs || total > maxHeaderBytes { - writeProblem(w, problem.New(http.StatusRequestHeaderFieldsTooLarge, "headers_too_large", "Headers too large", "The request headers exceed the configured 
limit.", requestID(r), false)) - return false - } - continue - } - for _, value := range values { - pairs++ - total += len(name) + len(value) - if len(value) > maxHeaderValueBytes || pairs > maxHeaderPairs || total > maxHeaderBytes { - writeProblem(w, problem.New(http.StatusRequestHeaderFieldsTooLarge, "headers_too_large", "Headers too large", "The request headers exceed the configured limit.", requestID(r), false)) - return false - } - } - } - return true -} - -func headers(h http.Header) []domain.HeaderPair { - var out []domain.HeaderPair - for name, values := range h { - for _, value := range values { - out = append(out, domain.HeaderPair{Name: name, Value: value}) - } - } - return out -} - -func writeJSON(w http.ResponseWriter, status int, body any) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(body) -} - -func writeProblem(w http.ResponseWriter, p problem.Problem) { - writeJSON(w, p.Status, p) -} - -func page[T any](items []T) map[string]any { - if items == nil { - items = []T{} - } - return map[string]any{"data": items, "next_cursor": nil, "has_more": false} -} - -func scimListResponse[T any](items []T) map[string]any { - if items == nil { - items = []T{} - } - return map[string]any{ - "schemas": []string{"urn:ietf:params:scim:api:messages:2.0:ListResponse"}, - "totalResults": len(items), - "Resources": items, - "startIndex": 1, - "itemsPerPage": len(items), - } -} - -func remoteAddr(r *http.Request) string { - if forwarded := strings.TrimSpace(r.Header.Get("X-Forwarded-For")); forwarded != "" { - first, _, _ := strings.Cut(forwarded, ",") - return strings.TrimSpace(first) - } - return r.RemoteAddr -} - -func formatPrometheus(metrics domain.OpsMetrics) string { - var b strings.Builder - fmt.Fprintf(&b, "# HELP webhookery_events_total Total captured canonical events.\n") - fmt.Fprintf(&b, "# TYPE webhookery_events_total counter\n") - fmt.Fprintf(&b, "webhookery_events_total %d\n", 
metrics.EventsTotal) - fmt.Fprintf(&b, "# HELP webhookery_outbox_pending Pending durable outbox rows.\n") - fmt.Fprintf(&b, "# TYPE webhookery_outbox_pending gauge\n") - fmt.Fprintf(&b, "webhookery_outbox_pending %d\n", metrics.OutboxPending) - fmt.Fprintf(&b, "# HELP webhookery_outbox_oldest_age_seconds Oldest pending outbox age.\n") - fmt.Fprintf(&b, "# TYPE webhookery_outbox_oldest_age_seconds gauge\n") - fmt.Fprintf(&b, "webhookery_outbox_oldest_age_seconds %d\n", metrics.OldestOutboxAgeSec) - fmt.Fprintf(&b, "# HELP webhookery_dead_letter_open Open dead-letter entries.\n") - fmt.Fprintf(&b, "# TYPE webhookery_dead_letter_open gauge\n") - fmt.Fprintf(&b, "webhookery_dead_letter_open %d\n", metrics.DeadLetterOpen) - fmt.Fprintf(&b, "# HELP webhookery_quarantine_open Open quarantine entries.\n") - fmt.Fprintf(&b, "# TYPE webhookery_quarantine_open gauge\n") - fmt.Fprintf(&b, "webhookery_quarantine_open %d\n", metrics.QuarantineOpen) - fmt.Fprintf(&b, "# HELP webhookery_endpoint_circuit_open Open endpoint circuits.\n") - fmt.Fprintf(&b, "# TYPE webhookery_endpoint_circuit_open gauge\n") - fmt.Fprintf(&b, "webhookery_endpoint_circuit_open %d\n", metrics.EndpointCircuitOpen) - fmt.Fprintf(&b, "# HELP webhookery_audit_chain_unchained_events Audit events without chain entries.\n") - fmt.Fprintf(&b, "# TYPE webhookery_audit_chain_unchained_events gauge\n") - fmt.Fprintf(&b, "webhookery_audit_chain_unchained_events %d\n", metrics.AuditChainUnchainedEvents) - fmt.Fprintf(&b, "# HELP webhookery_audit_chain_verification_failures Audit chain entries that cannot verify against available audit rows.\n") - fmt.Fprintf(&b, "# TYPE webhookery_audit_chain_verification_failures gauge\n") - fmt.Fprintf(&b, "webhookery_audit_chain_verification_failures %d\n", metrics.AuditChainVerificationFailures) - fmt.Fprintf(&b, "# HELP webhookery_audit_chain_last_anchor_age_seconds Age of the newest audit chain anchor.\n") - fmt.Fprintf(&b, "# TYPE webhookery_audit_chain_last_anchor_age_seconds 
gauge\n") - fmt.Fprintf(&b, "webhookery_audit_chain_last_anchor_age_seconds %d\n", metrics.AuditChainLastAnchorAgeSec) - for state, count := range metrics.DeliveriesByState { - fmt.Fprintf(&b, "webhookery_deliveries{state=%q} %d\n", state, count) - } - for state, count := range metrics.ReplayJobsByState { - fmt.Fprintf(&b, "webhookery_replay_jobs{state=%q} %d\n", state, count) - } - for state, count := range metrics.ReconciliationJobsByState { - fmt.Fprintf(&b, "webhookery_reconciliation_jobs{state=%q} %d\n", state, count) - } - for outcome, count := range metrics.ReconciliationItemsByOutcome { - fmt.Fprintf(&b, "webhookery_reconciliation_items{outcome=%q} %d\n", outcome, count) - } - return b.String() -} - -func publicSource(source domain.Source) map[string]any { - return map[string]any{ - "id": source.ID, - "tenant_id": source.TenantID, - "name": source.Name, - "provider": source.Provider, - "adapter": source.Adapter, - "state": source.State, - } -} - -func queryLimit(r *http.Request) int { - raw := r.URL.Query().Get("limit") - if raw == "" { - return 50 - } - limit, err := strconv.Atoi(raw) - if err != nil { - return 50 - } - return limit -} - -func requestID(r *http.Request) string { - reqID := identity.RequestID(r) - if strings.TrimSpace(reqID) == "" { - return "req_unknown" - } - return reqID -} diff --git a/internal/adapters/httpapi/server_audit_retention_handlers.go b/internal/adapters/httpapi/server_audit_retention_handlers.go new file mode 100644 index 0000000..40f515a --- /dev/null +++ b/internal/adapters/httpapi/server_audit_retention_handlers.go @@ -0,0 +1,151 @@ +package httpapi + +import ( + "fmt" + "net/http" + + "webhookery/internal/app" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) listAuditEvents(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListAuditEvents(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s 
*Server) getAuditChainHead(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetAuditChainHead(r.Context(), actorFrom(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) verifyAuditChain(w http.ResponseWriter, r *http.Request) { + var req app.AuditChainVerifyRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.VerifyAuditChain(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createAuditChainAnchor(w http.ResponseWriter, r *http.Request) { + var req app.AuditChainAnchorRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateAuditChainAnchor(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listAuditChainAnchors(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListAuditChainAnchors(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getAuditChainAnchor(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetAuditChainAnchor(r.Context(), actorFrom(r), chi.URLParam(r, "anchor_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createAuditExport(w http.ResponseWriter, r *http.Request) { + var req app.CreateAuditExportRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateAuditExport(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, item) +} + +func (s *Server) listAuditExports(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListAuditExports(r.Context(), actorFrom(r), 
queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getAuditExport(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetAuditExport(r.Context(), actorFrom(r), chi.URLParam(r, "export_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) downloadAuditExport(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.DownloadAuditExport(r.Context(), actorFrom(r), chi.URLParam(r, "export_id")) + if err != nil { + s.writeError(w, r, err) + return + } + w.Header().Set("Content-Type", item.ContentType) + w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", item.Filename)) + w.Header().Set("X-Webhookery-Export-SHA256", item.Export.SHA256) + w.WriteHeader(http.StatusOK) + _, _ = w.Write(item.Body) +} + +func (s *Server) listRetentionPolicies(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListRetentionPolicies(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) createRetentionPolicy(w http.ResponseWriter, r *http.Request) { + var req app.CreateRetentionPolicyRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateRetentionPolicy(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) updateRetentionPolicy(w http.ResponseWriter, r *http.Request) { + var req app.UpdateRetentionPolicyRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateRetentionPolicy(r.Context(), actorFrom(r), chi.URLParam(r, "policy_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_auth_identity.go 
b/internal/adapters/httpapi/server_auth_identity.go new file mode 100644 index 0000000..93590d1 --- /dev/null +++ b/internal/adapters/httpapi/server_auth_identity.go @@ -0,0 +1,575 @@ +package httpapi + +import ( + "context" + "crypto/subtle" + "errors" + "net/http" + "net/url" + "strings" + + "webhookery/internal/app" + "webhookery/internal/authz" + "webhookery/internal/problem" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) requireAuth(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requestID := requestID(r) + token := app.BearerToken(r.Header.Get("Authorization")) + authenticator := s.cfg.Auth + if token == "" && s.cfg.SessionAuth != nil { + if cookie, err := r.Cookie(sessionCookieName); err == nil { + token = cookie.Value + authenticator = s.cfg.SessionAuth + } + } + if authenticator == nil { + writeProblem(w, problem.Unauthorized(requestID)) + return + } + actor, err := authenticator.Authenticate(r.Context(), token) + if err != nil { + writeProblem(w, problem.Unauthorized(requestID)) + return + } + next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) + }) +} + +func (s *Server) requireProducerAuth(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requestID := requestID(r) + if r.TLS != nil && len(r.TLS.PeerCertificates) > 0 && s.cfg.ProducerMTLSAuth.Lookup != nil { + if len(r.TLS.VerifiedChains) == 0 { + writeProblem(w, problem.Unauthorized(requestID)) + return + } + actor, err := s.cfg.ProducerMTLSAuth.AuthenticateCertificate(r.Context(), r.TLS.PeerCertificates[0]) + if err != nil { + writeProblem(w, problem.Unauthorized(requestID)) + return + } + if !authz.Can(actor, "events:write", actor.TenantID) { + writeProblem(w, problem.Forbidden(requestID)) + return + } + next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) + return + } + token := 
app.BearerToken(r.Header.Get("Authorization")) + if s.cfg.ProducerAuth != nil { + actor, err := s.cfg.ProducerAuth.Authenticate(r.Context(), token) + if err == nil { + if !authz.Can(actor, "events:write", actor.TenantID) { + writeProblem(w, problem.Forbidden(requestID)) + return + } + next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) + return + } + if !errors.Is(err, app.ErrUnauthorized) { + writeProblem(w, problem.Internal(requestID)) + return + } + } + if s.cfg.Auth == nil { + writeProblem(w, problem.Unauthorized(requestID)) + return + } + actor, err := s.cfg.Auth.Authenticate(r.Context(), token) + if err != nil { + writeProblem(w, problem.Unauthorized(requestID)) + return + } + if !authz.Can(actor, "events:write", actor.TenantID) { + writeProblem(w, problem.Forbidden(requestID)) + return + } + next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) + }) +} + +func (s *Server) requireSCIMAuth(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if s.cfg.Control == nil { + writeProblem(w, problem.Unauthorized(requestID(r))) + return + } + actor, err := s.cfg.Control.AuthenticateSCIMToken(r.Context(), app.BearerToken(r.Header.Get("Authorization"))) + if err != nil { + writeProblem(w, problem.Unauthorized(requestID(r))) + return + } + next.ServeHTTP(w, r.WithContext(context.WithValue(r.Context(), actorContextKey{}, actor))) + }) +} + +func (s *Server) issueOAuthToken(w http.ResponseWriter, r *http.Request) { + body, ok := readLimitedBody(w, r, 64<<10) + if !ok { + return + } + form, err := url.ParseQuery(string(body)) + if err != nil { + writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "Invalid form body.")) + return + } + if form.Get("grant_type") != "client_credentials" { + writeProblem(w, problem.BadRequest(requestID(r), "unsupported_grant_type", "Only client_credentials grant is supported.")) + return + } + if 
form.Get("client_secret") != "" { + writeProblem(w, problem.BadRequest(requestID(r), "invalid_request", "Client credentials must use HTTP Basic authentication.")) + return + } + clientID, clientSecret, basicOK := r.BasicAuth() + if !basicOK || strings.TrimSpace(clientID) == "" || clientSecret == "" { + writeProblem(w, problem.Unauthorized(requestID(r))) + return + } + result, err := s.cfg.Control.IssueProducerToken(r.Context(), clientID, clientSecret) + if err != nil { + s.writeError(w, r, err) + return + } + w.Header().Set("Cache-Control", "no-store") + w.Header().Set("Pragma", "no-cache") + writeJSON(w, http.StatusOK, result) +} + +func (s *Server) oidcLogin(w http.ResponseWriter, r *http.Request) { + result, err := s.cfg.Control.BeginOIDCLogin(r.Context(), r.URL.Query().Get("tenant_id"), r.URL.Query().Get("provider_id"), r.URL.Query().Get("redirect_after")) + if err != nil { + s.writeError(w, r, err) + return + } + s.setCookie(w, &http.Cookie{Name: "webhookery_oidc_state", Value: result.State, Path: "/v1/auth/oidc", MaxAge: 600, HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode}) + http.Redirect(w, r, result.AuthURL, http.StatusFound) +} + +func (s *Server) oidcCallback(w http.ResponseWriter, r *http.Request) { + state := r.URL.Query().Get("state") + cookie, err := r.Cookie("webhookery_oidc_state") + if err != nil || state == "" || subtle.ConstantTimeCompare([]byte(state), []byte(cookie.Value)) != 1 { + writeProblem(w, problem.Unauthorized(requestID(r))) + return + } + result, err := s.cfg.Control.CompleteOIDCCallback(r.Context(), state, r.URL.Query().Get("code"), r.UserAgent(), s.remoteAddr(r)) + if err != nil { + s.writeError(w, r, err) + return + } + s.setCookie(w, &http.Cookie{Name: sessionCookieName, Value: result.SessionToken, Path: "/", Expires: result.Session.ExpiresAt, HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode}) + s.setCookie(w, &http.Cookie{Name: "webhookery_oidc_state", Value: "", Path: "/v1/auth/oidc", MaxAge: -1, 
HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode}) + writeJSON(w, http.StatusOK, map[string]any{"session": result.Session, "actor": result.Actor}) +} + +func (s *Server) logout(w http.ResponseWriter, r *http.Request) { + cookie, err := r.Cookie(sessionCookieName) + if err != nil { + writeProblem(w, problem.Unauthorized(requestID(r))) + return + } + if err := s.cfg.Control.LogoutSession(r.Context(), actorFrom(r), cookie.Value); err != nil { + s.writeError(w, r, err) + return + } + s.setCookie(w, &http.Cookie{Name: sessionCookieName, Value: "", Path: "/", MaxAge: -1, HttpOnly: true, Secure: true, SameSite: http.SameSiteLaxMode}) + w.WriteHeader(http.StatusNoContent) +} + +func (s *Server) currentSession(w http.ResponseWriter, r *http.Request) { + cookie, err := r.Cookie(sessionCookieName) + if err != nil { + writeProblem(w, problem.Unauthorized(requestID(r))) + return + } + item, err := s.cfg.Control.CurrentAuthSession(r.Context(), actorFrom(r), cookie.Value) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) listAuthSessions(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListAuthSessions(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) revokeAuthSession(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RevokeAuthSessionByID(r.Context(), actorFrom(r), chi.URLParam(r, "session_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) setCookie(w http.ResponseWriter, cookie *http.Cookie) { + cookie.Secure = true + cookie.HttpOnly = true + if cookie.SameSite == http.SameSiteDefaultMode { + cookie.SameSite = http.SameSiteLaxMode + } + http.SetCookie(w, cookie) +} + +func (s *Server) 
createIdentityProvider(w http.ResponseWriter, r *http.Request) { + var req app.CreateIdentityProviderRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateIdentityProvider(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listIdentityProviders(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListIdentityProviders(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getIdentityProvider(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetIdentityProvider(r.Context(), actorFrom(r), chi.URLParam(r, "provider_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateIdentityProvider(w http.ResponseWriter, r *http.Request) { + var req app.UpdateIdentityProviderRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateIdentityProvider(r.Context(), actorFrom(r), chi.URLParam(r, "provider_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) disableIdentityProvider(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DisableIdentityProvider(r.Context(), actorFrom(r), chi.URLParam(r, "provider_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) testIdentityProvider(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.TestIdentityProvider(r.Context(), actorFrom(r), chi.URLParam(r, "provider_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + 
writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createSCIMToken(w http.ResponseWriter, r *http.Request) { + var req app.CreateSCIMTokenRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateSCIMToken(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listSCIMTokens(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListSCIMTokens(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) revokeSCIMToken(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RevokeSCIMToken(r.Context(), actorFrom(r), chi.URLParam(r, "token_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) scimListUsers(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.SCIMListUsers(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, scimListResponse(items)) +} + +func (s *Server) scimCreateUser(w http.ResponseWriter, r *http.Request) { + var req app.SCIMUserRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.SCIMCreateUser(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) scimGetUser(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.SCIMGetUser(r.Context(), actorFrom(r), chi.URLParam(r, "user_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) scimReplaceUser(w http.ResponseWriter, r *http.Request) { + var req app.SCIMUserRequest + if !decodeJSON(w, r, 
&req) { + return + } + req.ID = chi.URLParam(r, "user_id") + item, err := s.cfg.Control.SCIMReplaceUser(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) scimPatchUser(w http.ResponseWriter, r *http.Request) { + var req app.SCIMPatchRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.SCIMPatchUser(r.Context(), actorFrom(r), chi.URLParam(r, "user_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) scimDeleteUser(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.SCIMDeactivateUser(r.Context(), actorFrom(r), chi.URLParam(r, "user_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) scimListGroups(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.SCIMListGroups(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, scimListResponse(items)) +} + +func (s *Server) scimCreateGroup(w http.ResponseWriter, r *http.Request) { + var req app.SCIMGroupRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.SCIMCreateGroup(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) scimGetGroup(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.SCIMGetGroup(r.Context(), actorFrom(r), chi.URLParam(r, "group_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) scimReplaceGroup(w http.ResponseWriter, r *http.Request) { + var req app.SCIMGroupRequest + if !decodeJSON(w, r, &req) { + return + } + req.ID = chi.URLParam(r, "group_id") + item, err := s.cfg.Control.SCIMReplaceGroup(r.Context(), 
actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) scimPatchGroup(w http.ResponseWriter, r *http.Request) { + var req app.SCIMPatchRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.SCIMPatchGroup(r.Context(), actorFrom(r), chi.URLParam(r, "group_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) scimDeleteGroup(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.SCIMDeactivateGroup(r.Context(), actorFrom(r), chi.URLParam(r, "group_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createRoleBinding(w http.ResponseWriter, r *http.Request) { + var req app.CreateRoleBindingRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateRoleBinding(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listRoleBindings(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListRoleBindings(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) updateRoleBinding(w http.ResponseWriter, r *http.Request) { + var req app.UpdateRoleBindingRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateRoleBinding(r.Context(), actorFrom(r), chi.URLParam(r, "binding_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) disableRoleBinding(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DisableRoleBinding(r.Context(), actorFrom(r), chi.URLParam(r, "binding_id"), req) 
+ if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createAccessPolicyRule(w http.ResponseWriter, r *http.Request) { + var req app.CreateAccessPolicyRuleRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateAccessPolicyRule(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listAccessPolicyRules(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListAccessPolicyRules(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) updateAccessPolicyRule(w http.ResponseWriter, r *http.Request) { + var req app.UpdateAccessPolicyRuleRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateAccessPolicyRule(r.Context(), actorFrom(r), chi.URLParam(r, "policy_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) disableAccessPolicyRule(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DisableAccessPolicyRule(r.Context(), actorFrom(r), chi.URLParam(r, "policy_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) authzExplain(w http.ResponseWriter, r *http.Request) { + var req app.AuthzExplainRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.ExplainAuthorization(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_delivery_replay_handlers.go b/internal/adapters/httpapi/server_delivery_replay_handlers.go new file mode 100644 index 
0000000..44103a3 --- /dev/null +++ b/internal/adapters/httpapi/server_delivery_replay_handlers.go @@ -0,0 +1,301 @@ +package httpapi + +import ( + "context" + "net/http" + + "webhookery/internal/app" + "webhookery/internal/authz" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) listDeliveries(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListDeliveries(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) listDeliveryAttempts(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListDeliveryAttempts(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getDeliveryAttempt(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetDeliveryAttempt(r.Context(), actorFrom(r), chi.URLParam(r, "attempt_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) retryDelivery(w http.ResponseWriter, r *http.Request) { + var req struct { + Reason string `json:"reason"` + } + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RetryDelivery(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), req.Reason) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, item) +} + +func (s *Server) cancelDelivery(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CancelDelivery(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) dryRunReplay(w http.ResponseWriter, r *http.Request) { + var req app.ReplayRequest + if !decodeJSON(w, r, 
&req) { + return + } + res, err := s.cfg.Control.DryRunReplay(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, res) +} + +func (s *Server) createReplay(w http.ResponseWriter, r *http.Request) { + var req app.ReplayRequest + if !decodeJSON(w, r, &req) { + return + } + res, err := s.cfg.Control.CreateReplay(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, res) +} + +func (s *Server) listReplayJobs(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListReplayJobs(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) approveReplayJob(w http.ResponseWriter, r *http.Request) { + s.changeReplayJobState(w, r, s.cfg.Control.ApproveReplayJob) +} + +func (s *Server) pauseReplayJob(w http.ResponseWriter, r *http.Request) { + s.changeReplayJobState(w, r, s.cfg.Control.PauseReplayJob) +} + +func (s *Server) resumeReplayJob(w http.ResponseWriter, r *http.Request) { + s.changeReplayJobState(w, r, s.cfg.Control.ResumeReplayJob) +} + +func (s *Server) cancelReplayJob(w http.ResponseWriter, r *http.Request) { + s.changeReplayJobState(w, r, s.cfg.Control.CancelReplayJob) +} + +func (s *Server) listReplayApprovalPolicies(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListReplayApprovalPolicies(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) createReplayApprovalPolicy(w http.ResponseWriter, r *http.Request) { + var req app.CreateReplayApprovalPolicyRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateReplayApprovalPolicy(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, 
http.StatusCreated, item) +} + +func (s *Server) disableReplayApprovalPolicy(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DisableReplayApprovalPolicy(r.Context(), actorFrom(r), chi.URLParam(r, "policy_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) changeReplayJobState(w http.ResponseWriter, r *http.Request, fn func(context.Context, authz.Actor, string, app.StateChangeRequest) (app.ReplayJob, error)) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := fn(r.Context(), actorFrom(r), chi.URLParam(r, "replay_job_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) dryRunReconciliation(w http.ResponseWriter, r *http.Request) { + var req app.ReconciliationJobRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DryRunReconciliation(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createReconciliationJob(w http.ResponseWriter, r *http.Request) { + var req app.ReconciliationJobRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateReconciliationJob(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listReconciliationJobs(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListReconciliationJobs(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getReconciliationJob(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetReconciliationJob(r.Context(), actorFrom(r), 
chi.URLParam(r, "job_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) listReconciliationItems(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListReconciliationItems(r.Context(), actorFrom(r), chi.URLParam(r, "job_id"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) cancelReconciliationJob(w http.ResponseWriter, r *http.Request) { + var req app.ProviderConnectionStateRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CancelReconciliationJob(r.Context(), actorFrom(r), chi.URLParam(r, "job_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) listDeadLetter(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListDeadLetter(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) releaseDeadLetter(w http.ResponseWriter, r *http.Request) { + var req app.DeadLetterReleaseRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.ReleaseDeadLetter(r.Context(), actorFrom(r), chi.URLParam(r, "entry_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, item) +} + +func (s *Server) bulkReleaseDeadLetter(w http.ResponseWriter, r *http.Request) { + var req app.DeadLetterBulkReleaseRequest + if !decodeJSON(w, r, &req) { + return + } + items, err := s.cfg.Control.BulkReleaseDeadLetter(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, map[string]any{"data": items}) +} + +func (s *Server) listQuarantine(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListQuarantine(r.Context(), actorFrom(r), 
queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) approveQuarantine(w http.ResponseWriter, r *http.Request) { + var req app.QuarantineDecisionRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.ApproveQuarantine(r.Context(), actorFrom(r), chi.URLParam(r, "entry_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) rejectQuarantine(w http.ResponseWriter, r *http.Request) { + var req app.QuarantineDecisionRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RejectQuarantine(r.Context(), actorFrom(r), chi.URLParam(r, "entry_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_endpoint_subscription_handlers.go b/internal/adapters/httpapi/server_endpoint_subscription_handlers.go new file mode 100644 index 0000000..c0058ff --- /dev/null +++ b/internal/adapters/httpapi/server_endpoint_subscription_handlers.go @@ -0,0 +1,224 @@ +package httpapi + +import ( + "net/http" + + "webhookery/internal/app" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) createEndpoint(w http.ResponseWriter, r *http.Request) { + var req app.CreateEndpointRequest + if !decodeJSON(w, r, &req) { + return + } + endpoint, validation, err := s.cfg.Control.CreateEndpoint(r.Context(), actorFrom(r), req) + if err != nil { + if len(validation.BlockedReasons) > 0 { + writeJSON(w, http.StatusUnprocessableEntity, validation) + return + } + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, map[string]any{"endpoint": endpoint, "ssrf": validation}) +} + +func (s *Server) listEndpoints(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListEndpoints(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + 
writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getEndpoint(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetEndpoint(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateEndpoint(w http.ResponseWriter, r *http.Request) { + var req app.UpdateEndpointRequest + if !decodeJSON(w, r, &req) { + return + } + item, validation, err := s.cfg.Control.UpdateEndpoint(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id"), req) + if err != nil { + if len(validation.BlockedReasons) > 0 { + writeJSON(w, http.StatusUnprocessableEntity, validation) + return + } + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteEndpoint(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteEndpoint(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) testEndpoint(w http.ResponseWriter, r *http.Request) { + var req app.TestEndpointRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.TestEndpoint(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, item) +} + +func (s *Server) validateEndpointURL(w http.ResponseWriter, r *http.Request) { + var req struct { + URL string `json:"url"` + } + if !decodeJSON(w, r, &req) { + return + } + writeJSON(w, http.StatusOK, s.cfg.Control.ValidateEndpointURL(r.Context(), req.URL)) +} + +func (s *Server) rotateEndpointSecret(w http.ResponseWriter, r *http.Request) { + var req app.RotateEndpointSecretRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := 
s.cfg.Control.RotateEndpointSecret(r.Context(), actorFrom(r), chi.URLParam(r, "endpoint_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createSubscription(w http.ResponseWriter, r *http.Request) { + var req app.CreateSubscriptionRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateSubscription(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listSubscriptions(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListSubscriptions(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getSubscription(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetSubscription(r.Context(), actorFrom(r), chi.URLParam(r, "subscription_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateSubscription(w http.ResponseWriter, r *http.Request) { + var req app.UpdateSubscriptionRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateSubscription(r.Context(), actorFrom(r), chi.URLParam(r, "subscription_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteSubscription(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteSubscription(r.Context(), actorFrom(r), chi.URLParam(r, "subscription_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createRetryPolicy(w http.ResponseWriter, r *http.Request) { + var req app.CreateRetryPolicyRequest + if !decodeJSON(w, r, &req) { + 
return + } + item, err := s.cfg.Control.CreateRetryPolicy(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listRetryPolicies(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListRetryPolicies(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getRetryPolicy(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetRetryPolicy(r.Context(), actorFrom(r), chi.URLParam(r, "retry_policy_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateRetryPolicy(w http.ResponseWriter, r *http.Request) { + var req app.UpdateRetryPolicyRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateRetryPolicy(r.Context(), actorFrom(r), chi.URLParam(r, "retry_policy_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteRetryPolicy(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteRetryPolicy(r.Context(), actorFrom(r), chi.URLParam(r, "retry_policy_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_event_ingest_handlers.go b/internal/adapters/httpapi/server_event_ingest_handlers.go new file mode 100644 index 0000000..26e62db --- /dev/null +++ b/internal/adapters/httpapi/server_event_ingest_handlers.go @@ -0,0 +1,242 @@ +package httpapi + +import ( + "encoding/base64" + "encoding/json" + "fmt" + "net/http" + "strings" + "time" + + "webhookery/internal/app" + "webhookery/internal/domain" + "webhookery/internal/problem" + + "github.com/go-chi/chi/v5" +) + 
+func (s *Server) listEvents(w http.ResponseWriter, r *http.Request) { + search, err := eventSearchRequestFromQuery(r) + if err != nil { + s.writeError(w, r, err) + return + } + items, err := s.cfg.Control.SearchEvents(r.Context(), actorFrom(r), search) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func eventSearchRequestFromQuery(r *http.Request) (app.EventSearchRequest, error) { + q := r.URL.Query() + single := func(name string) (string, error) { + values, ok := q[name] + if !ok { + return "", nil + } + if len(values) > 1 { + return "", fmt.Errorf("%w: %s must not be repeated", app.ErrInvalidInput, name) + } + return strings.TrimSpace(values[0]), nil + } + for _, name := range []string{"limit", "provider", "external_id", "delivery_id", "status", "verification", "received_after", "route_id"} { + if _, err := single(name); err != nil { + return app.EventSearchRequest{}, err + } + } + receivedAfterRaw, _ := single("received_after") + var receivedAfter time.Time + if receivedAfterRaw != "" { + parsed, err := time.Parse(time.RFC3339, receivedAfterRaw) + if err != nil { + return app.EventSearchRequest{}, fmt.Errorf("%w: received_after must be RFC3339", app.ErrInvalidInput) + } + receivedAfter = parsed + } + provider, _ := single("provider") + externalID, _ := single("external_id") + deliveryID, _ := single("delivery_id") + status, _ := single("status") + verification, _ := single("verification") + routeID, _ := single("route_id") + return app.EventSearchRequest{ + Limit: queryLimit(r), + Provider: provider, + ExternalID: externalID, + DeliveryID: deliveryID, + Status: status, + Verification: verification, + ReceivedAfter: receivedAfter, + RouteID: routeID, + }, nil +} + +func (s *Server) getEvent(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetEvent(r.Context(), actorFrom(r), chi.URLParam(r, "event_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, 
http.StatusOK, item) +} + +func (s *Server) getRawPayload(w http.ResponseWriter, r *http.Request) { + raw, err := s.cfg.Control.GetRawPayload(r.Context(), actorFrom(r), chi.URLParam(r, "event_id"), r.URL.Query().Get("reason")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, map[string]any{ + "event_id": raw.EventID, + "raw_payload_hash": raw.SHA256, + "content_type": raw.ContentType, + "size_bytes": raw.SizeBytes, + "storage_backend": raw.StorageBackend, + "storage_status": raw.StorageStatus, + "body_base64": base64.StdEncoding.EncodeToString(raw.Body), + }) +} + +func (s *Server) getNormalizedEvent(w http.ResponseWriter, r *http.Request) { + includeData := strings.EqualFold(r.URL.Query().Get("include_data"), "true") + item, err := s.cfg.Control.GetNormalizedEvent(r.Context(), actorFrom(r), chi.URLParam(r, "event_id"), includeData) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) getEventTimeline(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListEventTimeline(r.Context(), actorFrom(r), chi.URLParam(r, "event_id"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) ingestProductEvent(w http.ResponseWriter, r *http.Request) { + body, ok := readLimitedBody(w, r, maxIngressBodyBytes) + if !ok { + return + } + sourceID := productSourceID(body) + if sourceID == "" { + writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "Product event body must include source_id.")) + return + } + actor := actorFrom(r) + if actor.SourceID != "" && actor.SourceID != sourceID { + writeProblem(w, problem.Forbidden(requestID(r))) + return + } + result, err := s.cfg.Ingest.Ingest(r.Context(), app.IngestRequest{ + TenantID: actor.TenantID, + SourceID: sourceID, + Provider: "internal", + RawBody: body, + Headers: headers(r.Header), + ContentType: 
r.Header.Get("Content-Type"), + RemoteIP: r.RemoteAddr, + }) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, result) +} + +func productSourceID(raw []byte) string { + var req struct { + SourceID string `json:"source_id"` + } + if err := json.Unmarshal(raw, &req); err != nil { + return "" + } + return strings.TrimSpace(req.SourceID) +} + +func (s *Server) ingestGeneric(w http.ResponseWriter, r *http.Request) { + body, ok := readLimitedBody(w, r, maxIngressBodyBytes) + if !ok { + return + } + result, err := s.cfg.Ingest.Ingest(r.Context(), app.IngestRequest{ + TenantID: chi.URLParam(r, "tenant_id"), + SourceID: chi.URLParam(r, "source_id"), + Provider: "generic-hmac", + RawBody: body, + Headers: headers(r.Header), + ContentType: r.Header.Get("Content-Type"), + RemoteIP: r.RemoteAddr, + }) + s.writeIngestResult(w, r, result, err) +} + +func (s *Server) ingestGenericOrProvider(w http.ResponseWriter, r *http.Request) { + firstSegment := chi.URLParam(r, "tenant_id") + if documentedProviderPath(firstSegment) { + s.ingestProviderName(w, r, firstSegment) + return + } + s.ingestGeneric(w, r) +} + +func (s *Server) ingestProviderName(w http.ResponseWriter, r *http.Request, providerName string) { + body, ok := readLimitedBody(w, r, maxIngressBodyBytes) + if !ok { + return + } + result, err := s.cfg.Ingest.IngestProviderPath(r.Context(), providerName, chi.URLParam(r, "source_id"), app.IngestRequest{ + Provider: providerName, + RawBody: body, + Headers: headers(r.Header), + ContentType: r.Header.Get("Content-Type"), + RemoteIP: r.RemoteAddr, + }) + if err == nil && result.Accepted && strings.EqualFold(providerName, "slack") { + if challenge := slackChallenge(body); challenge != "" { + writeJSON(w, http.StatusOK, map[string]string{"challenge": challenge}) + return + } + } + s.writeIngestResult(w, r, result, err) +} + +func documentedProviderPath(providerName string) bool { + switch strings.ToLower(providerName) { + case "stripe", 
"github", "shopify", "slack", "cloudevents", "generic-jwt": + return true + default: + return false + } +} + +func slackChallenge(raw []byte) string { + var payload struct { + Type string `json:"type"` + Challenge string `json:"challenge"` + } + if err := json.Unmarshal(raw, &payload); err != nil { + return "" + } + if payload.Type != "url_verification" { + return "" + } + return strings.TrimSpace(payload.Challenge) +} + +func (s *Server) writeIngestResult(w http.ResponseWriter, r *http.Request, result app.IngestResult, err error) { + if err != nil { + s.writeError(w, r, err) + return + } + if !result.Accepted { + writeProblem(w, problem.New(http.StatusUnauthorized, "invalid_signature", "Invalid webhook signature", "Webhook evidence was captured, but the signature did not verify.", requestID(r), false)) + return + } + writeJSON(w, http.StatusOK, map[string]any{"received": true, "event_id": result.EventID, "duplicate": result.DedupeStatus != domain.DedupeUnique}) +} diff --git a/internal/adapters/httpapi/server_helpers.go b/internal/adapters/httpapi/server_helpers.go new file mode 100644 index 0000000..6143810 --- /dev/null +++ b/internal/adapters/httpapi/server_helpers.go @@ -0,0 +1,275 @@ +package httpapi + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "net" + "net/http" + "net/netip" + "sort" + "strconv" + "strings" + + "webhookery/internal/app" + "webhookery/internal/authz" + "webhookery/internal/domain" + "webhookery/internal/problem" + + "github.com/aatuh/api-toolkit/v2/httpx/identity" +) + +func (s *Server) writeError(w http.ResponseWriter, r *http.Request, err error) { + requestID := requestID(r) + switch { + case errors.Is(err, app.ErrUnauthorized): + writeProblem(w, problem.Unauthorized(requestID)) + case errors.Is(err, app.ErrForbidden): + writeProblem(w, problem.Forbidden(requestID)) + case errors.Is(err, app.ErrNotFound): + writeProblem(w, problem.New(http.StatusNotFound, "not_found", "Not found", "The requested resource was not found.", 
requestID, false)) + case errors.Is(err, app.ErrGone): + writeProblem(w, problem.New(http.StatusGone, "payload_expired", "Payload unavailable", "The requested payload body has expired or was removed by retention policy; metadata and hashes remain available.", requestID, false)) + case errors.Is(err, app.ErrInvalidInput): + writeProblem(w, problem.BadRequest(requestID, "validation_error", err.Error())) + default: + writeProblem(w, problem.Internal(requestID)) + } +} + +func actorFrom(r *http.Request) authz.Actor { + actor, _ := r.Context().Value(actorContextKey{}).(authz.Actor) + return actor +} + +func decodeJSON(w http.ResponseWriter, r *http.Request, dst any) bool { + dec := json.NewDecoder(io.LimitReader(r.Body, 1<<20)) + dec.DisallowUnknownFields() + if err := dec.Decode(dst); err != nil { + writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "Invalid JSON body.")) + return false + } + if dec.Decode(&struct{}{}) != io.EOF { + writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "JSON body must contain a single value.")) + return false + } + return true +} + +func readLimitedBody(w http.ResponseWriter, r *http.Request, max int64) ([]byte, bool) { + body, err := io.ReadAll(io.LimitReader(r.Body, max+1)) + if err != nil { + writeProblem(w, problem.BadRequest(requestID(r), "validation_error", "Could not read request body.")) + return nil, false + } + if int64(len(body)) > max { + writeProblem(w, problem.New(http.StatusRequestEntityTooLarge, "payload_too_large", "Payload too large", "The webhook payload exceeds the configured limit.", requestID(r), false)) + return nil, false + } + return body, true +} + +func rejectOversizedHeaders(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !requestHeadersWithinLimits(w, r) { + return + } + next.ServeHTTP(w, r) + }) +} + +func requestHeadersWithinLimits(w http.ResponseWriter, r *http.Request) bool { + var pairs int + var total int 
+ for name, values := range r.Header { + if len(values) == 0 { + pairs++ + total += len(name) + if pairs > maxHeaderPairs || total > maxHeaderBytes { + writeProblem(w, problem.New(http.StatusRequestHeaderFieldsTooLarge, "headers_too_large", "Headers too large", "The request headers exceed the configured limit.", requestID(r), false)) + return false + } + continue + } + for _, value := range values { + pairs++ + total += len(name) + len(value) + if len(value) > maxHeaderValueBytes || pairs > maxHeaderPairs || total > maxHeaderBytes { + writeProblem(w, problem.New(http.StatusRequestHeaderFieldsTooLarge, "headers_too_large", "Headers too large", "The request headers exceed the configured limit.", requestID(r), false)) + return false + } + } + } + return true +} + +func headers(h http.Header) []domain.HeaderPair { + var out []domain.HeaderPair + for name, values := range h { + for _, value := range values { + out = append(out, domain.HeaderPair{Name: name, Value: value}) + } + } + return out +} + +func writeJSON(w http.ResponseWriter, status int, body any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(body) +} + +func writeProblem(w http.ResponseWriter, p problem.Problem) { + writeJSON(w, p.Status, p) +} + +func page[T any](items []T) map[string]any { + if items == nil { + items = []T{} + } + return map[string]any{"data": items, "next_cursor": nil, "has_more": false} +} + +func scimListResponse[T any](items []T) map[string]any { + if items == nil { + items = []T{} + } + return map[string]any{ + "schemas": []string{"urn:ietf:params:scim:api:messages:2.0:ListResponse"}, + "totalResults": len(items), + "Resources": items, + "startIndex": 1, + "itemsPerPage": len(items), + } +} + +func (s *Server) remoteAddr(r *http.Request) string { + peer, ok := parseRemoteAddrIP(r.RemoteAddr) + if !ok || !s.trustsProxy(peer) { + return r.RemoteAddr + } + if forwarded := strings.TrimSpace(r.Header.Get("X-Forwarded-For")); 
forwarded != "" { + first, _, _ := strings.Cut(forwarded, ",") + addr, err := netip.ParseAddr(strings.TrimSpace(first)) + if err == nil { + return addr.Unmap().String() + } + } + return r.RemoteAddr +} + +func (s *Server) trustsProxy(addr netip.Addr) bool { + addr = addr.Unmap() + for _, prefix := range s.cfg.TrustedProxyCIDRs { + if prefix.Contains(addr) { + return true + } + } + return false +} + +func parseRemoteAddrIP(raw string) (netip.Addr, bool) { + host, _, err := net.SplitHostPort(raw) + if err != nil { + host = raw + } + addr, err := netip.ParseAddr(strings.Trim(host, "[]")) + if err != nil { + return netip.Addr{}, false + } + return addr.Unmap(), true +} + +func formatPrometheus(metrics domain.OpsMetrics) string { + var b strings.Builder + fmt.Fprintf(&b, "# HELP webhookery_events_total Total captured canonical events.\n") + fmt.Fprintf(&b, "# TYPE webhookery_events_total counter\n") + fmt.Fprintf(&b, "webhookery_events_total %d\n", metrics.EventsTotal) + fmt.Fprintf(&b, "# HELP webhookery_outbox_pending Pending durable outbox rows.\n") + fmt.Fprintf(&b, "# TYPE webhookery_outbox_pending gauge\n") + fmt.Fprintf(&b, "webhookery_outbox_pending %d\n", metrics.OutboxPending) + fmt.Fprintf(&b, "# HELP webhookery_outbox_oldest_age_seconds Oldest pending outbox age.\n") + fmt.Fprintf(&b, "# TYPE webhookery_outbox_oldest_age_seconds gauge\n") + fmt.Fprintf(&b, "webhookery_outbox_oldest_age_seconds %d\n", metrics.OldestOutboxAgeSec) + fmt.Fprintf(&b, "# HELP webhookery_dead_letter_open Open dead-letter entries.\n") + fmt.Fprintf(&b, "# TYPE webhookery_dead_letter_open gauge\n") + fmt.Fprintf(&b, "webhookery_dead_letter_open %d\n", metrics.DeadLetterOpen) + fmt.Fprintf(&b, "# HELP webhookery_quarantine_open Open quarantine entries.\n") + fmt.Fprintf(&b, "# TYPE webhookery_quarantine_open gauge\n") + fmt.Fprintf(&b, "webhookery_quarantine_open %d\n", metrics.QuarantineOpen) + fmt.Fprintf(&b, "# HELP webhookery_endpoint_circuit_open Open endpoint circuits.\n") + 
fmt.Fprintf(&b, "# TYPE webhookery_endpoint_circuit_open gauge\n") + fmt.Fprintf(&b, "webhookery_endpoint_circuit_open %d\n", metrics.EndpointCircuitOpen) + fmt.Fprintf(&b, "# HELP webhookery_audit_chain_unchained_events Audit events without chain entries.\n") + fmt.Fprintf(&b, "# TYPE webhookery_audit_chain_unchained_events gauge\n") + fmt.Fprintf(&b, "webhookery_audit_chain_unchained_events %d\n", metrics.AuditChainUnchainedEvents) + fmt.Fprintf(&b, "# HELP webhookery_audit_chain_verification_failures Audit chain entries that cannot verify against available audit rows.\n") + fmt.Fprintf(&b, "# TYPE webhookery_audit_chain_verification_failures gauge\n") + fmt.Fprintf(&b, "webhookery_audit_chain_verification_failures %d\n", metrics.AuditChainVerificationFailures) + fmt.Fprintf(&b, "# HELP webhookery_audit_chain_last_anchor_age_seconds Age of the newest audit chain anchor.\n") + fmt.Fprintf(&b, "# TYPE webhookery_audit_chain_last_anchor_age_seconds gauge\n") + fmt.Fprintf(&b, "webhookery_audit_chain_last_anchor_age_seconds %d\n", metrics.AuditChainLastAnchorAgeSec) + writeMetricCounts(&b, "webhookery_deliveries", "state", metrics.DeliveriesByState) + writeMetricCounts(&b, "webhookery_replay_jobs", "state", metrics.ReplayJobsByState) + writeMetricCounts(&b, "webhookery_reconciliation_jobs", "state", metrics.ReconciliationJobsByState) + writeMetricCounts(&b, "webhookery_reconciliation_items", "outcome", metrics.ReconciliationItemsByOutcome) + return b.String() +} + +func writeMetricCounts(b *strings.Builder, metricName, labelName string, values map[string]int64) { + counts := map[string]int64{} + for value, count := range values { + counts[safePublicMetricLabel(value)] += count + } + labels := make([]string, 0, len(counts)) + for label := range counts { + labels = append(labels, label) + } + sort.Strings(labels) + for _, label := range labels { + fmt.Fprintf(b, "%s{%s=%q} %d\n", metricName, labelName, label, counts[label]) + } +} + +func safePublicMetricLabel(value 
string) string { + switch value { + case "active", "canceled", "captured", "completed", "dead_lettered", "failed", "in_progress", + "matched", "missing", "open", "paused", "pending", "pending_approval", "redelivery_requested", "released", + "running", "scheduled", "succeeded", "unknown", "unrecoverable": + return value + default: + return "unknown" + } +} + +func publicSource(source domain.Source) map[string]any { + return map[string]any{ + "id": source.ID, + "tenant_id": source.TenantID, + "name": source.Name, + "provider": source.Provider, + "adapter": source.Adapter, + "state": source.State, + } +} + +func queryLimit(r *http.Request) int { + raw := r.URL.Query().Get("limit") + if raw == "" { + return 50 + } + limit, err := strconv.Atoi(raw) + if err != nil { + return 50 + } + return limit +} + +func requestID(r *http.Request) string { + reqID := identity.RequestID(r) + if strings.TrimSpace(reqID) == "" { + return "req_unknown" + } + return reqID +} diff --git a/internal/adapters/httpapi/server_incident_handlers.go b/internal/adapters/httpapi/server_incident_handlers.go new file mode 100644 index 0000000..ee153c7 --- /dev/null +++ b/internal/adapters/httpapi/server_incident_handlers.go @@ -0,0 +1,108 @@ +package httpapi + +import ( + "net/http" + "strings" + + "webhookery/internal/app" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) listIncidents(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListIncidents(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) createIncident(w http.ResponseWriter, r *http.Request) { + var req app.CreateIncidentRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateIncident(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) getIncident(w http.ResponseWriter, r 
*http.Request) { + item, err := s.cfg.Control.GetIncident(r.Context(), actorFrom(r), chi.URLParam(r, "incident_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) addIncidentEvent(w http.ResponseWriter, r *http.Request) { + var req app.AddIncidentEventRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.AddIncidentEvent(r.Context(), actorFrom(r), chi.URLParam(r, "incident_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) removeIncidentEvent(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RemoveIncidentEvent(r.Context(), actorFrom(r), chi.URLParam(r, "incident_id"), chi.URLParam(r, "event_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) generateIncidentReport(w http.ResponseWriter, r *http.Request) { + var req app.IncidentReportRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.GenerateIncidentReport(r.Context(), actorFrom(r), chi.URLParam(r, "incident_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) getIncidentReport(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetIncidentReport(r.Context(), actorFrom(r), chi.URLParam(r, "incident_id")) + if err != nil { + s.writeError(w, r, err) + return + } + if strings.EqualFold(r.URL.Query().Get("format"), "markdown") { + w.Header().Set("Content-Type", "text/markdown; charset=utf-8") + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(item.Markdown)) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createIncidentEvidenceExport(w http.ResponseWriter, r *http.Request) { + var req 
app.CreateIncidentEvidenceExportRequest + if !decodeJSON(w, r, &req) { + return + } + _, export, err := s.cfg.Control.CreateIncidentEvidenceExport(r.Context(), actorFrom(r), chi.URLParam(r, "incident_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, export) +} diff --git a/internal/adapters/httpapi/server_keys_producer_handlers.go b/internal/adapters/httpapi/server_keys_producer_handlers.go new file mode 100644 index 0000000..dae9337 --- /dev/null +++ b/internal/adapters/httpapi/server_keys_producer_handlers.go @@ -0,0 +1,184 @@ +package httpapi + +import ( + "net/http" + + "webhookery/internal/app" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) createAPIKey(w http.ResponseWriter, r *http.Request) { + var req app.CreateAPIKeyRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateAPIKey(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listAPIKeys(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListAPIKeys(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) revokeAPIKey(w http.ResponseWriter, r *http.Request) { + var req app.RevokeAPIKeyRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RevokeAPIKey(r.Context(), actorFrom(r), chi.URLParam(r, "api_key_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createProducerClient(w http.ResponseWriter, r *http.Request) { + var req app.CreateProducerClientRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateProducerClient(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func 
(s *Server) listProducerClients(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListProducerClients(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getProducerClient(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetProducerClient(r.Context(), actorFrom(r), chi.URLParam(r, "client_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateProducerClient(w http.ResponseWriter, r *http.Request) { + var req app.UpdateProducerClientRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateProducerClient(r.Context(), actorFrom(r), chi.URLParam(r, "client_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteProducerClient(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteProducerClient(r.Context(), actorFrom(r), chi.URLParam(r, "client_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) rotateProducerClientSecret(w http.ResponseWriter, r *http.Request) { + var req app.RotateProducerClientSecretRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RotateProducerClientSecret(r.Context(), actorFrom(r), chi.URLParam(r, "client_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { + var req app.CreateProducerMTLSIdentityRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateProducerMTLSIdentity(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, 
err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listProducerMTLSIdentities(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListProducerMTLSIdentities(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetProducerMTLSIdentity(r.Context(), actorFrom(r), chi.URLParam(r, "identity_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { + var req app.UpdateProducerMTLSIdentityRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateProducerMTLSIdentity(r.Context(), actorFrom(r), chi.URLParam(r, "identity_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteProducerMTLSIdentity(r.Context(), actorFrom(r), chi.URLParam(r, "identity_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) verifyProducerMTLSIdentity(w http.ResponseWriter, r *http.Request) { + var req app.VerifyProducerMTLSIdentityRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.VerifyProducerMTLSIdentity(r.Context(), actorFrom(r), chi.URLParam(r, "identity_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_ops_signal_handlers.go b/internal/adapters/httpapi/server_ops_signal_handlers.go new file mode 100644 index 0000000..1f7d936 --- 
/dev/null +++ b/internal/adapters/httpapi/server_ops_signal_handlers.go @@ -0,0 +1,390 @@ +package httpapi + +import ( + "errors" + "net/http" + "strings" + + "webhookery/internal/app" + "webhookery/internal/problem" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) listEndpointHealth(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListEndpointHealth(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) opsMetrics(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.OpsMetrics(r.Context(), actorFrom(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) listMetricRollups(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListMetricRollups(r.Context(), actorFrom(r), r.URL.Query().Get("metric_name"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) listWorkers(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListWorkers(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getWorker(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetWorker(r.Context(), actorFrom(r), chi.URLParam(r, "worker_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) listQueues(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListQueues(r.Context(), actorFrom(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) opsStorage(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.OpsStorage(r.Context(), actorFrom(r)) + if err != nil { + 
s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) opsConfig(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.OpsConfig(r.Context(), actorFrom(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createAlertRule(w http.ResponseWriter, r *http.Request) { + var req app.CreateAlertRuleRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateAlertRule(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listAlertRules(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListAlertRules(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getAlertRule(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetAlertRule(r.Context(), actorFrom(r), chi.URLParam(r, "alert_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateAlertRule(w http.ResponseWriter, r *http.Request) { + var req app.UpdateAlertRuleRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateAlertRule(r.Context(), actorFrom(r), chi.URLParam(r, "alert_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteAlertRule(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteAlertRule(r.Context(), actorFrom(r), chi.URLParam(r, "alert_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) listAlertFirings(w http.ResponseWriter, r *http.Request) { + items, err := 
s.cfg.Control.ListAlertFirings(r.Context(), actorFrom(r), r.URL.Query().Get("state"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getAlertFiring(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetAlertFiring(r.Context(), actorFrom(r), chi.URLParam(r, "firing_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) acknowledgeAlertFiring(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.AcknowledgeAlertFiring(r.Context(), actorFrom(r), chi.URLParam(r, "firing_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createNotificationChannel(w http.ResponseWriter, r *http.Request) { + var req app.CreateNotificationChannelRequest + if !decodeJSON(w, r, &req) { + return + } + item, result, err := s.cfg.Control.CreateNotificationChannel(r.Context(), actorFrom(r), req) + if err != nil { + if errors.Is(err, app.ErrInvalidInput) && len(result.BlockedReasons) > 0 { + writeProblem(w, problem.BadRequest(requestID(r), "notification_channel_url_blocked", strings.Join(result.BlockedReasons, ","))) + return + } + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listNotificationChannels(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListNotificationChannels(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getNotificationChannel(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetNotificationChannel(r.Context(), actorFrom(r), chi.URLParam(r, "channel_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, 
http.StatusOK, item) +} + +func (s *Server) updateNotificationChannel(w http.ResponseWriter, r *http.Request) { + var req app.UpdateNotificationChannelRequest + if !decodeJSON(w, r, &req) { + return + } + item, result, err := s.cfg.Control.UpdateNotificationChannel(r.Context(), actorFrom(r), chi.URLParam(r, "channel_id"), req) + if err != nil { + if errors.Is(err, app.ErrInvalidInput) && len(result.BlockedReasons) > 0 { + writeProblem(w, problem.BadRequest(requestID(r), "notification_channel_url_blocked", strings.Join(result.BlockedReasons, ","))) + return + } + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteNotificationChannel(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteNotificationChannel(r.Context(), actorFrom(r), chi.URLParam(r, "channel_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) testNotificationChannel(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.TestNotificationChannel(r.Context(), actorFrom(r), chi.URLParam(r, "channel_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, item) +} + +func (s *Server) listNotificationDeliveries(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListNotificationDeliveries(r.Context(), actorFrom(r), r.URL.Query().Get("state"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) listNotificationDeliveryAttempts(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListNotificationDeliveryAttempts(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) 
+ return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) retryNotificationDelivery(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RetryNotificationDelivery(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createSIEMSink(w http.ResponseWriter, r *http.Request) { + var req app.CreateSIEMSinkRequest + if !decodeJSON(w, r, &req) { + return + } + item, result, err := s.cfg.Control.CreateSIEMSink(r.Context(), actorFrom(r), req) + if err != nil { + if errors.Is(err, app.ErrInvalidInput) && len(result.BlockedReasons) > 0 { + writeProblem(w, problem.BadRequest(requestID(r), "siem_sink_url_blocked", strings.Join(result.BlockedReasons, ","))) + return + } + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listSIEMSinks(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListSIEMSinks(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getSIEMSink(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetSIEMSink(r.Context(), actorFrom(r), chi.URLParam(r, "sink_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateSIEMSink(w http.ResponseWriter, r *http.Request) { + var req app.UpdateSIEMSinkRequest + if !decodeJSON(w, r, &req) { + return + } + item, result, err := s.cfg.Control.UpdateSIEMSink(r.Context(), actorFrom(r), chi.URLParam(r, "sink_id"), req) + if err != nil { + if errors.Is(err, app.ErrInvalidInput) && len(result.BlockedReasons) > 0 { + writeProblem(w, problem.BadRequest(requestID(r), "siem_sink_url_blocked", strings.Join(result.BlockedReasons, 
","))) + return + } + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteSIEMSink(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteSIEMSink(r.Context(), actorFrom(r), chi.URLParam(r, "sink_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) testSIEMSink(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.TestSIEMSink(r.Context(), actorFrom(r), chi.URLParam(r, "sink_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusAccepted, item) +} + +func (s *Server) listSIEMDeliveries(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListSIEMDeliveries(r.Context(), actorFrom(r), r.URL.Query().Get("state"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) listSIEMDeliveryAttempts(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListSIEMDeliveryAttempts(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) retrySIEMDelivery(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RetrySIEMDelivery(r.Context(), actorFrom(r), chi.URLParam(r, "delivery_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_provider_adapter_handlers.go b/internal/adapters/httpapi/server_provider_adapter_handlers.go new file mode 100644 index 0000000..48c8008 --- /dev/null +++ 
b/internal/adapters/httpapi/server_provider_adapter_handlers.go @@ -0,0 +1,88 @@ +package httpapi + +import ( + "net/http" + + "webhookery/internal/app" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) createProviderAdapter(w http.ResponseWriter, r *http.Request) { + var req app.CreateProviderAdapterRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateProviderAdapter(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listProviderAdapters(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListProviderAdapters(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getProviderAdapter(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetProviderAdapter(r.Context(), actorFrom(r), chi.URLParam(r, "adapter_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createAdapterVersion(w http.ResponseWriter, r *http.Request) { + var req app.CreateAdapterVersionRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateAdapterVersion(r.Context(), actorFrom(r), chi.URLParam(r, "adapter_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listAdapterVersions(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListAdapterVersions(r.Context(), actorFrom(r), chi.URLParam(r, "adapter_id"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) createAdapterTestVector(w http.ResponseWriter, r *http.Request) { + var req app.CreateAdapterTestVectorRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := 
s.cfg.Control.CreateAdapterTestVector(r.Context(), actorFrom(r), chi.URLParam(r, "adapter_id"), chi.URLParam(r, "version_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) transitionAdapterVersion(w http.ResponseWriter, r *http.Request) { + var req app.AdapterVersionTransitionRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.TransitionAdapterVersion(r.Context(), actorFrom(r), chi.URLParam(r, "adapter_id"), chi.URLParam(r, "version_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_route_handlers.go b/internal/adapters/httpapi/server_route_handlers.go new file mode 100644 index 0000000..77b0cff --- /dev/null +++ b/internal/adapters/httpapi/server_route_handlers.go @@ -0,0 +1,103 @@ +package httpapi + +import ( + "net/http" + + "webhookery/internal/app" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) createRoute(w http.ResponseWriter, r *http.Request) { + var req app.CreateRouteRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateRoute(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listRoutes(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListRoutes(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getRoute(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateRoute(w http.ResponseWriter, r *http.Request) { + var req app.UpdateRouteRequest + if !decodeJSON(w, r, &req) { + 
return + } + item, err := s.cfg.Control.UpdateRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteRoute(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) listRouteVersions(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListRouteVersions(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) activateRoute(w http.ResponseWriter, r *http.Request) { + var req app.ActivateRouteRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.ActivateRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), req.Reason) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) dryRunRoute(w http.ResponseWriter, r *http.Request) { + var req struct { + EventID string `json:"event_id"` + } + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DryRunRoute(r.Context(), actorFrom(r), chi.URLParam(r, "route_id"), req.EventID) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_schema_transform_handlers.go b/internal/adapters/httpapi/server_schema_transform_handlers.go new file mode 100644 index 0000000..8c6705a --- /dev/null +++ b/internal/adapters/httpapi/server_schema_transform_handlers.go @@ -0,0 +1,215 @@ +package httpapi + +import ( + "net/http" + + "webhookery/internal/app" + + "github.com/go-chi/chi/v5" +) + 
+func (s *Server) createEventType(w http.ResponseWriter, r *http.Request) { + var req app.CreateEventTypeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateEventType(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listEventTypes(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListEventTypes(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getEventType(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetEventType(r.Context(), actorFrom(r), chi.URLParam(r, "event_type")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateEventType(w http.ResponseWriter, r *http.Request) { + var req app.UpdateEventTypeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateEventType(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteEventType(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteEventType(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createEventSchema(w http.ResponseWriter, r *http.Request) { + var req app.CreateEventSchemaRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) 
listEventSchemas(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListEventSchemas(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getEventSchema(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) updateEventSchema(w http.ResponseWriter, r *http.Request) { + var req app.UpdateEventSchemaRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) deleteEventSchema(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) validateEventSchema(w http.ResponseWriter, r *http.Request) { + var req app.ValidateSchemaRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.ValidateEventSchema(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) checkEventSchemaCompatibility(w http.ResponseWriter, r *http.Request) { + var req app.CheckSchemaCompatibilityRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := 
s.cfg.Control.CheckEventSchemaCompatibility(r.Context(), actorFrom(r), chi.URLParam(r, "event_type"), chi.URLParam(r, "schema_version"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createTransformation(w http.ResponseWriter, r *http.Request) { + var req app.CreateTransformationRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateTransformation(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listTransformations(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListTransformations(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getTransformation(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetTransformation(r.Context(), actorFrom(r), chi.URLParam(r, "transformation_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) createTransformationVersion(w http.ResponseWriter, r *http.Request) { + var req app.CreateTransformationVersionRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateTransformationVersion(r.Context(), actorFrom(r), chi.URLParam(r, "transformation_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listTransformationVersions(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListTransformationVersions(r.Context(), actorFrom(r), chi.URLParam(r, "transformation_id"), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) activateTransformationVersion(w http.ResponseWriter, r *http.Request) { + var req 
app.ActivateTransformationVersionRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.ActivateTransformationVersion(r.Context(), actorFrom(r), chi.URLParam(r, "transformation_id"), chi.URLParam(r, "version_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_sources_provider_handlers.go b/internal/adapters/httpapi/server_sources_provider_handlers.go new file mode 100644 index 0000000..5906d80 --- /dev/null +++ b/internal/adapters/httpapi/server_sources_provider_handlers.go @@ -0,0 +1,140 @@ +package httpapi + +import ( + "net/http" + + "webhookery/internal/app" + + "github.com/go-chi/chi/v5" +) + +func (s *Server) createSource(w http.ResponseWriter, r *http.Request) { + var req app.CreateSourceRequest + if !decodeJSON(w, r, &req) { + return + } + source, err := s.cfg.Control.CreateSource(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, publicSource(source)) +} + +func (s *Server) listSources(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListSources(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + out := make([]map[string]any, 0, len(items)) + for _, item := range items { + out = append(out, publicSource(item)) + } + writeJSON(w, http.StatusOK, page(out)) +} + +func (s *Server) getSource(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetSource(r.Context(), actorFrom(r), chi.URLParam(r, "source_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, publicSource(item)) +} + +func (s *Server) updateSource(w http.ResponseWriter, r *http.Request) { + var req app.UpdateSourceRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.UpdateSource(r.Context(), actorFrom(r), chi.URLParam(r, "source_id"), req) + if 
err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, publicSource(item)) +} + +func (s *Server) deleteSource(w http.ResponseWriter, r *http.Request) { + var req app.StateChangeRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.DeleteSource(r.Context(), actorFrom(r), chi.URLParam(r, "source_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, publicSource(item)) +} + +func (s *Server) createProviderConnection(w http.ResponseWriter, r *http.Request) { + var req app.CreateProviderConnectionRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.CreateProviderConnection(r.Context(), actorFrom(r), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusCreated, item) +} + +func (s *Server) listProviderConnections(w http.ResponseWriter, r *http.Request) { + items, err := s.cfg.Control.ListProviderConnections(r.Context(), actorFrom(r), queryLimit(r)) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, page(items)) +} + +func (s *Server) getProviderConnection(w http.ResponseWriter, r *http.Request) { + item, err := s.cfg.Control.GetProviderConnection(r.Context(), actorFrom(r), chi.URLParam(r, "connection_id")) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) verifyProviderConnection(w http.ResponseWriter, r *http.Request) { + var req app.ProviderConnectionStateRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.VerifyProviderConnection(r.Context(), actorFrom(r), chi.URLParam(r, "connection_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) revokeProviderConnection(w http.ResponseWriter, r *http.Request) { + var req app.ProviderConnectionStateRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := 
s.cfg.Control.RevokeProviderConnection(r.Context(), actorFrom(r), chi.URLParam(r, "connection_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} + +func (s *Server) rotateSourceSecret(w http.ResponseWriter, r *http.Request) { + var req app.RotateSourceSecretRequest + if !decodeJSON(w, r, &req) { + return + } + item, err := s.cfg.Control.RotateSourceSecret(r.Context(), actorFrom(r), chi.URLParam(r, "source_id"), req) + if err != nil { + s.writeError(w, r, err) + return + } + writeJSON(w, http.StatusOK, item) +} diff --git a/internal/adapters/httpapi/server_system.go b/internal/adapters/httpapi/server_system.go new file mode 100644 index 0000000..cde6672 --- /dev/null +++ b/internal/adapters/httpapi/server_system.go @@ -0,0 +1,36 @@ +package httpapi + +import ( + "net/http" + + "webhookery/internal/problem" +) + +func (s *Server) health(w http.ResponseWriter, r *http.Request) { + writeJSON(w, http.StatusOK, map[string]any{"ok": true}) +} + +func (s *Server) ready(w http.ResponseWriter, r *http.Request) { + if s.cfg.Health != nil { + if err := s.cfg.Health(r.Context()); err != nil { + writeProblem(w, problem.New(http.StatusServiceUnavailable, "not_ready", "Not ready", "A required dependency is unavailable.", requestID(r), true)) + return + } + } + writeJSON(w, http.StatusOK, map[string]any{"ok": true}) +} + +func (s *Server) prometheusMetrics(w http.ResponseWriter, r *http.Request) { + metrics, err := s.cfg.Control.PublicOpsMetrics(r.Context()) + if err != nil { + writeProblem(w, problem.Internal(requestID(r))) + return + } + w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8") + _, _ = w.Write([]byte(formatPrometheus(metrics))) +} + +func (s *Server) openapi(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/yaml") + _, _ = w.Write(s.cfg.OpenAPI) +} diff --git a/internal/adapters/httpapi/server_test.go b/internal/adapters/httpapi/server_test.go index 
aa0b231..1d03511 100644 --- a/internal/adapters/httpapi/server_test.go +++ b/internal/adapters/httpapi/server_test.go @@ -5,11 +5,16 @@ import ( "context" "crypto/tls" "crypto/x509" + "encoding/base64" + "encoding/json" + "errors" "fmt" "net/http" "net/http/httptest" + "net/netip" "strings" "testing" + "time" "webhookery/internal/app" "webhookery/internal/authz" @@ -32,6 +37,73 @@ func TestOpenAPIAndRoutes(t *testing.T) { } } +func TestSystemRoutesExposeHealthReadinessAndMetrics(t *testing.T) { + server := NewServer(ServerConfig{ + Control: NewNoopControl(), + Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleAdmin, Scopes: []string{"*"}}), + Health: func(context.Context) error { + return nil + }, + }) + + for _, path := range []string{"/healthz", "/readyz"} { + t.Run(path, func(t *testing.T) { + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, path, nil) + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if !strings.Contains(rec.Body.String(), `"ok":true`) { + t.Fatalf("unexpected health body %s", rec.Body.String()) + } + }) + } + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + server.Routes().ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected metrics 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if got := rec.Header().Get("Content-Type"); !strings.Contains(got, "text/plain") { + t.Fatalf("unexpected metrics content-type %q", got) + } + if !strings.Contains(rec.Body.String(), "webhookery_events_total") { + t.Fatalf("metrics body did not include expected series: %s", rec.Body.String()) + } +} + +func TestReadyRouteReportsDependencyFailureAsRetryableProblem(t *testing.T) { + server := NewServer(ServerConfig{ + Control: NewNoopControl(), + 
Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleAdmin, Scopes: []string{"*"}}), + Health: func(context.Context) error { + return errors.New("database unavailable") + }, + }) + + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/readyz", nil) + req.Header.Set("X-Request-ID", "req_ready") + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusServiceUnavailable { + t.Fatalf("expected 503, got %d body=%s", rec.Code, rec.Body.String()) + } + for _, want := range []string{`"code":"not_ready"`, `"stable_code":"WEBHOOKERY_DURABLE_CAPTURE_UNAVAILABLE"`, `"request_id":"req_ready"`, `"retryable":true`} { + if !strings.Contains(rec.Body.String(), want) { + t.Fatalf("readiness problem %s did not contain %s", rec.Body.String(), want) + } + } + if strings.Contains(rec.Body.String(), "database unavailable") { + t.Fatalf("readiness response leaked dependency detail: %s", rec.Body.String()) + } +} + func TestControlRoutesRequireBearer(t *testing.T) { server := NewServer(ServerConfig{ Control: NewNoopControl(), @@ -47,6 +119,289 @@ func TestControlRoutesRequireBearer(t *testing.T) { } } +func TestEventsSearchParsesFiltersAndRejectsDuplicateSingletons(t *testing.T) { + store := &eventSearchControlStore{} + control := app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + server := NewServer(ServerConfig{ + Control: control, + Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_1", TenantID: "ten_events", Role: authz.RoleDeveloper, Scopes: []string{"events:read"}}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + + req := httptest.NewRequest(http.MethodGet, "/v1/events?provider=stripe&external_id=evt_external&verification=invalid&status=dlq&received_after=2026-06-04T10:00:00Z&route_id=rte_1&delivery_id=del_1&limit=25", 
nil) + req.Header.Set("Authorization", "Bearer token") + rec := httptest.NewRecorder() + server.Routes().ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected search to succeed, got %d body=%s", rec.Code, rec.Body.String()) + } + if store.tenantID != "ten_events" { + t.Fatalf("expected tenant-scoped search, got %q", store.tenantID) + } + if store.req.Provider != "stripe" || store.req.ExternalID != "evt_external" || store.req.Verification != "invalid" || store.req.Status != "dlq" || store.req.RouteID != "rte_1" || store.req.DeliveryID != "del_1" || store.req.Limit != 25 { + t.Fatalf("unexpected search request: %+v", store.req) + } + if store.req.ReceivedAfter.IsZero() || store.req.ReceivedAfter.Location() != time.UTC { + t.Fatalf("received_after was not parsed as UTC: %s", store.req.ReceivedAfter) + } + + dup := httptest.NewRequest(http.MethodGet, "/v1/events?verification=valid&verification=invalid", nil) + dup.Header.Set("Authorization", "Bearer token") + rec = httptest.NewRecorder() + server.Routes().ServeHTTP(rec, dup) + if rec.Code != http.StatusBadRequest { + t.Fatalf("expected duplicate singleton query to fail, got %d body=%s", rec.Code, rec.Body.String()) + } +} + +func TestAuthenticatedReadRoutesReturnJSON(t *testing.T) { + server := testServerWithActor(authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleOwner, Scopes: []string{"*"}}) + routes := []string{ + "/v1/auth/session", + "/v1/auth/sessions", + "/v1/identity-providers", + "/v1/identity-providers/idp_1", + "/v1/scim-tokens", + "/v1/role-bindings", + "/v1/access-policies", + "/v1/api-keys", + "/v1/producer-clients", + "/v1/producer-clients/pcl_1", + "/v1/producer-mtls-identities", + "/v1/producer-mtls-identities/pmi_1", + "/v1/sources", + "/v1/sources/src_1", + "/v1/provider-connections", + "/v1/provider-connections/pcn_1", + "/v1/adapters", + "/v1/adapters/pad_1", + "/v1/adapters/pad_1/versions", + "/v1/endpoints", + "/v1/endpoints/end_1", + "/v1/subscriptions", + 
"/v1/subscriptions/sub_1", + "/v1/retry-policies", + "/v1/retry-policies/rtp_1", + "/v1/routes", + "/v1/routes/rte_1", + "/v1/routes/rte_1/versions", + "/v1/event-types", + "/v1/event-types/invoice.paid", + "/v1/event-types/invoice.paid/schemas", + "/v1/event-types/invoice.paid/schemas/2026-05-01", + "/v1/events", + "/v1/events/evt_1", + "/v1/events/evt_1/timeline", + "/v1/incidents", + "/v1/incidents/inc_1", + "/v1/incidents/inc_1/report", + "/v1/transformations", + "/v1/transformations/trn_1", + "/v1/transformations/trn_1/versions", + "/v1/deliveries", + "/v1/deliveries/del_1/attempts", + "/v1/delivery-attempts/att_1", + "/v1/replay-jobs", + "/v1/replay-approval-policies", + "/v1/reconciliation-jobs", + "/v1/reconciliation-jobs/rec_1", + "/v1/reconciliation-jobs/rec_1/items", + "/v1/dead-letter", + "/v1/quarantine", + "/v1/audit-events", + "/v1/audit-chain/head", + "/v1/audit-chain/anchors", + "/v1/audit-chain/anchors/anc_1", + "/v1/audit-exports", + "/v1/audit-exports/exp_1", + "/v1/admin/retention-policies", + "/v1/endpoint-health", + "/v1/ops/metrics", + "/v1/ops/metrics/rollups?metric_name=deliveries", + "/v1/ops/storage", + "/v1/ops/workers", + "/v1/ops/workers/wrk_1", + "/v1/ops/queues", + "/v1/alerts", + "/v1/alerts/alr_1", + "/v1/alert-firings", + "/v1/alert-firings/afr_1", + "/v1/notification-channels", + "/v1/notification-channels/nch_1", + "/v1/notification-deliveries", + "/v1/notification-deliveries/ndel_1/attempts", + "/v1/siem-sinks", + "/v1/siem-sinks/snk_1", + "/v1/siem-deliveries", + "/v1/siem-deliveries/sdel_1/attempts", + } + expectedStatus := map[string]int{ + "/v1/auth/session": http.StatusUnauthorized, + "/v1/auth/sessions": http.StatusBadRequest, + "/v1/identity-providers": http.StatusBadRequest, + "/v1/identity-providers/idp_1": http.StatusBadRequest, + "/v1/scim-tokens": http.StatusBadRequest, + "/v1/role-bindings": http.StatusBadRequest, + "/v1/access-policies": http.StatusBadRequest, + "/v1/producer-clients": http.StatusNotFound, + 
"/v1/producer-clients/pcl_1": http.StatusNotFound, + "/v1/producer-mtls-identities": http.StatusNotFound, + "/v1/producer-mtls-identities/pmi_1": http.StatusNotFound, + } + for _, path := range routes { + t.Run(path, func(t *testing.T) { + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, path, nil) + req.Header.Set("Authorization", "Bearer token") + + server.Routes().ServeHTTP(rec, req) + + wantStatus := http.StatusOK + if status := expectedStatus[path]; status != 0 { + wantStatus = status + } + if rec.Code != wantStatus { + t.Fatalf("expected %d, got %d body=%s", wantStatus, rec.Code, rec.Body.String()) + } + if got := rec.Header().Get("Content-Type"); !strings.Contains(got, "application/json") { + t.Fatalf("expected JSON response, got content-type %q body=%s", got, rec.Body.String()) + } + }) + } +} + +func TestAuthenticatedMutationRoutesPreserveContracts(t *testing.T) { + server := testServerWithActor(authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleOwner, Scopes: []string{"*"}}) + tests := []struct { + name string + method string + path string + body string + wantStatus int + }{ + {name: "create api key", method: http.MethodPost, path: "/v1/api-keys", body: `{"name":"ops","user_id":"usr_2","email":"ops@example.com","role":"operator","scopes":["events:read"]}`, wantStatus: http.StatusCreated}, + {name: "revoke api key", method: http.MethodPost, path: "/v1/api-keys/key_1:revoke", body: `{"reason":"rotate"}`, wantStatus: http.StatusOK}, + {name: "create identity provider unavailable", method: http.MethodPost, path: "/v1/identity-providers", body: `{"name":"corp","provider_type":"oidc","issuer_url":"https://idp.example","authorization_endpoint":"https://idp.example/auth","token_endpoint":"https://idp.example/token","jwks_uri":"https://idp.example/keys","client_id":"webhookery","client_secret":"oidc-secret","allowed_email_domains":["example.com"]}`, wantStatus: http.StatusBadRequest}, + {name: "update identity provider 
unavailable", method: http.MethodPatch, path: "/v1/identity-providers/idp_1", body: `{"name":"corp-renamed","reason":"rename"}`, wantStatus: http.StatusBadRequest}, + {name: "disable identity provider unavailable", method: http.MethodDelete, path: "/v1/identity-providers/idp_1", body: `{"reason":"disable"}`, wantStatus: http.StatusBadRequest}, + {name: "test identity provider unavailable", method: http.MethodPost, path: "/v1/identity-providers/idp_1:test", body: `{"reason":"smoke"}`, wantStatus: http.StatusBadRequest}, + {name: "create scim token unavailable", method: http.MethodPost, path: "/v1/scim-tokens", body: `{"name":"directory-sync"}`, wantStatus: http.StatusBadRequest}, + {name: "revoke scim token unavailable", method: http.MethodDelete, path: "/v1/scim-tokens/sct_1", body: `{"reason":"rotate"}`, wantStatus: http.StatusBadRequest}, + {name: "create role binding unavailable", method: http.MethodPost, path: "/v1/role-bindings", body: `{"principal_type":"user","principal_id":"usr_2","role":"operator","resource_family":"tenant","resource_id":"ten_1","environment":"prod","reason":"delegate"}`, wantStatus: http.StatusBadRequest}, + {name: "update role binding unavailable", method: http.MethodPatch, path: "/v1/role-bindings/rbd_1", body: `{"role":"viewer","reason":"least privilege"}`, wantStatus: http.StatusBadRequest}, + {name: "disable role binding unavailable", method: http.MethodDelete, path: "/v1/role-bindings/rbd_1", body: `{"reason":"remove"}`, wantStatus: http.StatusBadRequest}, + {name: "create access policy unavailable", method: http.MethodPost, path: "/v1/access-policies", body: `{"name":"deny-export","action":"audit:export","effect":"deny","resource_family":"audit_export","environment":"prod","reason":"policy"}`, wantStatus: http.StatusBadRequest}, + {name: "update access policy unavailable", method: http.MethodPatch, path: "/v1/access-policies/apr_1", body: `{"effect":"allow","reason":"policy change"}`, wantStatus: http.StatusBadRequest}, + {name: 
"disable access policy unavailable", method: http.MethodDelete, path: "/v1/access-policies/apr_1", body: `{"reason":"retire"}`, wantStatus: http.StatusBadRequest}, + {name: "authz explain unavailable", method: http.MethodPost, path: "/v1/authz:explain", body: `{"actor_id":"usr_2","action":"events:read","resource_family":"event","resource_id":"evt_1","environment":"prod"}`, wantStatus: http.StatusBadRequest}, + {name: "create producer client unavailable", method: http.MethodPost, path: "/v1/producer-clients", body: `{"name":"billing","source_id":"src_1","scopes":["events:write"],"token_ttl_seconds":900}`, wantStatus: http.StatusNotFound}, + {name: "update producer client unavailable", method: http.MethodPatch, path: "/v1/producer-clients/pcl_1", body: `{"name":"billing-v2","reason":"rename"}`, wantStatus: http.StatusNotFound}, + {name: "delete producer client unavailable", method: http.MethodDelete, path: "/v1/producer-clients/pcl_1", body: `{"reason":"retire"}`, wantStatus: http.StatusNotFound}, + {name: "rotate producer client secret unavailable", method: http.MethodPost, path: "/v1/producer-clients/pcl_1/secrets:rotate", body: `{"reason":"rotate"}`, wantStatus: http.StatusNotFound}, + {name: "create producer mtls unavailable", method: http.MethodPost, path: "/v1/producer-mtls-identities", body: `{"name":"billing","source_id":"src_1","certificate_pem":"not a cert"}`, wantStatus: http.StatusNotFound}, + {name: "update producer mtls unavailable", method: http.MethodPatch, path: "/v1/producer-mtls-identities/pmi_1", body: `{"name":"billing-v2","reason":"rename"}`, wantStatus: http.StatusNotFound}, + {name: "delete producer mtls unavailable", method: http.MethodDelete, path: "/v1/producer-mtls-identities/pmi_1", body: `{"reason":"retire"}`, wantStatus: http.StatusNotFound}, + {name: "verify producer mtls unavailable", method: http.MethodPost, path: "/v1/producer-mtls-identities/pmi_1:verify", body: `{"certificate_pem":"not a cert"}`, wantStatus: http.StatusNotFound}, 
+ {name: "create source", method: http.MethodPost, path: "/v1/sources", body: `{"name":"stripe-primary","provider":"stripe","adapter":"stripe","verification_secret":"whsec_test"}`, wantStatus: http.StatusCreated}, + {name: "update source", method: http.MethodPatch, path: "/v1/sources/src_1", body: `{"name":"stripe-renamed","state":"active","reason":"rename"}`, wantStatus: http.StatusOK}, + {name: "delete source", method: http.MethodDelete, path: "/v1/sources/src_1", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "rotate source secret", method: http.MethodPost, path: "/v1/sources/src_1/secrets:rotate", body: `{"new_secret":"next-secret","reason":"rotate","grace_period_hours":1}`, wantStatus: http.StatusOK}, + {name: "create provider connection", method: http.MethodPost, path: "/v1/provider-connections", body: `{"name":"stripe-api","provider":"stripe","credential_type":"api_key","credential":"sk_test_secret","config":{"source_id":"src_1"}}`, wantStatus: http.StatusCreated}, + {name: "verify provider connection", method: http.MethodPost, path: "/v1/provider-connections/pcn_1:verify", body: `{"reason":"check"}`, wantStatus: http.StatusOK}, + {name: "revoke provider connection", method: http.MethodPost, path: "/v1/provider-connections/pcn_1:revoke", body: `{"reason":"rotate"}`, wantStatus: http.StatusOK}, + {name: "create adapter", method: http.MethodPost, path: "/v1/adapters", body: `{"name":"custom-provider","kind":"declarative","description":"test adapter","risk_level":"low"}`, wantStatus: http.StatusCreated}, + {name: "create adapter version", method: http.MethodPost, path: "/v1/adapters/pad_1/versions", body: `{"version":"2026-05-28","definition":{"provider":"custom"},"reason":"initial","risk_level":"low"}`, wantStatus: http.StatusCreated}, + {name: "create adapter test vector", method: http.MethodPost, path: "/v1/adapters/pad_1/versions/adv_1/test-vectors", body: `{"name":"valid-signature","purpose":"happy 
path","request":{"body":"{}"},"expected":{"verified":true}}`, wantStatus: http.StatusCreated}, + {name: "transition adapter", method: http.MethodPost, path: "/v1/adapters/pad_1/versions/adv_1:transition", body: `{"action":"activate","reason":"promote"}`, wantStatus: http.StatusOK}, + {name: "create endpoint", method: http.MethodPost, path: "/v1/endpoints", body: `{"name":"receiver","url":"https://receiver.example/hook"}`, wantStatus: http.StatusCreated}, + {name: "update endpoint", method: http.MethodPatch, path: "/v1/endpoints/end_1", body: `{"name":"receiver-renamed","url":"https://receiver.example/hook","state":"active","reason":"rename"}`, wantStatus: http.StatusOK}, + {name: "delete endpoint", method: http.MethodDelete, path: "/v1/endpoints/end_1", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "validate endpoint url", method: http.MethodPost, path: "/v1/endpoints:validate-url", body: `{"url":"https://receiver.example/hook"}`, wantStatus: http.StatusOK}, + {name: "test endpoint", method: http.MethodPost, path: "/v1/endpoints/end_1:test", body: `{"reason":"smoke"}`, wantStatus: http.StatusAccepted}, + {name: "rotate endpoint secret", method: http.MethodPost, path: "/v1/endpoints/end_1/secrets:rotate", body: `{"reason":"rotate","grace_period_hours":1}`, wantStatus: http.StatusOK}, + {name: "create subscription", method: http.MethodPost, path: "/v1/subscriptions", body: `{"endpoint_id":"end_1","event_types":["invoice.paid"],"payload_format":"canonical_json"}`, wantStatus: http.StatusCreated}, + {name: "update subscription", method: http.MethodPatch, path: "/v1/subscriptions/sub_1", body: `{"state":"disabled","reason":"pause"}`, wantStatus: http.StatusOK}, + {name: "delete subscription", method: http.MethodDelete, path: "/v1/subscriptions/sub_1", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "create retry policy", method: http.MethodPost, path: "/v1/retry-policies", body: 
`{"name":"standard","max_attempts":3,"max_duration_seconds":3600,"initial_delay_seconds":1,"max_delay_seconds":60,"state":"active"}`, wantStatus: http.StatusCreated}, + {name: "update retry policy", method: http.MethodPatch, path: "/v1/retry-policies/rtp_1", body: `{"max_attempts":6,"reason":"tune"}`, wantStatus: http.StatusOK}, + {name: "delete retry policy", method: http.MethodDelete, path: "/v1/retry-policies/rtp_1", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "create route", method: http.MethodPost, path: "/v1/routes", body: `{"source_id":"src_1","name":"invoice-route","priority":10,"event_types":["invoice.paid"],"endpoint_id":"end_1","state":"active"}`, wantStatus: http.StatusCreated}, + {name: "update route", method: http.MethodPatch, path: "/v1/routes/rte_1", body: `{"priority":20,"reason":"reprioritize"}`, wantStatus: http.StatusOK}, + {name: "delete route", method: http.MethodDelete, path: "/v1/routes/rte_1", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "activate route", method: http.MethodPost, path: "/v1/routes/rte_1:activate", body: `{"reason":"publish"}`, wantStatus: http.StatusOK}, + {name: "dry run route", method: http.MethodPost, path: "/v1/routes/rte_1:dry-run", body: `{"event_id":"evt_1"}`, wantStatus: http.StatusOK}, + {name: "create event type", method: http.MethodPost, path: "/v1/event-types", body: `{"name":"invoice.paid","description":"Invoice paid"}`, wantStatus: http.StatusCreated}, + {name: "update event type", method: http.MethodPatch, path: "/v1/event-types/invoice.paid", body: `{"description":"Invoice paid v2","state":"active","reason":"document"}`, wantStatus: http.StatusOK}, + {name: "delete event type", method: http.MethodDelete, path: "/v1/event-types/invoice.paid", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "create schema", method: http.MethodPost, path: "/v1/event-types/invoice.paid/schemas", body: `{"version":"2026-05-01","schema":"{\"type\":\"object\"}"}`, 
wantStatus: http.StatusCreated}, + {name: "update schema", method: http.MethodPatch, path: "/v1/event-types/invoice.paid/schemas/2026-05-01", body: `{"state":"deprecated","reason":"replace"}`, wantStatus: http.StatusOK}, + {name: "delete schema", method: http.MethodDelete, path: "/v1/event-types/invoice.paid/schemas/2026-05-01", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "validate schema", method: http.MethodPost, path: "/v1/event-types/invoice.paid/schemas/2026-05-01:validate", body: `{"payload":"{\"id\":\"evt_1\"}"}`, wantStatus: http.StatusOK}, + {name: "check schema compatibility", method: http.MethodPost, path: "/v1/event-types/invoice.paid/schemas/2026-05-01:check-compatibility", body: `{"new_schema":"{\"type\":\"object\"}"}`, wantStatus: http.StatusOK}, + {name: "create transformation", method: http.MethodPost, path: "/v1/transformations", body: `{"name":"redact-email","operations":[{"op":"redact","path":"/data/email"}]}`, wantStatus: http.StatusCreated}, + {name: "create transformation version", method: http.MethodPost, path: "/v1/transformations/trn_1/versions", body: `{"operations":[{"op":"redact","path":"/data/email"}]}`, wantStatus: http.StatusCreated}, + {name: "activate transformation version", method: http.MethodPost, path: "/v1/transformations/trn_1/versions/trv_1:activate", body: `{"reason":"publish"}`, wantStatus: http.StatusOK}, + {name: "retry delivery", method: http.MethodPost, path: "/v1/deliveries/del_1:retry", body: `{"reason":"retry"}`, wantStatus: http.StatusAccepted}, + {name: "cancel delivery", method: http.MethodPost, path: "/v1/deliveries/del_1:cancel", body: `{"reason":"cancel"}`, wantStatus: http.StatusOK}, + {name: "dry run replay", method: http.MethodPost, path: "/v1/replay-jobs:dry-run", body: `{"event_id":"evt_1","reason_code":"operator_requested","reason":"inspect"}`, wantStatus: http.StatusOK}, + {name: "preview replay", method: http.MethodPost, path: "/v1/replay-jobs/preview", body: 
`{"event_id":"evt_1","reason_code":"operator_requested","reason":"inspect"}`, wantStatus: http.StatusOK}, + {name: "create reconciliation", method: http.MethodPost, path: "/v1/reconciliation-jobs", body: `{"connection_id":"pcn_1","reason":"recover"}`, wantStatus: http.StatusCreated}, + {name: "approve replay", method: http.MethodPost, path: "/v1/replay-jobs/rpl_1:approve", body: `{"reason":"approve"}`, wantStatus: http.StatusOK}, + {name: "pause replay", method: http.MethodPost, path: "/v1/replay-jobs/rpl_1:pause", body: `{"reason":"pause"}`, wantStatus: http.StatusOK}, + {name: "resume replay", method: http.MethodPost, path: "/v1/replay-jobs/rpl_1:resume", body: `{"reason":"resume"}`, wantStatus: http.StatusOK}, + {name: "cancel replay", method: http.MethodPost, path: "/v1/replay-jobs/rpl_1:cancel", body: `{"reason":"cancel"}`, wantStatus: http.StatusOK}, + {name: "create replay approval policy", method: http.MethodPost, path: "/v1/replay-approval-policies", body: `{"scope_type":"source","scope_id":"src_1","reason":"sensitive source"}`, wantStatus: http.StatusCreated}, + {name: "disable replay approval policy", method: http.MethodDelete, path: "/v1/replay-approval-policies/rap_1", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "dry run reconciliation", method: http.MethodPost, path: "/v1/reconciliation-jobs:dry-run", body: `{"connection_id":"pcn_1","reason":"preview"}`, wantStatus: http.StatusOK}, + {name: "cancel reconciliation", method: http.MethodPost, path: "/v1/reconciliation-jobs/rec_1:cancel", body: `{"reason":"stop"}`, wantStatus: http.StatusOK}, + {name: "bulk release dead letter", method: http.MethodPost, path: "/v1/dead-letter:bulk-release", body: `{"entry_ids":["dlq_1"],"reason_code":"incident_recovery","reason":"release"}`, wantStatus: http.StatusAccepted}, + {name: "approve quarantine", method: http.MethodPost, path: "/v1/quarantine/qrn_1:approve", body: `{"reason":"safe","route_after_release":true}`, wantStatus: http.StatusOK}, + 
{name: "reject quarantine", method: http.MethodPost, path: "/v1/quarantine/qrn_1:reject", body: `{"reason":"reject"}`, wantStatus: http.StatusOK}, + {name: "verify audit chain", method: http.MethodPost, path: "/v1/audit-chain:verify", body: `{"from_sequence":1,"to_sequence":2}`, wantStatus: http.StatusOK}, + {name: "anchor audit chain", method: http.MethodPost, path: "/v1/audit-chain:anchor", body: `{"from_sequence":1,"to_sequence":2,"reason":"daily"}`, wantStatus: http.StatusCreated}, + {name: "create audit export", method: http.MethodPost, path: "/v1/audit-events:export", body: `{"include_raw_payloads":false,"include_timelines":true,"reason":"support"}`, wantStatus: http.StatusAccepted}, + {name: "create incident", method: http.MethodPost, path: "/v1/incidents", body: `{"title":"Stripe payment webhook failed","reason":"support investigation"}`, wantStatus: http.StatusCreated}, + {name: "add incident event", method: http.MethodPost, path: "/v1/incidents/inc_1/events", body: `{"event_id":"evt_1","reason":"attach failed payment"}`, wantStatus: http.StatusCreated}, + {name: "remove incident event", method: http.MethodDelete, path: "/v1/incidents/inc_1/events/evt_1", body: `{"reason":"not related"}`, wantStatus: http.StatusOK}, + {name: "generate incident report", method: http.MethodPost, path: "/v1/incidents/inc_1/generate-report", body: `{"reason":"support handoff"}`, wantStatus: http.StatusCreated}, + {name: "create incident evidence export", method: http.MethodPost, path: "/v1/incidents/inc_1/evidence-export", body: `{"reason":"customer evidence"}`, wantStatus: http.StatusAccepted}, + {name: "create retention", method: http.MethodPost, path: "/v1/admin/retention-policies", body: `{"resource_type":"raw_payload","retention_days":30}`, wantStatus: http.StatusCreated}, + {name: "update retention", method: http.MethodPatch, path: "/v1/admin/retention-policies/ret_1", body: `{"retention_days":30}`, wantStatus: http.StatusOK}, + {name: "create alert", method: 
http.MethodPost, path: "/v1/alerts", body: `{"name":"dlq-open","rule_type":"dead_letter_open","threshold":1,"comparator":">=","window_seconds":60,"state":"active","channel_ids":["nch_1"]}`, wantStatus: http.StatusCreated}, + {name: "update alert", method: http.MethodPatch, path: "/v1/alerts/alr_1", body: `{"threshold":2,"reason":"tune"}`, wantStatus: http.StatusOK}, + {name: "delete alert", method: http.MethodDelete, path: "/v1/alerts/alr_1", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "ack alert", method: http.MethodPost, path: "/v1/alert-firings/afr_1:acknowledge", body: `{"reason":"seen"}`, wantStatus: http.StatusOK}, + {name: "create notification", method: http.MethodPost, path: "/v1/notification-channels", body: `{"name":"ops-webhook","channel_type":"webhook","url":"https://signals.example/hook","signing_secret":"notify-secret-123"}`, wantStatus: http.StatusCreated}, + {name: "update notification", method: http.MethodPatch, path: "/v1/notification-channels/nch_1", body: `{"name":"ops-webhook-2","reason":"rename"}`, wantStatus: http.StatusOK}, + {name: "delete notification", method: http.MethodDelete, path: "/v1/notification-channels/nch_1", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "test notification", method: http.MethodPost, path: "/v1/notification-channels/nch_1:test", body: `{"reason":"smoke"}`, wantStatus: http.StatusAccepted}, + {name: "retry notification", method: http.MethodPost, path: "/v1/notification-deliveries/ndel_1:retry", body: `{"reason":"retry"}`, wantStatus: http.StatusOK}, + {name: "create siem", method: http.MethodPost, path: "/v1/siem-sinks", body: `{"name":"secops","sink_type":"webhook","url":"https://siem.example/ingest","signing_secret":"siem-secret-1234"}`, wantStatus: http.StatusCreated}, + {name: "update siem", method: http.MethodPatch, path: "/v1/siem-sinks/snk_1", body: `{"name":"secops-2","reason":"rename"}`, wantStatus: http.StatusOK}, + {name: "delete siem", method: 
http.MethodDelete, path: "/v1/siem-sinks/snk_1", body: `{"reason":"retire"}`, wantStatus: http.StatusOK}, + {name: "test siem", method: http.MethodPost, path: "/v1/siem-sinks/snk_1:test", body: `{"reason":"smoke"}`, wantStatus: http.StatusAccepted}, + {name: "retry siem", method: http.MethodPost, path: "/v1/siem-deliveries/sdel_1:retry", body: `{"reason":"retry"}`, wantStatus: http.StatusOK}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rec := httptest.NewRecorder() + req := httptest.NewRequest(tt.method, tt.path, bytes.NewBufferString(tt.body)) + req.Header.Set("Authorization", "Bearer token") + req.Header.Set("Content-Type", "application/json") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != tt.wantStatus { + t.Fatalf("expected %d, got %d body=%s", tt.wantStatus, rec.Code, rec.Body.String()) + } + if got := rec.Header().Get("Content-Type"); !strings.Contains(got, "application/json") { + t.Fatalf("expected JSON response, got content-type %q body=%s", got, rec.Body.String()) + } + }) + } +} + func TestProductEventSourceIDExtractionPreservesRawBodyPath(t *testing.T) { sourceID := productSourceID([]byte(`{"source_id":"src_internal","id":"evt_1"}`)) if sourceID != "src_internal" { @@ -271,6 +626,46 @@ func TestPrometheusAuditChainMetricsAreAggregate(t *testing.T) { } } +func TestPrometheusMetricLabelsDoNotExposeSecretShapedValues(t *testing.T) { + body := formatPrometheus(domain.OpsMetrics{ + DeliveriesByState: map[string]int64{ + "scheduled": 2, + "whsec_secret_marker": 1, + }, + ReplayJobsByState: map[string]int64{ + "pending_approval": 3, + "sk_test_secret": 4, + }, + ReconciliationJobsByState: map[string]int64{ + "running": 5, + "tenant_secret_state": 6, + }, + ReconciliationItemsByOutcome: map[string]int64{ + "matched": 7, + "raw-body-secret-out": 8, + }, + }) + for _, forbidden := range []string{"whsec_secret_marker", "sk_test_secret", "tenant_secret_state", "raw-body-secret-out"} { + if strings.Contains(body, forbidden) { 
+ t.Fatalf("public metrics leaked secret-shaped label %q:\n%s", forbidden, body) + } + } + for _, want := range []string{ + `webhookery_deliveries{state="scheduled"} 2`, + `webhookery_deliveries{state="unknown"} 1`, + `webhookery_replay_jobs{state="pending_approval"} 3`, + `webhookery_replay_jobs{state="unknown"} 4`, + `webhookery_reconciliation_jobs{state="running"} 5`, + `webhookery_reconciliation_jobs{state="unknown"} 6`, + `webhookery_reconciliation_items{outcome="matched"} 7`, + `webhookery_reconciliation_items{outcome="unknown"} 8`, + } { + if !strings.Contains(body, want) { + t.Fatalf("missing sanitized metric line %q in:\n%s", want, body) + } + } +} + func TestOpsConfigRouteReturnsRedactedRuntimeMetadata(t *testing.T) { control := app.NewControlServiceWithRuntimeConfig(noopControlStore{}, ssrf.Validator{Resolver: ssrf.StaticResolver{}}, domain.OpsConfig{ Environment: "production", @@ -363,6 +758,36 @@ func TestIngressRejectsTooManyHeadersBeforeCapture(t *testing.T) { } } +func TestIngestRouteDispatchesTenantAndProviderAliases(t *testing.T) { + store := &routeDispatchIngestStore{} + server := NewServer(ServerConfig{ + Control: NewNoopControl(), + Ingest: app.NewIngestService(store, app.SystemClock{}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + + generic := httptest.NewRecorder() + genericReq := httptest.NewRequest(http.MethodPost, "/v1/ingest/ten_route/src_route", strings.NewReader(`{"specversion":"1.0","id":"evt_1","type":"thing.created","source":"tests"}`)) + server.Routes().ServeHTTP(generic, genericReq) + if generic.Code != http.StatusOK { + t.Fatalf("expected generic tenant ingest to succeed, got %d body=%s", generic.Code, generic.Body.String()) + } + if store.lastTenantID != "ten_route" || store.lastSourceID != "src_route" || store.providerLookupCalled { + t.Fatalf("generic route used wrong lookup path: %+v", store) + } + + store.providerLookupCalled = false + provider := httptest.NewRecorder() + providerReq := httptest.NewRequest(http.MethodPost, 
"/v1/ingest/cloudevents/src_route", strings.NewReader(`{"specversion":"1.0","id":"evt_2","type":"thing.created","source":"tests"}`)) + server.Routes().ServeHTTP(provider, providerReq) + if provider.Code != http.StatusOK { + t.Fatalf("expected provider alias ingest to succeed, got %d body=%s", provider.Code, provider.Body.String()) + } + if !store.providerLookupCalled || store.lastProvider != "cloudevents" || store.lastProviderSourceID != "src_route" { + t.Fatalf("provider alias route used wrong lookup path: %+v", store) + } +} + func TestAuditExportWithRawRequiresRawScope(t *testing.T) { server := testServerWithActor(authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleAdmin, Scopes: []string{"audit:read"}}) rec := httptest.NewRecorder() @@ -423,6 +848,250 @@ func TestNormalizedEventBodyRequiresRawScope(t *testing.T) { } } +func TestRawPayloadRequiresRawScopeBeforeStoreAccess(t *testing.T) { + store := &rawPayloadControlStore{} + server := NewServer(ServerConfig{ + Control: app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}), + Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleDeveloper, Scopes: []string{"events:read"}}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/v1/events/evt_1/raw", nil) + req.Header.Set("Authorization", "Bearer token") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusForbidden { + t.Fatalf("expected 403, got %d body=%s", rec.Code, rec.Body.String()) + } + if store.rawCalled { + t.Fatal("raw payload store must not be called without events:raw") + } +} + +func TestRawPayloadEndpointRequiresReasonBeforeStoreAccess(t *testing.T) { + store := &rawPayloadControlStore{} + server := NewServer(ServerConfig{ + Control: app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}), + 
Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_raw", TenantID: "ten_raw", Role: authz.RoleOwner, Scopes: []string{"events:raw"}}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/v1/events/evt_raw/raw", nil) + req.Header.Set("Authorization", "Bearer token") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String()) + } + if store.rawCalled { + t.Fatal("raw payload store must not be called without reason") + } +} + +func TestRawPayloadEndpointEncodesBodyAndTenantActorContext(t *testing.T) { + store := &rawPayloadControlStore{} + server := NewServer(ServerConfig{ + Control: app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}), + Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_raw", TenantID: "ten_raw", Role: authz.RoleOwner, Scopes: []string{"events:raw"}}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "/v1/events/evt_raw/raw?reason=support+case", nil) + req.Header.Set("Authorization", "Bearer token") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if !store.rawCalled || store.tenantID != "ten_raw" || store.eventID != "evt_raw" || store.actorID != "usr_raw" || store.reason != "support case" { + t.Fatalf("raw payload store called with wrong context: %+v", store) + } + wantBody := base64.StdEncoding.EncodeToString([]byte("raw evidence bytes")) + body := rec.Body.String() + if !strings.Contains(body, `"body_base64":"`+wantBody+`"`) { + t.Fatalf("raw payload body was not base64 encoded: %s", body) + } + if strings.Contains(body, 
"raw evidence bytes") { + t.Fatalf("raw payload response leaked plaintext body outside base64 field: %s", body) + } +} + +func TestCrossTenantEvidenceAccessReturnsNotFoundWithoutLeak(t *testing.T) { + store := &tenantIsolationControlStore{ownerTenantID: "ten_owner", foreignEventID: "evt_foreign"} + server := NewServer(ServerConfig{ + Control: app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}), + Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ + ID: "usr_security", + TenantID: "ten_requester", + Role: authz.RoleSecurity, + Scopes: []string{"events:read", "events:raw", "incidents:write"}, + }), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + + cases := []struct { + name string + method string + path string + body string + }{ + {name: "event detail", method: http.MethodGet, path: "/v1/events/evt_foreign"}, + {name: "raw payload", method: http.MethodGet, path: "/v1/events/evt_foreign/raw?reason=security+review"}, + {name: "incident link", method: http.MethodPost, path: "/v1/incidents/inc_requester/events", body: `{"event_id":"evt_foreign","reason":"investigate"}`}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + rec := httptest.NewRecorder() + req := httptest.NewRequest(tc.method, tc.path, bytes.NewBufferString(tc.body)) + req.Header.Set("Authorization", "Bearer token") + req.Header.Set("Content-Type", "application/json") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusNotFound { + t.Fatalf("expected 404, got %d body=%s", rec.Code, rec.Body.String()) + } + body := rec.Body.String() + for _, forbidden := range []string{"evt_foreign", "ten_owner", "raw-secret"} { + if strings.Contains(body, forbidden) { + t.Fatalf("wrong-tenant response leaked %q: %s", forbidden, body) + } + } + }) + } + if store.lastEventTenantID != "ten_requester" || store.lastRawTenantID != "ten_requester" || store.lastIncidentTenantID != "ten_requester" 
{ + t.Fatalf("wrong-tenant attempts were not scoped to actor tenant: %+v", store) + } + if store.addIncidentCalled { + t.Fatal("foreign event must not be linked into an incident") + } +} + +func TestCreateReplayPropagatesReasonCodeReasonAndConfig(t *testing.T) { + store := &replayControlStore{} + server := NewServer(ServerConfig{ + Control: app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}), + Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_replay", TenantID: "ten_replay", Role: authz.RoleOperator, Scopes: []string{"replay:write"}}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/replay-jobs", bytes.NewBufferString(`{"event_id":"evt_1","reason_code":"receiver_fixed","reason":"customer fixed receiver","config_mode":"original","rate_limit_per_minute":25}`)) + req.Header.Set("Authorization", "Bearer token") + req.Header.Set("Content-Type", "application/json") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusAccepted { + t.Fatalf("expected 202, got %d body=%s", rec.Code, rec.Body.String()) + } + if !store.replayCalled || store.tenantID != "ten_replay" || store.actorID != "usr_replay" { + t.Fatalf("replay store called with wrong context: %+v", store) + } + if store.replayReq.EventID != "evt_1" || store.replayReq.ReasonCode != app.ReplayReasonReceiverFixed || store.replayReq.Reason != "customer fixed receiver" || store.replayReq.ConfigMode != app.ReplayConfigOriginal || store.replayReq.RateLimitPerMinute != 25 { + t.Fatalf("replay request was not propagated: %+v", store.replayReq) + } +} + +func TestCreateReplayRejectsMissingReasonBeforeStoreAccess(t *testing.T) { + store := &replayControlStore{} + server := NewServer(ServerConfig{ + Control: app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}), + Ingest: 
app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_replay", TenantID: "ten_replay", Role: authz.RoleOperator, Scopes: []string{"replay:write"}}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/replay-jobs", bytes.NewBufferString(`{"event_id":"evt_1"}`)) + req.Header.Set("Authorization", "Bearer token") + req.Header.Set("Content-Type", "application/json") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String()) + } + if store.replayCalled { + t.Fatal("missing replay reason must be rejected before store side effects") + } +} + +func TestCreateReplayRejectsMissingReasonCodeBeforeStoreAccess(t *testing.T) { + store := &replayControlStore{} + server := NewServer(ServerConfig{ + Control: app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}), + Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_replay", TenantID: "ten_replay", Role: authz.RoleOperator, Scopes: []string{"replay:write"}}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/replay-jobs", bytes.NewBufferString(`{"event_id":"evt_1","reason":"customer fixed receiver"}`)) + req.Header.Set("Authorization", "Bearer token") + req.Header.Set("Content-Type", "application/json") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusBadRequest { + t.Fatalf("expected 400, got %d body=%s", rec.Code, rec.Body.String()) + } + if store.replayCalled { + t.Fatal("missing replay reason code must be rejected before store side effects") + } +} + +func TestDeadLetterReleasePropagatesReasonCodeAndReason(t *testing.T) { + store := &replayControlStore{} + server := NewServer(ServerConfig{ 
+ Control: app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}), + Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_ops", TenantID: "ten_ops", Role: authz.RoleOperator, Scopes: []string{"deliveries:retry"}}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/dead-letter/dlq_1:release", bytes.NewBufferString(`{"reason_code":"receiver_fixed","reason":"receiver recovered"}`)) + req.Header.Set("Authorization", "Bearer token") + req.Header.Set("Content-Type", "application/json") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != http.StatusAccepted { + t.Fatalf("expected 202, got %d body=%s", rec.Code, rec.Body.String()) + } + if !store.deadLetterCalled || store.entryID != "dlq_1" || store.reasonCode != app.ReplayReasonReceiverFixed || store.reason != "receiver recovered" || store.tenantID != "ten_ops" || store.actorID != "usr_ops" { + t.Fatalf("dead-letter release used wrong context: %+v", store) + } +} + +func TestDeadLetterReleaseRequiresRetryScopeBeforeStoreAccess(t *testing.T) { + store := &replayControlStore{} + server := NewServer(ServerConfig{ + Control: app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}), + Ingest: app.NewIngestService(&fakeIngestStore{}, app.SystemClock{}), + Auth: app.NewStaticAuthenticator("token", authz.Actor{ID: "usr_support", TenantID: "ten_ops", Role: authz.RoleSupport, Scopes: []string{"deliveries:read"}}), + OpenAPI: []byte("openapi: 3.1.0\n"), + }) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodPost, "/v1/dead-letter/dlq_1:release", bytes.NewBufferString(`{"reason_code":"receiver_fixed","reason":"receiver recovered"}`)) + req.Header.Set("Authorization", "Bearer token") + req.Header.Set("Content-Type", "application/json") + + server.Routes().ServeHTTP(rec, req) + + if rec.Code != 
http.StatusForbidden { + t.Fatalf("expected 403, got %d body=%s", rec.Code, rec.Body.String()) + } + if store.deadLetterCalled { + t.Fatal("dead-letter release store must not be called without deliveries:retry") + } +} + func TestTransformationWriteRequiresRoutesWrite(t *testing.T) { server := testServerWithActor(authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleDeveloper, Scopes: []string{"routes:read"}}) rec := httptest.NewRecorder() @@ -633,6 +1302,31 @@ func (f *acceptingIngestStore) CaptureInbound(context.Context, app.CaptureInboun return app.CaptureInboundResult{EventID: "evt_1", ReceiptID: "rcp_1", RawPayloadID: "raw_1", DedupeStatus: domain.DedupeUnique}, nil } +type routeDispatchIngestStore struct { + providerLookupCalled bool + lastTenantID string + lastSourceID string + lastProvider string + lastProviderSourceID string +} + +func (f *routeDispatchIngestStore) FindSource(_ context.Context, tenantID, sourceID string) (domain.Source, error) { + f.lastTenantID = tenantID + f.lastSourceID = sourceID + return domain.Source{ID: sourceID, TenantID: tenantID, Provider: "cloudevents", Adapter: "cloudevents", State: domain.StateActive}, nil +} + +func (f *routeDispatchIngestStore) FindSourceByProviderPath(_ context.Context, provider, sourceID string) (domain.Source, error) { + f.providerLookupCalled = true + f.lastProvider = provider + f.lastProviderSourceID = sourceID + return domain.Source{ID: sourceID, TenantID: "ten_provider", Provider: provider, Adapter: provider, State: domain.StateActive}, nil +} + +func (f *routeDispatchIngestStore) CaptureInbound(context.Context, app.CaptureInboundInput) (app.CaptureInboundResult, error) { + return app.CaptureInboundResult{EventID: "evt_1", ReceiptID: "rcp_1", RawPayloadID: "raw_1", DedupeStatus: domain.DedupeUnique}, nil +} + type fakeProducerMTLSLookup struct { fingerprint string } @@ -664,8 +1358,24 @@ func (f *producerTokenControlStore) CreateProducerAccessToken(_ context.Context, return input.Token, nil } 
+type eventSearchControlStore struct { + noopControlStore + tenantID string + req app.EventSearchRequest +} + +func (f *eventSearchControlStore) ListEvents(_ context.Context, tenantID string, req app.EventSearchRequest) ([]domain.Event, error) { + f.tenantID = tenantID + f.req = req + return []domain.Event{{ID: "evt_1", TenantID: tenantID, Provider: req.Provider, ProviderID: req.ExternalID, RawPayloadHash: "sha256:raw"}}, nil +} + func NewNoopControl() *app.ControlService { - return app.NewControlService(noopControlStore{}, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + return app.NewControlService(noopControlStore{}, ssrf.Validator{Resolver: ssrf.StaticResolver{ + "receiver.example": {netip.MustParseAddr("93.184.216.34")}, + "signals.example": {netip.MustParseAddr("93.184.216.34")}, + "siem.example": {netip.MustParseAddr("93.184.216.34")}, + }}) } func testServerWithActor(actor authz.Actor) *Server { @@ -803,7 +1513,7 @@ func (noopControlStore) ListEventSchemas(context.Context, string, string, int) ( return nil, nil } func (noopControlStore) GetEventSchema(context.Context, string, string, string) (domain.EventSchema, error) { - return domain.EventSchema{}, nil + return domain.EventSchema{Schema: `{"type":"object"}`}, nil } func (noopControlStore) UpdateEventSchema(context.Context, string, string, string, string, app.UpdateEventSchemaRequest) (domain.EventSchema, error) { return domain.EventSchema{}, nil @@ -811,21 +1521,50 @@ func (noopControlStore) UpdateEventSchema(context.Context, string, string, strin func (noopControlStore) DeleteEventSchema(context.Context, string, string, string, string, string) (domain.EventSchema, error) { return domain.EventSchema{}, nil } -func (noopControlStore) ListEvents(context.Context, string, int) ([]domain.Event, error) { +func (noopControlStore) ListEvents(context.Context, string, app.EventSearchRequest) ([]domain.Event, error) { return nil, nil } func (noopControlStore) GetEvent(context.Context, string, string) 
(domain.Event, error) { return domain.Event{}, nil } -func (noopControlStore) GetRawPayload(context.Context, string, string, string) (domain.RawPayload, error) { +func (noopControlStore) GetRawPayload(context.Context, string, string, string, string) (domain.RawPayload, error) { return domain.RawPayload{}, nil } func (noopControlStore) GetNormalizedEvent(_ context.Context, tenantID, eventID, actorID string, includeData bool) (domain.NormalizedEnvelope, error) { return domain.NormalizedEnvelope{ID: "nenv_1", TenantID: tenantID, EventID: eventID, StorageStatus: domain.StorageStatusStored}, nil } -func (noopControlStore) ListEventTimeline(context.Context, string, string, int) ([]map[string]any, error) { +func (noopControlStore) ListEventTimeline(context.Context, string, string, int) ([]app.EventTimelineEntry, error) { + return []app.EventTimelineEntry{{SchemaVersion: app.EventTimelineSchemaV1, Sequence: 1, Kind: "event", RefID: "evt_1", State: "unique", Detail: "valid_signature", OccurredAt: time.Unix(1, 0).UTC()}}, nil +} +func (noopControlStore) CreateIncident(_ context.Context, incident domain.Incident) (domain.Incident, error) { + incident.ID = "inc_1" + return incident, nil +} +func (noopControlStore) ListIncidents(context.Context, string, int) ([]domain.Incident, error) { return nil, nil } +func (noopControlStore) GetIncident(_ context.Context, tenantID, incidentID string) (domain.Incident, error) { + return domain.Incident{ID: incidentID, TenantID: tenantID, Title: "Stripe payment failed", Reason: "support case", State: domain.StateActive}, nil +} +func (noopControlStore) AddIncidentEvent(_ context.Context, tenantID, incidentID, eventID, actorID, reason string) (domain.IncidentEvent, error) { + return domain.IncidentEvent{ID: "ine_1", TenantID: tenantID, IncidentID: incidentID, EventID: eventID, AddedBy: actorID, Reason: reason}, nil +} +func (noopControlStore) RemoveIncidentEvent(_ context.Context, tenantID, incidentID, eventID, actorID, reason string) 
(domain.IncidentEvent, error) { + return domain.IncidentEvent{ID: "ine_1", TenantID: tenantID, IncidentID: incidentID, EventID: eventID, AddedBy: actorID, Reason: reason}, nil +} +func (noopControlStore) ListIncidentEvents(_ context.Context, tenantID, incidentID string) ([]domain.IncidentEvent, error) { + return []domain.IncidentEvent{{ID: "ine_1", TenantID: tenantID, IncidentID: incidentID, EventID: "evt_1", AddedBy: "usr_1", Reason: "investigate"}}, nil +} +func (noopControlStore) CreateIncidentReportSnapshot(_ context.Context, tenantID, incidentID, actorID, reason string, report app.IncidentReport, markdown string) (domain.IncidentReportSnapshot, error) { + raw, _ := json.Marshal(report) + return domain.IncidentReportSnapshot{ID: "irs_1", TenantID: tenantID, IncidentID: incidentID, SchemaVersion: report.SchemaVersion, Report: raw, Markdown: markdown, GeneratedBy: actorID}, nil +} +func (noopControlStore) GetIncidentReportSnapshot(_ context.Context, tenantID, incidentID string) (domain.IncidentReportSnapshot, error) { + return domain.IncidentReportSnapshot{ID: "irs_1", TenantID: tenantID, IncidentID: incidentID, SchemaVersion: "webhookery.incident_report.v1", Markdown: "incident report"}, nil +} +func (noopControlStore) CreateIncidentEvidenceExport(_ context.Context, tenantID, incidentID, actorID string, req app.CreateIncidentEvidenceExportRequest, report app.IncidentReport, markdown string) (domain.IncidentEvidenceExport, domain.EvidenceExport, error) { + return domain.IncidentEvidenceExport{ID: "iex_1", TenantID: tenantID, IncidentID: incidentID, ExportID: "exp_1", CreatedBy: actorID}, domain.EvidenceExport{ID: "exp_1", TenantID: tenantID, State: domain.EvidenceExportStateReady, IncludeTimelines: true, CreatedBy: actorID}, nil +} func (noopControlStore) ListDeliveries(context.Context, string, int) ([]domain.Delivery, error) { return nil, nil } @@ -1046,10 +1785,10 @@ func (noopControlStore) DownloadAuditExport(_ context.Context, tenantID, exportI func 
(noopControlStore) ListDeadLetter(context.Context, string, int) ([]map[string]any, error) { return nil, nil } -func (noopControlStore) ReleaseDeadLetter(context.Context, string, string, string, string) (app.ReplayJob, error) { +func (noopControlStore) ReleaseDeadLetter(context.Context, string, string, string, string, string) (app.ReplayJob, error) { return app.ReplayJob{}, nil } -func (noopControlStore) BulkReleaseDeadLetter(context.Context, string, []string, string, string) ([]app.ReplayJob, error) { +func (noopControlStore) BulkReleaseDeadLetter(context.Context, string, []string, string, string, string) ([]app.ReplayJob, error) { return nil, nil } func (noopControlStore) ListQuarantine(context.Context, string, int) ([]map[string]any, error) { @@ -1082,6 +1821,15 @@ func (noopControlStore) ResumeReplayJob(context.Context, string, string, string, func (noopControlStore) CancelReplayJob(context.Context, string, string, string, string) (app.ReplayJob, error) { return app.ReplayJob{}, nil } +func (noopControlStore) CreateReplayApprovalPolicy(_ context.Context, tenantID, actorID string, req app.CreateReplayApprovalPolicyRequest) (domain.ReplayApprovalPolicy, error) { + return domain.ReplayApprovalPolicy{ID: "rap_1", TenantID: tenantID, ScopeType: req.ScopeType, ScopeID: req.ScopeID, RequireApproval: req.RequireApproval, DefaultExpirySeconds: req.DefaultExpirySeconds, State: domain.StateActive, CreatedBy: actorID}, nil +} +func (noopControlStore) ListReplayApprovalPolicies(context.Context, string, int) ([]domain.ReplayApprovalPolicy, error) { + return nil, nil +} +func (noopControlStore) DisableReplayApprovalPolicy(_ context.Context, tenantID, policyID, actorID, reason string) (domain.ReplayApprovalPolicy, error) { + return domain.ReplayApprovalPolicy{ID: policyID, TenantID: tenantID, ScopeType: app.ReplayApprovalScopeTenant, RequireApproval: true, DefaultExpirySeconds: int(app.ReplayApprovalDefaultExpiry / time.Second), State: domain.StateDisabled, CreatedBy: actorID, 
Reason: reason}, nil +} func (noopControlStore) CreateTransformation(_ context.Context, tenantID, actorID string, req app.CreateTransformationRequest) (domain.Transformation, error) { return domain.Transformation{ID: "trn_1", TenantID: tenantID, Name: req.Name, CreatedBy: actorID}, nil } @@ -1100,3 +1848,97 @@ func (noopControlStore) ListTransformationVersions(context.Context, string, stri func (noopControlStore) ActivateTransformationVersion(context.Context, string, string, string, string, string) (domain.TransformationVersion, error) { return domain.TransformationVersion{}, nil } + +type rawPayloadControlStore struct { + noopControlStore + rawCalled bool + tenantID string + eventID string + actorID string + reason string +} + +func (s *rawPayloadControlStore) GetRawPayload(_ context.Context, tenantID, eventID, actorID, reason string) (domain.RawPayload, error) { + s.rawCalled = true + s.tenantID = tenantID + s.eventID = eventID + s.actorID = actorID + s.reason = reason + return domain.RawPayload{ + ID: "raw_1", + TenantID: tenantID, + EventID: eventID, + SHA256: domain.HashSHA256([]byte("raw evidence bytes")), + ContentType: "application/json", + SizeBytes: int64(len("raw evidence bytes")), + Body: []byte("raw evidence bytes"), + StorageBackend: domain.RawStoragePostgres, + StorageStatus: domain.StorageStatusStored, + }, nil +} + +type tenantIsolationControlStore struct { + noopControlStore + ownerTenantID string + foreignEventID string + lastEventTenantID string + lastRawTenantID string + lastIncidentTenantID string + addIncidentCalled bool +} + +func (s *tenantIsolationControlStore) GetEvent(_ context.Context, tenantID, eventID string) (domain.Event, error) { + s.lastEventTenantID = tenantID + if tenantID != s.ownerTenantID && eventID == s.foreignEventID { + return domain.Event{}, app.ErrNotFound + } + return domain.Event{ID: eventID, TenantID: tenantID, RawPayloadHash: "sha256:raw"}, nil +} + +func (s *tenantIsolationControlStore) GetRawPayload(_ 
context.Context, tenantID, eventID, actorID, reason string) (domain.RawPayload, error) { + s.lastRawTenantID = tenantID + if tenantID != s.ownerTenantID && eventID == s.foreignEventID { + return domain.RawPayload{}, app.ErrNotFound + } + return domain.RawPayload{ID: "raw_1", TenantID: tenantID, EventID: eventID, Body: []byte("raw-secret"), StorageStatus: domain.StorageStatusStored}, nil +} + +func (s *tenantIsolationControlStore) GetIncident(_ context.Context, tenantID, incidentID string) (domain.Incident, error) { + s.lastIncidentTenantID = tenantID + return domain.Incident{ID: incidentID, TenantID: tenantID, State: domain.StateActive}, nil +} + +func (s *tenantIsolationControlStore) AddIncidentEvent(_ context.Context, tenantID, incidentID, eventID, actorID, reason string) (domain.IncidentEvent, error) { + s.addIncidentCalled = true + return domain.IncidentEvent{ID: "ine_1", TenantID: tenantID, IncidentID: incidentID, EventID: eventID, AddedBy: actorID, Reason: reason}, nil +} + +type replayControlStore struct { + noopControlStore + replayCalled bool + deadLetterCalled bool + tenantID string + actorID string + entryID string + reasonCode string + reason string + replayReq app.ReplayRequest +} + +func (s *replayControlStore) CreateReplay(_ context.Context, tenantID, actorID string, req app.ReplayRequest) (app.ReplayJob, error) { + s.replayCalled = true + s.tenantID = tenantID + s.actorID = actorID + s.replayReq = req + return app.ReplayJob{ID: "rpl_1", State: "scheduled", ReasonCode: req.ReasonCode, Reason: req.Reason, ConfigMode: req.ConfigMode, RateLimitPerMinute: req.RateLimitPerMinute, TotalItems: 1}, nil +} + +func (s *replayControlStore) ReleaseDeadLetter(_ context.Context, tenantID, entryID, actorID, reasonCode, reason string) (app.ReplayJob, error) { + s.deadLetterCalled = true + s.tenantID = tenantID + s.actorID = actorID + s.entryID = entryID + s.reasonCode = reasonCode + s.reason = reason + return app.ReplayJob{ID: "rpl_dlq_1", State: "scheduled", 
ReasonCode: reasonCode, Reason: reason, TotalItems: 1}, nil +} diff --git a/internal/adapters/httpapi/trusted_proxy_test.go b/internal/adapters/httpapi/trusted_proxy_test.go new file mode 100644 index 0000000..c896758 --- /dev/null +++ b/internal/adapters/httpapi/trusted_proxy_test.go @@ -0,0 +1,42 @@ +package httpapi + +import ( + "net/http/httptest" + "net/netip" + "testing" +) + +func TestSessionRemoteAddrIgnoresForwardedForWithoutTrustedProxy(t *testing.T) { + server := NewServer(ServerConfig{}) + req := httptest.NewRequest("GET", "/v1/auth/oidc/callback", nil) + req.RemoteAddr = "203.0.113.10:4231" + req.Header.Set("X-Forwarded-For", "198.51.100.25") + + if got := server.remoteAddr(req); got != req.RemoteAddr { + t.Fatalf("remote addr=%q want untrusted peer address %q", got, req.RemoteAddr) + } +} + +func TestSessionRemoteAddrUsesForwardedForFromTrustedProxy(t *testing.T) { + trusted := netip.MustParsePrefix("10.0.0.0/8") + server := NewServer(ServerConfig{TrustedProxyCIDRs: []netip.Prefix{trusted}}) + req := httptest.NewRequest("GET", "/v1/auth/oidc/callback", nil) + req.RemoteAddr = "10.0.0.5:443" + req.Header.Set("X-Forwarded-For", "198.51.100.25, 10.0.0.5") + + if got := server.remoteAddr(req); got != "198.51.100.25" { + t.Fatalf("remote addr=%q want trusted forwarded client IP", got) + } +} + +func TestSessionRemoteAddrFallsBackOnInvalidForwardedFor(t *testing.T) { + trusted := netip.MustParsePrefix("10.0.0.0/8") + server := NewServer(ServerConfig{TrustedProxyCIDRs: []netip.Prefix{trusted}}) + req := httptest.NewRequest("GET", "/v1/auth/oidc/callback", nil) + req.RemoteAddr = "10.0.0.5:443" + req.Header.Set("X-Forwarded-For", "not-an-ip") + + if got := server.remoteAddr(req); got != req.RemoteAddr { + t.Fatalf("remote addr=%q want fallback peer address %q", got, req.RemoteAddr) + } +} diff --git a/internal/adapters/httpui/ui.go b/internal/adapters/httpui/ui.go index 5e2b491..0b16631 100644 --- a/internal/adapters/httpui/ui.go +++ 
b/internal/adapters/httpui/ui.go @@ -65,6 +65,7 @@ var indexTemplate = template.Must(template.New("index").Parse(` ["routes", "/v1/routes"], ["schemas", "/v1/event-types"], ["events", "/v1/events"], + ["incidents", "/v1/incidents"], ["deliveries", "/v1/deliveries"], ["replay", "/v1/replay-jobs"], ["reconciliation", "/v1/reconciliation-jobs"], @@ -148,6 +149,9 @@ var indexTemplate = template.Must(template.New("index").Parse(` renderAuditChain(body); return; } + if (name === "events") { + renderEventSearchControls(); + } const rows = Array.isArray(body.data) ? body.data : []; view.innerHTML = ""; if (rows.length === 0) { @@ -158,6 +162,12 @@ var indexTemplate = template.Must(template.New("index").Parse(` if (name === "deliveries") { keys = ["id", "event_id", "endpoint_id", "state", "retry_seed", "adapter_version_id", "transformation_version_id", "delivery_payload_sha256"]; } + if (name === "events") { + keys = ["id", "provider", "event_type", "provider_event_id", "verification_status", "dedupe_status", "received_at", "trace_id"]; + } + if (name === "incidents") { + keys = ["id", "title", "state", "created_by", "created_at", "updated_at"]; + } if (name === "provider connections") { keys = ["id", "name", "provider", "state", "credential_type", "credential_hint", "verified_at", "updated_at"]; } @@ -195,11 +205,21 @@ var indexTemplate = template.Must(template.New("index").Parse(` th.textContent = key; head.append(th); } + if (name === "events") { + const th = document.createElement("th"); + th.textContent = "timeline"; + head.append(th); + } if (name === "events") { const th = document.createElement("th"); th.textContent = "normalized"; head.append(th); } + if (name === "incidents") { + const th = document.createElement("th"); + th.textContent = "report"; + head.append(th); + } if (name === "transformations") { const th = document.createElement("th"); th.textContent = "versions"; @@ -229,6 +249,16 @@ var indexTemplate = template.Must(template.New("index").Parse(` 
td.textContent = typeof value === "object" && value !== null ? JSON.stringify(value) : String(value ?? ""); tr.append(td); } + if (name === "events") { + const td = document.createElement("td"); + if (row.id) { + const button = document.createElement("button"); + button.textContent = "Timeline"; + button.onclick = () => showEventTimeline(row.id); + td.append(button); + } + tr.append(td); + } if (name === "events") { const td = document.createElement("td"); if (row.id) { @@ -239,6 +269,19 @@ var indexTemplate = template.Must(template.New("index").Parse(` } tr.append(td); } + if (name === "incidents") { + const td = document.createElement("td"); + if (row.id) { + const json = document.createElement("button"); + json.textContent = "JSON"; + json.onclick = () => showIncidentReport(row.id, "json"); + const markdown = document.createElement("button"); + markdown.textContent = "Markdown"; + markdown.onclick = () => showIncidentReport(row.id, "markdown"); + td.append(json, markdown); + } + tr.append(td); + } if (name === "transformations") { const td = document.createElement("td"); if (row.id) { @@ -353,6 +396,90 @@ var indexTemplate = template.Must(template.New("index").Parse(` } } + function renderEventSearchControls() { + const query = current[1].includes("?") ? 
current[1].split("?")[1] : ""; + const params = new URLSearchParams(query); + const controls = []; + for (const spec of [ + ["provider", "Provider"], + ["external_id", "External ID"], + ["delivery_id", "Delivery ID"], + ["received_after", "Received after"], + ["route_id", "Route ID"] + ]) { + const input = document.createElement("input"); + input.placeholder = spec[1]; + input.name = spec[0]; + input.value = params.get(spec[0]) || ""; + controls.push(input); + } + const statusSelect = document.createElement("select"); + statusSelect.name = "status"; + for (const optionValue of ["", "dlq"]) { + const option = document.createElement("option"); + option.value = optionValue; + option.textContent = optionValue || "Status"; + statusSelect.append(option); + } + statusSelect.value = params.get("status") || ""; + controls.push(statusSelect); + const verificationSelect = document.createElement("select"); + verificationSelect.name = "verification"; + for (const optionValue of ["", "valid", "invalid"]) { + const option = document.createElement("option"); + option.value = optionValue; + option.textContent = optionValue || "Verification"; + verificationSelect.append(option); + } + verificationSelect.value = params.get("verification") || ""; + controls.push(verificationSelect); + const search = document.createElement("button"); + search.textContent = "Search"; + search.onclick = () => { + const next = new URLSearchParams(); + for (const control of controls) { + const value = control.value.trim(); + if (value) next.set(control.name, value); + } + const qs = next.toString(); + current = ["events", qs ? "/v1/events?" 
+ qs : "/v1/events"]; + load(); + }; + const clear = document.createElement("button"); + clear.textContent = "Clear"; + clear.onclick = () => { + current = ["events", "/v1/events"]; + load(); + }; + actions.replaceChildren(...controls, search, clear); + } + + async function showEventTimeline(id) { + try { + const body = await request("/v1/events/" + encodeURIComponent(id) + "/timeline"); + raw.hidden = false; + raw.textContent = JSON.stringify(body, null, 2); + status.textContent = "Timeline loaded"; + } catch (error) { + raw.hidden = false; + raw.textContent = JSON.stringify(error, null, 2); + status.textContent = "Timeline read failed"; + } + } + + async function showIncidentReport(id, format) { + try { + const body = await request("/v1/incidents/" + encodeURIComponent(id) + "/report?format=" + encodeURIComponent(format)); + raw.hidden = false; + raw.textContent = typeof body === "string" ? body : JSON.stringify(body, null, 2); + status.textContent = "Incident report loaded"; + } catch (error) { + raw.hidden = false; + raw.textContent = JSON.stringify(error, null, 2); + status.textContent = "Incident report read failed"; + } + } + async function showNormalized(id) { try { const body = await request("/v1/events/" + encodeURIComponent(id) + "/normalized"); diff --git a/internal/adapters/httpui/ui_test.go b/internal/adapters/httpui/ui_test.go index 169848a..b4446f2 100644 --- a/internal/adapters/httpui/ui_test.go +++ b/internal/adapters/httpui/ui_test.go @@ -21,4 +21,12 @@ func TestIndexAvoidsPersistentTokenStorage(t *testing.T) { if !strings.Contains(body, "/v1/replay-jobs") || !strings.Contains(body, "/v1/ops/metrics") || !strings.Contains(body, "/v1/audit-chain/head") || !strings.Contains(body, "/v1/notification-channels") || !strings.Contains(body, "/v1/siem-sinks") { t.Fatal("operator UI should expose replay, ops, audit chain, and signal egress surfaces") } + for _, want := range []string{"/v1/incidents", "/timeline", "renderEventSearchControls", 
"showIncidentReport"} { + if !strings.Contains(body, want) { + t.Fatalf("operator UI missing investigation surface %q", want) + } + } + if strings.Contains(body, "report_markdown.innerHTML") || strings.Contains(body, "markdown.innerHTML") { + t.Fatal("incident markdown must not be injected as HTML") + } } diff --git a/internal/adapters/objectstore/s3_test.go b/internal/adapters/objectstore/s3_test.go index 1e469cc..9636a2d 100644 --- a/internal/adapters/objectstore/s3_test.go +++ b/internal/adapters/objectstore/s3_test.go @@ -1,6 +1,16 @@ package objectstore -import "testing" +import ( + "context" + "errors" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "webhookery/internal/blobstore" +) func TestNewS3StoreRequiresDurableConfiguration(t *testing.T) { tests := []S3Config{ @@ -15,3 +25,139 @@ func TestNewS3StoreRequiresDurableConfiguration(t *testing.T) { } } } + +func TestNewS3StoreTrimsBucketAndAcceptsCompleteConfiguration(t *testing.T) { + store, err := NewS3Store(S3Config{ + Endpoint: " localhost:9000 ", + AccessKey: "access", + SecretKey: "secret", + Bucket: " webhookery-raw ", + Region: " us-east-1 ", + UseSSL: true, + }) + if err != nil { + t.Fatal(err) + } + if store.Bucket() != "webhookery-raw" { + t.Fatalf("bucket was not trimmed: %q", store.Bucket()) + } +} + +func TestS3StorePutGetDeleteUsesDefaultBucketAndMetadata(t *testing.T) { + var seen []string + var putBody string + var putSHA string + var putContentType string + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if _, ok := r.URL.Query()["location"]; ok { + w.Header().Set("Content-Type", "application/xml") + _, _ = w.Write([]byte(``)) + return + } + seen = append(seen, r.Method+" "+r.URL.Path) + switch r.Method { + case http.MethodPut: + body, err := io.ReadAll(r.Body) + if err != nil { + t.Fatal(err) + } + putBody = string(body) + putSHA = r.Header.Get("X-Amz-Meta-Webhookery-Sha256") + putContentType = 
r.Header.Get("Content-Type") + w.Header().Set("ETag", `"etag"`) + w.WriteHeader(http.StatusOK) + case http.MethodGet: + w.Header().Set("Last-Modified", "Thu, 28 May 2026 09:00:00 GMT") + w.Header().Set("ETag", `"etag"`) + _, _ = w.Write([]byte("stored raw body")) + case http.MethodDelete: + w.WriteHeader(http.StatusNoContent) + case http.MethodHead: + w.WriteHeader(http.StatusOK) + default: + t.Fatalf("unexpected method %s", r.Method) + } + })) + defer server.Close() + + store, err := NewS3Store(S3Config{ + Endpoint: strings.TrimPrefix(server.URL, "http://"), + AccessKey: "access", + SecretKey: "secret", + Bucket: "default-bucket", + }) + if err != nil { + t.Fatal(err) + } + + ctx := context.Background() + if err := store.Put(ctx, blobstore.Object{Key: "raw/event.bin", ContentType: "application/json", SHA256: "sha256:test"}, []byte(`{"ok":true}`)); err != nil { + t.Fatal(err) + } + if !strings.Contains(putBody, `{"ok":true}`) || putSHA != "sha256:test" || putContentType != "application/json" { + t.Fatalf("unexpected put request body=%q sha=%q content-type=%q", putBody, putSHA, putContentType) + } + body, err := store.Get(ctx, "", "raw/event.bin") + if err != nil { + t.Fatal(err) + } + if string(body) != "stored raw body" { + t.Fatalf("unexpected get body %q", string(body)) + } + if err := store.Delete(ctx, "", "raw/event.bin"); err != nil { + t.Fatal(err) + } + + want := []string{ + "PUT /default-bucket/raw/event.bin", + "GET /default-bucket/raw/event.bin", + "DELETE /default-bucket/raw/event.bin", + } + if strings.Join(seen, "\n") != strings.Join(want, "\n") { + t.Fatalf("unexpected S3 requests:\ngot:\n%s\nwant:\n%s", strings.Join(seen, "\n"), strings.Join(want, "\n")) + } +} + +func TestS3StoreUsesObjectBucketOverrideAndMapsNotFound(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if _, ok := r.URL.Query()["location"]; ok { + w.Header().Set("Content-Type", "application/xml") + _, _ = 
w.Write([]byte(``)) + return + } + if r.Method == http.MethodPut { + if r.URL.Path != "/override-bucket/raw/event.bin" { + t.Fatalf("expected explicit bucket path, got %s", r.URL.Path) + } + w.Header().Set("ETag", `"etag"`) + w.WriteHeader(http.StatusOK) + return + } + w.Header().Set("Content-Type", "application/xml") + w.WriteHeader(http.StatusNotFound) + _, _ = w.Write([]byte(`NoSuchKeymissing`)) + })) + defer server.Close() + + store, err := NewS3Store(S3Config{ + Endpoint: strings.TrimPrefix(server.URL, "http://"), + AccessKey: "access", + SecretKey: "secret", + Bucket: "default-bucket", + }) + if err != nil { + t.Fatal(err) + } + + ctx := context.Background() + if err := store.Put(ctx, blobstore.Object{Bucket: "override-bucket", Key: "raw/event.bin"}, []byte("raw")); err != nil { + t.Fatal(err) + } + if _, err := store.Get(ctx, "", "missing.bin"); !errors.Is(err, blobstore.ErrNotFound) { + t.Fatalf("expected get not found mapping, got %v", err) + } + if err := store.Delete(ctx, "", "missing.bin"); !errors.Is(err, blobstore.ErrNotFound) { + t.Fatalf("expected delete not found mapping, got %v", err) + } +} diff --git a/internal/adapters/postgres/alerts_static_test.go b/internal/adapters/postgres/alerts_static_test.go deleted file mode 100644 index 2a7ca9c..0000000 --- a/internal/adapters/postgres/alerts_static_test.go +++ /dev/null @@ -1,32 +0,0 @@ -package postgres - -import ( - "os" - "strings" - "testing" -) - -func TestAlertMigrationAndStorePreserveTenantScopeAndOpenFiringUniqueness(t *testing.T) { - up, err := os.ReadFile("../../../migrations/019_alerts.up.sql") - if err != nil { - t.Fatal(err) - } - store, err := os.ReadFile("store.go") - if err != nil { - t.Fatal(err) - } - text := string(up) + "\n" + string(store) - for _, want := range []string{ - "CREATE TABLE IF NOT EXISTS alert_rules", - "CREATE TABLE IF NOT EXISTS alert_firings", - "tenant_id text NOT NULL REFERENCES tenants(id)", - "alert_firings_open_rule_idx", - "func (s *Store) 
EvaluateAlertRules", - "WHERE tenant_id=$1", - "alert_firing.acknowledged", - } { - if !strings.Contains(text, want) { - t.Fatalf("alert persistence missing %q", want) - } - } -} diff --git a/internal/adapters/postgres/audit_chain_static_test.go b/internal/adapters/postgres/audit_chain_static_test.go index 869c77a..96d3a63 100644 --- a/internal/adapters/postgres/audit_chain_static_test.go +++ b/internal/adapters/postgres/audit_chain_static_test.go @@ -28,3 +28,19 @@ func TestAuditRetentionTombstonesChainEntriesBeforeDeletingRows(t *testing.T) { t.Fatal("audit retention must tombstone audit_chain_entries before deleting audit_events") } } + +func TestStoreConstructionDoesNotBackfillAuditChain(t *testing.T) { + body, err := os.ReadFile("store.go") + if err != nil { + t.Fatal(err) + } + source := string(body) + start := strings.Index(source, "func NewWithOptions(") + end := strings.Index(source, "func (s *Store) Close()") + if start == -1 || end == -1 || end <= start { + t.Fatal("could not locate store construction body") + } + if strings.Contains(source[start:end], "BackfillAuditChain") { + t.Fatal("store construction must not run audit-chain backfill") + } +} diff --git a/internal/adapters/postgres/audit_required_static_test.go b/internal/adapters/postgres/audit_required_static_test.go new file mode 100644 index 0000000..6166ea0 --- /dev/null +++ b/internal/adapters/postgres/audit_required_static_test.go @@ -0,0 +1,35 @@ +package postgres + +import ( + "os" + "strings" + "testing" +) + +func TestSensitiveActionsDoNotIgnoreAuditWriteFailures(t *testing.T) { + body, err := os.ReadFile("store.go") + if err != nil { + t.Fatal(err) + } + source := string(body) + actions := []string{ + "api_key.revoked", + "raw_payload.read", + "delivery.retry_requested", + "delivery.canceled", + "audit_export.downloaded", + "dead_letter.released", + "quarantine.approved", + "quarantine.rejected", + "replay.paused", + "replay.resumed", + "replay.canceled", + } + for _, action := range 
actions { + for _, line := range strings.Split(source, "\n") { + if strings.Contains(line, "_ = s.recordAuditEvent(ctx, auditEventInput{") && strings.Contains(line, `Action: "`+action+`"`) { + t.Fatalf("sensitive action %s must return or transactionally persist audit failures", action) + } + } + } +} diff --git a/internal/adapters/postgres/dedupe_atomic_static_test.go b/internal/adapters/postgres/dedupe_atomic_static_test.go new file mode 100644 index 0000000..2299101 --- /dev/null +++ b/internal/adapters/postgres/dedupe_atomic_static_test.go @@ -0,0 +1,26 @@ +package postgres + +import ( + "os" + "strings" + "testing" +) + +func TestCaptureInboundUsesAtomicDedupeInsert(t *testing.T) { + body, err := os.ReadFile("store_ingest.go") + if err != nil { + t.Fatal(err) + } + source := string(body) + start := strings.Index(source, "func (s *Store) CaptureInbound") + if start == -1 { + t.Fatal("CaptureInbound not found") + } + capture := source[start:] + if !strings.Contains(capture, "ON CONFLICT (tenant_id, dedupe_key) DO NOTHING") { + t.Fatal("CaptureInbound must insert events with ON CONFLICT on the dedupe key") + } + if strings.Contains(capture, "SELECT id FROM events WHERE tenant_id=$1 AND dedupe_key=$2") { + t.Fatal("CaptureInbound must not select-then-insert events by dedupe key") + } +} diff --git a/internal/adapters/postgres/endpoint_crud_static_test.go b/internal/adapters/postgres/endpoint_crud_static_test.go deleted file mode 100644 index 2f842cd..0000000 --- a/internal/adapters/postgres/endpoint_crud_static_test.go +++ /dev/null @@ -1,26 +0,0 @@ -package postgres - -import ( - "os" - "strings" - "testing" -) - -func TestEndpointCRUDStoreQueriesAreTenantScopedVersionedAndAudited(t *testing.T) { - body, err := os.ReadFile("store.go") - if err != nil { - t.Fatal(err) - } - text := string(body) - for _, want := range []string{ - "WHERE tenant_id=$1 AND id=$2", - "FOR UPDATE", - "domain.ConfigResourceEndpoint", - "endpoint.updated", - "endpoint.disabled", - } { - if 
!strings.Contains(text, want) { - t.Fatalf("endpoint CRUD store missing tenant-scoped/config/audit evidence %q", want) - } - } -} diff --git a/internal/adapters/postgres/event_search_static_test.go b/internal/adapters/postgres/event_search_static_test.go new file mode 100644 index 0000000..a8affcc --- /dev/null +++ b/internal/adapters/postgres/event_search_static_test.go @@ -0,0 +1,40 @@ +package postgres + +import ( + "strings" + "testing" + "time" + + "webhookery/internal/app" +) + +func TestEventSearchQueryKeepsTenantPredicatesAndBoundArgs(t *testing.T) { + query, args := eventSearchQuery("ten_1", app.EventSearchRequest{ + Limit: 25, + Provider: "stripe", + ExternalID: "evt_external", + DeliveryID: "del_1", + Status: "dlq", + Verification: "invalid", + ReceivedAfter: time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC), + RouteID: "rte_1", + }) + for _, want := range []string{ + "FROM events e WHERE e.tenant_id=$1", + "e.provider=$2", + "e.provider_event_id=$3", + "deliveries d WHERE d.tenant_id=e.tenant_id AND d.event_id=e.id AND d.id=$4", + "dead_letter_entries dlq WHERE dlq.tenant_id=e.tenant_id AND dlq.event_id=e.id", + "e.signature_verified=false", + "e.received_at >= $5", + "deliveries d WHERE d.tenant_id=e.tenant_id AND d.event_id=e.id AND d.route_id=$6", + "LIMIT $7", + } { + if !strings.Contains(query, want) { + t.Fatalf("event search query missing %q:\n%s", want, query) + } + } + if len(args) != 7 || args[0] != "ten_1" || args[1] != "stripe" || args[2] != "evt_external" || args[3] != "del_1" || args[5] != "rte_1" || args[6] != 25 { + t.Fatalf("unexpected event search args: %#v", args) + } +} diff --git a/internal/adapters/postgres/event_timeline_static_test.go b/internal/adapters/postgres/event_timeline_static_test.go new file mode 100644 index 0000000..b024235 --- /dev/null +++ b/internal/adapters/postgres/event_timeline_static_test.go @@ -0,0 +1,20 @@ +package postgres + +import ( + "os" + "strings" + "testing" +) + +func 
TestEventTimelineQueryIsVersionedAndIncludesReplayReasons(t *testing.T) { + body, err := os.ReadFile("store.go") + if err != nil { + t.Fatal(err) + } + querySource := string(body) + for _, want := range []string{"webhookery.event_timeline.v1", "reason_code=", "FROM replay_jobs"} { + if !strings.Contains(querySource, want) { + t.Fatalf("event timeline query missing %q", want) + } + } +} diff --git a/internal/adapters/postgres/fair_scheduling_static_test.go b/internal/adapters/postgres/fair_scheduling_static_test.go index 6771c24..d1e1cff 100644 --- a/internal/adapters/postgres/fair_scheduling_static_test.go +++ b/internal/adapters/postgres/fair_scheduling_static_test.go @@ -15,7 +15,7 @@ func TestStoreClaimSQLUsesTenantFairOutboxOrdering(t *testing.T) { for _, want := range []string{ "row_number() OVER (PARTITION BY priority, tenant_id ORDER BY available_at ASC, id ASC)", "ORDER BY r.priority ASC, r.tenant_rank ASC, r.available_at ASC, r.tenant_id ASC, r.id ASC", - "CASE kind WHEN 'route_event' THEN 0 WHEN 'replay_job' THEN 1 ELSE 2 END AS priority", + "CASE kind WHEN 'route_event' THEN 0 WHEN 'route_recovered_event' THEN 0 WHEN 'replay_job' THEN 1 ELSE 2 END AS priority", } { if !strings.Contains(text, want) { t.Fatalf("store claim SQL must include tenant-fair outbox ordering evidence %q", want) diff --git a/internal/adapters/postgres/fanout_boundary_static_test.go b/internal/adapters/postgres/fanout_boundary_static_test.go new file mode 100644 index 0000000..a895bdb --- /dev/null +++ b/internal/adapters/postgres/fanout_boundary_static_test.go @@ -0,0 +1,27 @@ +package postgres + +import ( + "os" + "strings" + "testing" +) + +func TestStoreDoesNotOwnDeliveryFanoutOrchestration(t *testing.T) { + body, err := os.ReadFile("store.go") + if err != nil { + t.Fatal(err) + } + text := string(body) + for _, forbidden := range []string{ + "func (s *Store) ProcessOutbox", + "func (s *Store) createDeliveriesForEvent", + "createDeliveriesForEventWithOptions", + 
"createReplayDeliveries", + "deliveryCreationOptions", + "currentDeliveryReplayConfig", + } { + if strings.Contains(text, forbidden) { + t.Fatalf("delivery fanout orchestration must stay out of postgres.Store; found %q", forbidden) + } + } +} diff --git a/internal/adapters/postgres/incidents.go b/internal/adapters/postgres/incidents.go new file mode 100644 index 0000000..c252364 --- /dev/null +++ b/internal/adapters/postgres/incidents.go @@ -0,0 +1,391 @@ +package postgres + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + "github.com/jackc/pgx/v5" + + "webhookery/internal/app" + "webhookery/internal/blobstore" + "webhookery/internal/domain" + "webhookery/internal/evidence" +) + +func (s *Store) CreateIncident(ctx context.Context, incident domain.Incident) (domain.Incident, error) { + tx, err := s.pool.Begin(ctx) + if err != nil { + return domain.Incident{}, err + } + defer rollback(ctx, tx) + if _, err := tx.Exec(ctx, "INSERT INTO tenants(id, name) VALUES($1, $1) ON CONFLICT (id) DO NOTHING", incident.TenantID); err != nil { + return domain.Incident{}, err + } + err = tx.QueryRow(ctx, ` + INSERT INTO incidents(id, tenant_id, title, reason, state, created_by) + VALUES($1,$2,$3,$4,$5,$6) + RETURNING id, tenant_id, title, reason, state, created_by, created_at, updated_at`, + incident.ID, incident.TenantID, incident.Title, incident.Reason, incident.State, incident.CreatedBy, + ).Scan(&incident.ID, &incident.TenantID, &incident.Title, &incident.Reason, &incident.State, &incident.CreatedBy, &incident.CreatedAt, &incident.UpdatedAt) + if err != nil { + return domain.Incident{}, err + } + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: incident.TenantID, ActorID: incident.CreatedBy, Action: "incident.created", Resource: "incident", ResourceID: incident.ID, Reason: incident.Reason}); err != nil { + return domain.Incident{}, err + } + if err := tx.Commit(ctx); err != nil { + return domain.Incident{}, err + } + return 
incident, nil +} + +func (s *Store) ListIncidents(ctx context.Context, tenantID string, limit int) ([]domain.Incident, error) { + rows, err := s.pool.Query(ctx, ` + SELECT id, tenant_id, title, reason, state, created_by, created_at, updated_at + FROM incidents + WHERE tenant_id=$1 + ORDER BY created_at DESC, id DESC + LIMIT $2`, tenantID, limit) + if err != nil { + return nil, err + } + defer rows.Close() + var out []domain.Incident + for rows.Next() { + item, err := scanIncident(rows) + if err != nil { + return nil, err + } + out = append(out, item) + } + return out, rows.Err() +} + +func (s *Store) GetIncident(ctx context.Context, tenantID, incidentID string) (domain.Incident, error) { + item, err := scanIncident(s.pool.QueryRow(ctx, ` + SELECT id, tenant_id, title, reason, state, created_by, created_at, updated_at + FROM incidents + WHERE tenant_id=$1 AND id=$2`, tenantID, incidentID)) + if errors.Is(err, pgx.ErrNoRows) { + return domain.Incident{}, app.ErrNotFound + } + return item, err +} + +func (s *Store) AddIncidentEvent(ctx context.Context, tenantID, incidentID, eventID, actorID, reason string) (domain.IncidentEvent, error) { + tx, err := s.pool.Begin(ctx) + if err != nil { + return domain.IncidentEvent{}, err + } + defer rollback(ctx, tx) + if err := requireIncidentEventTenant(ctx, tx, tenantID, incidentID, eventID); err != nil { + return domain.IncidentEvent{}, err + } + id := mustID("ine") + item, err := scanIncidentEvent(tx.QueryRow(ctx, ` + INSERT INTO incident_events(id, tenant_id, incident_id, event_id, added_by, reason) + VALUES($1,$2,$3,$4,$5,$6) + ON CONFLICT (tenant_id, incident_id, event_id) + DO UPDATE SET added_by=EXCLUDED.added_by, reason=EXCLUDED.reason + RETURNING id, tenant_id, incident_id, event_id, added_by, reason, created_at`, + id, tenantID, incidentID, eventID, actorID, reason)) + if err != nil { + return domain.IncidentEvent{}, err + } + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, 
Action: "incident.event_added", Resource: "incident", ResourceID: incidentID, Reason: reason}); err != nil { + return domain.IncidentEvent{}, err + } + if err := tx.Commit(ctx); err != nil { + return domain.IncidentEvent{}, err + } + return item, nil +} + +func (s *Store) RemoveIncidentEvent(ctx context.Context, tenantID, incidentID, eventID, actorID, reason string) (domain.IncidentEvent, error) { + tx, err := s.pool.Begin(ctx) + if err != nil { + return domain.IncidentEvent{}, err + } + defer rollback(ctx, tx) + item, err := scanIncidentEvent(tx.QueryRow(ctx, ` + DELETE FROM incident_events + WHERE tenant_id=$1 AND incident_id=$2 AND event_id=$3 + RETURNING id, tenant_id, incident_id, event_id, added_by, reason, created_at`, + tenantID, incidentID, eventID)) + if errors.Is(err, pgx.ErrNoRows) { + return domain.IncidentEvent{}, app.ErrNotFound + } + if err != nil { + return domain.IncidentEvent{}, err + } + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "incident.event_removed", Resource: "incident", ResourceID: incidentID, Reason: reason}); err != nil { + return domain.IncidentEvent{}, err + } + if err := tx.Commit(ctx); err != nil { + return domain.IncidentEvent{}, err + } + return item, nil +} + +func (s *Store) ListIncidentEvents(ctx context.Context, tenantID, incidentID string) ([]domain.IncidentEvent, error) { + rows, err := s.pool.Query(ctx, ` + SELECT id, tenant_id, incident_id, event_id, added_by, reason, created_at + FROM incident_events + WHERE tenant_id=$1 AND incident_id=$2 + ORDER BY created_at ASC, id ASC`, tenantID, incidentID) + if err != nil { + return nil, err + } + defer rows.Close() + var out []domain.IncidentEvent + for rows.Next() { + item, err := scanIncidentEvent(rows) + if err != nil { + return nil, err + } + out = append(out, item) + } + return out, rows.Err() +} + +func (s *Store) CreateIncidentReportSnapshot(ctx context.Context, tenantID, incidentID, actorID, reason string, report 
app.IncidentReport, markdown string) (domain.IncidentReportSnapshot, error) { + raw, err := json.Marshal(report) + if err != nil { + return domain.IncidentReportSnapshot{}, err + } + id := mustID("irs") + tx, err := s.pool.Begin(ctx) + if err != nil { + return domain.IncidentReportSnapshot{}, err + } + defer rollback(ctx, tx) + if err := requireIncident(ctx, tx, tenantID, incidentID); err != nil { + return domain.IncidentReportSnapshot{}, err + } + var out domain.IncidentReportSnapshot + err = tx.QueryRow(ctx, ` + INSERT INTO incident_report_snapshots(id, tenant_id, incident_id, schema_version, report_json, report_markdown, generated_by) + VALUES($1,$2,$3,$4,$5::jsonb,$6,$7) + RETURNING id, tenant_id, incident_id, schema_version, report_json, report_markdown, generated_by, generated_at`, + id, tenantID, incidentID, report.SchemaVersion, string(raw), markdown, actorID, + ).Scan(&out.ID, &out.TenantID, &out.IncidentID, &out.SchemaVersion, &out.Report, &out.Markdown, &out.GeneratedBy, &out.GeneratedAt) + if err != nil { + return domain.IncidentReportSnapshot{}, err + } + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "incident_report.generated", Resource: "incident", ResourceID: incidentID, Reason: reason}); err != nil { + return domain.IncidentReportSnapshot{}, err + } + if err := tx.Commit(ctx); err != nil { + return domain.IncidentReportSnapshot{}, err + } + return out, nil +} + +func (s *Store) GetIncidentReportSnapshot(ctx context.Context, tenantID, incidentID string) (domain.IncidentReportSnapshot, error) { + var out domain.IncidentReportSnapshot + err := s.pool.QueryRow(ctx, ` + SELECT id, tenant_id, incident_id, schema_version, report_json, report_markdown, generated_by, generated_at + FROM incident_report_snapshots + WHERE tenant_id=$1 AND incident_id=$2 + ORDER BY generated_at DESC, id DESC + LIMIT 1`, tenantID, incidentID). 
+ Scan(&out.ID, &out.TenantID, &out.IncidentID, &out.SchemaVersion, &out.Report, &out.Markdown, &out.GeneratedBy, &out.GeneratedAt) + if errors.Is(err, pgx.ErrNoRows) { + return domain.IncidentReportSnapshot{}, app.ErrNotFound + } + return out, err +} + +func (s *Store) CreateIncidentEvidenceExport(ctx context.Context, tenantID, incidentID, actorID string, req app.CreateIncidentEvidenceExportRequest, report app.IncidentReport, markdown string) (domain.IncidentEvidenceExport, domain.EvidenceExport, error) { + reportJSON, err := json.MarshalIndent(report, "", " ") + if err != nil { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + reportJSON = append(reportJSON, '\n') + timelineItems := make([]any, 0) + for _, event := range report.Events { + for _, entry := range event.Timeline { + timelineItems = append(timelineItems, map[string]any{"event_id": event.Event.ID, "entry": entry}) + } + } + timelines, err := evidence.JSONLines(timelineItems) + if err != nil { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + auditEvents, err := evidence.JSONLines([]any{}) + if err != nil { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + exportID := mustID("exp") + now := time.Now().UTC() + files := map[string][]byte{ + "audit_events.jsonl": auditEvents, + "incident_report.json": reportJSON, + "incident_report.md": []byte(markdown), + "timelines.jsonl": timelines, + } + eventIDs := make([]string, 0, len(report.Events)) + for _, event := range report.Events { + eventIDs = append(eventIDs, event.Event.ID) + } + bundle, err := evidence.BuildTarGzipBundle(evidence.Manifest{ + ExportID: exportID, + TenantID: tenantID, + CreatedAt: now, + IncludedEvents: eventIDs, + IncludedIncidents: []string{incidentID}, + IncludeRawPayloads: false, + IncludeTimelines: true, + IncludePayloadBodies: false, + }, files) + if err != nil { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + verification, 
err := evidence.VerifyTarGzipBundle(bundle.Bytes) + if err != nil { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + if !verification.Valid { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, fmt.Errorf("incident evidence export bundle verification failed: %s", strings.Join(verification.Failures, "; ")) + } + storageBackend := domain.RawStoragePostgres + objectBucket := "" + objectKey := "" + bodyForDB := bundle.Bytes + objectWritten := false + if s.rawStorageMode == domain.RawStorageS3 { + storageBackend = domain.RawStorageS3 + objectBucket = s.objectBucket + objectKey = blobstore.ExportKey(tenantID, exportID) + if err := s.objectStore.Put(ctx, blobstore.Object{ + Bucket: objectBucket, + Key: objectKey, + ContentType: "application/gzip", + SHA256: bundle.BundleSHA256, + SizeBytes: int64(len(bundle.Bytes)), + }, bundle.Bytes); err != nil { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + objectWritten = true + bodyForDB = []byte{} + } + manifestJSON := string(bundle.Manifest) + filesJSON, _ := json.Marshal(bundle.Files) + tx, err := s.pool.Begin(ctx) + if err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, objectBucket, objectKey) + } + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + defer rollback(ctx, tx) + if err := requireIncident(ctx, tx, tenantID, incidentID); err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, objectBucket, objectKey) + } + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + var export domain.EvidenceExport + err = tx.QueryRow(ctx, ` + INSERT INTO evidence_exports(id, tenant_id, state, include_raw_payloads, include_timelines, include_payload_bodies, format, + storage_backend, object_bucket, object_key, sha256, manifest_sha256, size_bytes, bundle, manifest, file_hashes, + created_by, completed_at) + 
VALUES($1,$2,$3,false,true,false,'tar+gzip+jsonl',$4,$5,$6,$7,$8,$9,$10,$11::jsonb,$12::jsonb,$13,now()) + RETURNING id, tenant_id, state, COALESCE(from_time, 'epoch'::timestamptz), COALESCE(to_time, 'epoch'::timestamptz), + include_raw_payloads, include_timelines, include_payload_bodies, format, storage_backend, object_bucket, object_key, sha256, + manifest_sha256, size_bytes, error, created_by, created_at, COALESCE(completed_at, 'epoch'::timestamptz)`, + exportID, tenantID, domain.EvidenceExportStateReady, storageBackend, objectBucket, objectKey, + bundle.BundleSHA256, bundle.ManifestSHA256, int64(len(bundle.Bytes)), bodyForDB, manifestJSON, string(filesJSON), actorID, + ).Scan(&export.ID, &export.TenantID, &export.State, &export.From, &export.To, &export.IncludeRawPayloads, &export.IncludeTimelines, &export.IncludePayloadBodies, + &export.Format, &export.StorageBackend, &export.ObjectBucket, &export.ObjectKey, &export.SHA256, &export.ManifestSHA256, + &export.SizeBytes, &export.Error, &export.CreatedBy, &export.CreatedAt, &export.CompletedAt) + if err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, objectBucket, objectKey) + } + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + for _, file := range bundle.Files { + if _, err := tx.Exec(ctx, ` + INSERT INTO evidence_export_items(id, tenant_id, export_id, resource_type, resource_id, file_name, sha256, size_bytes) + VALUES($1,$2,$3,'export_file',$4,$5,$6,$7)`, + mustID("exi"), tenantID, exportID, file.Name, file.Name, file.SHA256, file.SizeBytes, + ); err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, objectBucket, objectKey) + } + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + } + var incidentExport domain.IncidentEvidenceExport + err = tx.QueryRow(ctx, ` + INSERT INTO incident_evidence_exports(id, tenant_id, incident_id, export_id, created_by) + VALUES($1,$2,$3,$4,$5) + RETURNING id, tenant_id, incident_id, export_id, created_by, 
created_at`, + mustID("iex"), tenantID, incidentID, exportID, actorID, + ).Scan(&incidentExport.ID, &incidentExport.TenantID, &incidentExport.IncidentID, &incidentExport.ExportID, &incidentExport.CreatedBy, &incidentExport.CreatedAt) + if err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, objectBucket, objectKey) + } + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "incident_evidence_export.created", Resource: "incident", ResourceID: incidentID, Reason: req.Reason}); err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, objectBucket, objectKey) + } + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + if err := tx.Commit(ctx); err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, objectBucket, objectKey) + } + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + return incidentExport, normalizeEvidenceExportTimes(export), nil +} + +type incidentScanner interface { + Scan(dest ...any) error +} + +func scanIncident(scanner incidentScanner) (domain.Incident, error) { + var item domain.Incident + err := scanner.Scan(&item.ID, &item.TenantID, &item.Title, &item.Reason, &item.State, &item.CreatedBy, &item.CreatedAt, &item.UpdatedAt) + return item, err +} + +func scanIncidentEvent(scanner incidentScanner) (domain.IncidentEvent, error) { + var item domain.IncidentEvent + err := scanner.Scan(&item.ID, &item.TenantID, &item.IncidentID, &item.EventID, &item.AddedBy, &item.Reason, &item.CreatedAt) + return item, err +} + +func requireIncident(ctx context.Context, tx pgx.Tx, tenantID, incidentID string) error { + var exists bool + if err := tx.QueryRow(ctx, `SELECT EXISTS (SELECT 1 FROM incidents WHERE tenant_id=$1 AND id=$2)`, tenantID, incidentID).Scan(&exists); err != nil { + return err + } + if !exists { + return app.ErrNotFound + } + return nil +} + +func 
requireIncidentEventTenant(ctx context.Context, tx pgx.Tx, tenantID, incidentID, eventID string) error { + if err := requireIncident(ctx, tx, tenantID, incidentID); err != nil { + return err + } + var exists bool + if err := tx.QueryRow(ctx, `SELECT EXISTS (SELECT 1 FROM events WHERE tenant_id=$1 AND id=$2)`, tenantID, eventID).Scan(&exists); err != nil { + return err + } + if !exists { + return app.ErrNotFound + } + return nil +} diff --git a/internal/adapters/postgres/migrate.go b/internal/adapters/postgres/migrate.go index df40d96..04d6071 100644 --- a/internal/adapters/postgres/migrate.go +++ b/internal/adapters/postgres/migrate.go @@ -4,12 +4,15 @@ import ( "context" "crypto/sha256" "encoding/hex" + "errors" "fmt" "os" "path/filepath" "sort" "strings" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" "github.com/jackc/pgx/v5/pgxpool" ) @@ -33,13 +36,17 @@ func MigrateUp(ctx context.Context, databaseURL, dir string) error { } version := strings.TrimSuffix(filepath.Base(file), ".up.sql") sum := checksum(body) - var exists bool - err = pool.QueryRow(ctx, "SELECT EXISTS (SELECT 1 FROM schema_migrations WHERE version=$1 AND checksum=$2)", version, sum).Scan(&exists) - if err != nil && version != "001_init" { - return fmt.Errorf("check migration %s: %w", version, err) + appliedChecksum, err := appliedMigrationChecksum(ctx, pool, version) + if err != nil { + if version != "001_init" || !isUndefinedTable(err) { + return fmt.Errorf("check migration %s: %w", version, err) + } } - if exists { - continue + if appliedChecksum != "" { + if appliedChecksum == sum { + continue + } + return fmt.Errorf("check migration %s: checksum mismatch", version) } tx, err := pool.Begin(ctx) if err != nil { @@ -49,7 +56,7 @@ func MigrateUp(ctx context.Context, databaseURL, dir string) error { _ = tx.Rollback(ctx) return fmt.Errorf("apply migration %s: %w", version, err) } - if _, err := tx.Exec(ctx, "INSERT INTO schema_migrations(version, checksum) VALUES($1, $2) ON CONFLICT 
(version) DO UPDATE SET checksum = EXCLUDED.checksum, applied_at = now()", version, sum); err != nil { + if _, err := tx.Exec(ctx, "INSERT INTO schema_migrations(version, checksum) VALUES($1, $2)", version, sum); err != nil { _ = tx.Rollback(ctx) return fmt.Errorf("record migration %s: %w", version, err) } @@ -60,6 +67,20 @@ func MigrateUp(ctx context.Context, databaseURL, dir string) error { return nil } +func appliedMigrationChecksum(ctx context.Context, pool *pgxpool.Pool, version string) (string, error) { + var appliedChecksum string + err := pool.QueryRow(ctx, "SELECT checksum FROM schema_migrations WHERE version=$1", version).Scan(&appliedChecksum) + if errors.Is(err, pgx.ErrNoRows) { + return "", nil + } + return appliedChecksum, err +} + +func isUndefinedTable(err error) bool { + var pgErr *pgconn.PgError + return errors.As(err, &pgErr) && pgErr.Code == "42P01" +} + func checksum(body []byte) string { sum := sha256.Sum256(body) return hex.EncodeToString(sum[:]) diff --git a/internal/adapters/postgres/notification_signal_static_test.go b/internal/adapters/postgres/notification_signal_static_test.go deleted file mode 100644 index 8862b7b..0000000 --- a/internal/adapters/postgres/notification_signal_static_test.go +++ /dev/null @@ -1,35 +0,0 @@ -package postgres - -import ( - "os" - "strings" - "testing" -) - -func TestNotificationSignalMigrationAndStoreProtectTenantAndSecrets(t *testing.T) { - up, err := os.ReadFile("../../../migrations/020_notification_signal.up.sql") - if err != nil { - t.Fatal(err) - } - store, err := os.ReadFile("store.go") - if err != nil { - t.Fatal(err) - } - text := string(up) + "\n" + string(store) - for _, want := range []string{ - "CREATE TABLE IF NOT EXISTS notification_channels", - "tenant_id text NOT NULL REFERENCES tenants(id)", - "encrypted_secret bytea NOT NULL", - "CREATE TABLE IF NOT EXISTS alert_rule_channels", - "CREATE TABLE IF NOT EXISTS notification_deliveries", - "notification_deliveries_transition_unique_idx", - 
"func (s *Store) CreateNotificationChannel", - "WHERE tenant_id=$1", - "notification_channel.created", - "notification_delivery.retry_requested", - } { - if !strings.Contains(text, want) { - t.Fatalf("notification signal persistence missing %q", want) - } - } -} diff --git a/internal/adapters/postgres/reconciliation_boundary_static_test.go b/internal/adapters/postgres/reconciliation_boundary_static_test.go new file mode 100644 index 0000000..daf95f3 --- /dev/null +++ b/internal/adapters/postgres/reconciliation_boundary_static_test.go @@ -0,0 +1,28 @@ +package postgres + +import ( + "os" + "strings" + "testing" +) + +func TestStoreDoesNotOwnProviderReconciliationOrchestration(t *testing.T) { + body, err := os.ReadFile("store.go") + if err != nil { + t.Fatal(err) + } + text := string(body) + for _, forbidden := range []string{ + "func (s *Store) RunReconciliationJob", + "func (s *Store) DryRunReconciliation", + "reconcileProviderObject", + "adapter.Scan(", + "adapter.Lookup(", + "RequestRedelivery(", + "reconcile.ProviderObject", + } { + if strings.Contains(text, forbidden) { + t.Fatalf("provider reconciliation orchestration must stay out of postgres.Store; found %q", forbidden) + } + } +} diff --git a/internal/adapters/postgres/replay_approval_static_test.go b/internal/adapters/postgres/replay_approval_static_test.go index 5a7c766..8524ab7 100644 --- a/internal/adapters/postgres/replay_approval_static_test.go +++ b/internal/adapters/postgres/replay_approval_static_test.go @@ -11,12 +11,19 @@ func TestReplayApprovalMigrationAddsGateColumns(t *testing.T) { if err != nil { t.Fatal(err) } + expiryBody, err := os.ReadFile("../../..//migrations/028_replay_approval_expiry.up.sql") + if err != nil { + t.Fatal(err) + } text := string(body) for _, want := range []string{"approval_required", "approved_by", "approved_at", "approval_reason"} { if !strings.Contains(text, want) { t.Fatalf("migration missing replay approval column %q", want) } } + if 
!strings.Contains(string(expiryBody), "approval_expires_at") { + t.Fatal("migration missing replay approval expiry column") + } } func TestReplayApprovalStoreQueuesOnlyAfterApproval(t *testing.T) { @@ -31,3 +38,40 @@ func TestReplayApprovalStoreQueuesOnlyAfterApproval(t *testing.T) { } } } + +func TestReplayApprovalStoreEnforcesExpiryAndSecondActor(t *testing.T) { + body, err := os.ReadFile("store.go") + if err != nil { + t.Fatal(err) + } + text := string(body) + for _, want := range []string{"approval_expires_at", "approval_expires_at > now()", "created_by<>$3"} { + if !strings.Contains(text, want) { + t.Fatalf("store missing replay approval guard %q", want) + } + } +} + +func TestReplayApprovalPolicyMigrationAndStoreLookup(t *testing.T) { + migration, err := os.ReadFile("../../..//migrations/029_replay_approval_policies.up.sql") + if err != nil { + t.Fatal(err) + } + migrationText := string(migration) + for _, want := range []string{"replay_approval_policies", "scope_type", "scope_id", "default_expiry_seconds", "UNIQUE (tenant_id, scope_type, scope_id)"} { + if !strings.Contains(migrationText, want) { + t.Fatalf("migration missing replay approval policy evidence %q", want) + } + } + + body, err := os.ReadFile("store.go") + if err != nil { + t.Fatal(err) + } + text := string(body) + for _, want := range []string{"replayApprovalPolicyForReplay", "scope_type='tenant'", "scope_type='source'", "scope_type='route'", "req.RequireApproval = true"} { + if !strings.Contains(text, want) { + t.Fatalf("store missing replay approval policy lookup %q", want) + } + } +} diff --git a/internal/adapters/postgres/replay_invariants_static_test.go b/internal/adapters/postgres/replay_invariants_static_test.go new file mode 100644 index 0000000..cda0b47 --- /dev/null +++ b/internal/adapters/postgres/replay_invariants_static_test.go @@ -0,0 +1,31 @@ +package postgres + +import ( + "os" + "strings" + "testing" +) + +func TestReplayCreationPersistsGovernanceAuditEvidence(t *testing.T) { + 
body, err := os.ReadFile("store.go") + if err != nil { + t.Fatal(err) + } + source := string(body) + for _, want := range []string{ + "INSERT INTO replay_jobs", + "scope_json", + "reason_code, reason", + `Action: "replay.created"`, + "replayAuditReason(req)", + `"reason_code=" + req.ReasonCode`, + `"reason=" + req.Reason`, + `"config_mode=" + req.ConfigMode`, + `"event_id="+req.EventID`, + `"delivery_id="+req.DeliveryID`, + } { + if !strings.Contains(source, want) { + t.Fatalf("replay creation must preserve governance evidence marker %q", want) + } + } +} diff --git a/internal/adapters/postgres/replay_reason_codes_static_test.go b/internal/adapters/postgres/replay_reason_codes_static_test.go new file mode 100644 index 0000000..2919c04 --- /dev/null +++ b/internal/adapters/postgres/replay_reason_codes_static_test.go @@ -0,0 +1,20 @@ +package postgres + +import ( + "os" + "strings" + "testing" +) + +func TestReplayReasonCodeMigrationAddsGovernanceColumn(t *testing.T) { + body, err := os.ReadFile("../../../migrations/027_replay_reason_codes.up.sql") + if err != nil { + t.Fatal(err) + } + migration := string(body) + for _, want := range []string{"reason_code", "operator_requested"} { + if !strings.Contains(migration, want) { + t.Fatalf("migration missing replay reason-code evidence %q", want) + } + } +} diff --git a/internal/adapters/postgres/replay_schedule_test.go b/internal/adapters/postgres/replay_schedule_test.go deleted file mode 100644 index 6fa2651..0000000 --- a/internal/adapters/postgres/replay_schedule_test.go +++ /dev/null @@ -1,30 +0,0 @@ -package postgres - -import ( - "testing" - "time" -) - -func TestReplayScheduleDelaySpacesItemsByRateLimit(t *testing.T) { - if got := replayScheduleDelay(0, 60); got != 0 { - t.Fatalf("first replay item should be immediately eligible, got %s", got) - } - if got := replayScheduleDelay(1, 60); got != time.Second { - t.Fatalf("second item at 60/min should be delayed 1s, got %s", got) - } - if got := replayScheduleDelay(2, 
30); got != 4*time.Second { - t.Fatalf("third item at 30/min should be delayed 4s, got %s", got) - } -} - -func TestReplayScheduleDelayIgnoresInvalidRateLimit(t *testing.T) { - if got := replayScheduleDelay(10, 0); got != 0 { - t.Fatalf("zero rate limit should not delay replay, got %s", got) - } - if got := replayScheduleDelay(10, -1); got != 0 { - t.Fatalf("negative rate limit should not delay replay, got %s", got) - } - if got := replayScheduleDelay(-1, 60); got != 0 { - t.Fatalf("negative item index should not delay replay, got %s", got) - } -} diff --git a/internal/adapters/postgres/retry_policy_crud_static_test.go b/internal/adapters/postgres/retry_policy_crud_static_test.go deleted file mode 100644 index 259253a..0000000 --- a/internal/adapters/postgres/retry_policy_crud_static_test.go +++ /dev/null @@ -1,27 +0,0 @@ -package postgres - -import ( - "os" - "strings" - "testing" -) - -func TestRetryPolicyCRUDStoreQueriesAreTenantScopedVersionedAndAudited(t *testing.T) { - body, err := os.ReadFile("store.go") - if err != nil { - t.Fatal(err) - } - text := string(body) - for _, want := range []string{ - "FROM retry_policies", - "WHERE tenant_id=$1 AND id=$2", - "FOR UPDATE", - "domain.ConfigResourceRetryPolicy", - "retry_policy.updated", - "retry_policy.disabled", - } { - if !strings.Contains(text, want) { - t.Fatalf("retry policy CRUD store missing tenant-scoped/config/audit evidence %q", want) - } - } -} diff --git a/internal/adapters/postgres/route_crud_static_test.go b/internal/adapters/postgres/route_crud_static_test.go deleted file mode 100644 index 60f5790..0000000 --- a/internal/adapters/postgres/route_crud_static_test.go +++ /dev/null @@ -1,28 +0,0 @@ -package postgres - -import ( - "os" - "strings" - "testing" -) - -func TestRouteCRUDStoreQueriesAreTenantScopedVersionedAndAudited(t *testing.T) { - body, err := os.ReadFile("store.go") - if err != nil { - t.Fatal(err) - } - text := string(body) - for _, want := range []string{ - "FROM routes WHERE 
tenant_id=$1 AND id=$2", - "FOR UPDATE", - "domain.ConfigResourceRoute", - "route.updated", - "route.inactivated", - "SELECT state FROM sources WHERE tenant_id=$1 AND id=$2", - "SELECT state FROM endpoints WHERE tenant_id=$1 AND id=$2", - } { - if !strings.Contains(text, want) { - t.Fatalf("route CRUD store missing tenant-scoped/config/audit evidence %q", want) - } - } -} diff --git a/internal/adapters/postgres/siem_signal_static_test.go b/internal/adapters/postgres/siem_signal_static_test.go deleted file mode 100644 index a3ea841..0000000 --- a/internal/adapters/postgres/siem_signal_static_test.go +++ /dev/null @@ -1,34 +0,0 @@ -package postgres - -import ( - "os" - "strings" - "testing" -) - -func TestSIEMSignalMigrationAndStoreProtectCursorAndSecrets(t *testing.T) { - up, err := os.ReadFile("../../../migrations/021_siem_signal.up.sql") - if err != nil { - t.Fatal(err) - } - store, err := os.ReadFile("store.go") - if err != nil { - t.Fatal(err) - } - text := string(up) + "\n" + string(store) - for _, want := range []string{ - "CREATE TABLE IF NOT EXISTS siem_sinks", - "encrypted_secret bytea NOT NULL", - "cursor_sequence bigint NOT NULL DEFAULT 0", - "CREATE TABLE IF NOT EXISTS siem_deliveries", - "func (s *Store) EnqueueSIEMDeliveries", - "UPDATE siem_sinks", - "GREATEST(cursor_sequence", - "siem_sink.created", - "siem_delivery.retry_requested", - } { - if !strings.Contains(text, want) { - t.Fatalf("SIEM signal persistence missing %q", want) - } - } -} diff --git a/internal/adapters/postgres/source_crud_static_test.go b/internal/adapters/postgres/source_crud_static_test.go deleted file mode 100644 index 55e756d..0000000 --- a/internal/adapters/postgres/source_crud_static_test.go +++ /dev/null @@ -1,26 +0,0 @@ -package postgres - -import ( - "os" - "strings" - "testing" -) - -func TestSourceCRUDStoreQueriesAreTenantScopedAndAudited(t *testing.T) { - body, err := os.ReadFile("store.go") - if err != nil { - t.Fatal(err) - } - text := string(body) - for _, want := 
range []string{ - "WHERE tenant_id=$1 AND id=$2", - "FOR UPDATE", - "domain.ConfigResourceSource", - "source.updated", - "source.disabled", - } { - if !strings.Contains(text, want) { - t.Fatalf("source CRUD store missing tenant-scoped/audit evidence %q", want) - } - } -} diff --git a/internal/adapters/postgres/store.go b/internal/adapters/postgres/store.go index 4ae163e..fdd5c27 100644 --- a/internal/adapters/postgres/store.go +++ b/internal/adapters/postgres/store.go @@ -67,6 +67,8 @@ type StoreOptions struct { ObjectBucket string } +var errObjectStoreReadFailed = errors.New("object store read failed") + func New(ctx context.Context, databaseURL string, box SecretBox) (*Store, error) { return NewWithOptions(ctx, databaseURL, box, StoreOptions{RawStorageMode: domain.RawStoragePostgres}) } @@ -101,10 +103,6 @@ func NewWithOptions(ctx context.Context, databaseURL string, box SecretBox, opts objectStore: opts.ObjectStore, objectBucket: strings.TrimSpace(opts.ObjectBucket), } - if err := store.BackfillAuditChain(ctx); err != nil { - pool.Close() - return nil, err - } return store, nil } @@ -116,40 +114,98 @@ func (s *Store) Health(ctx context.Context) error { return s.pool.Ping(ctx) } -func (s *Store) BackfillAuditChain(ctx context.Context) error { +const ( + auditChainBackfillLeaseID = "audit-chain-backfill" + auditChainBackfillMax = 1000 +) + +func (s *Store) BackfillAuditChain(ctx context.Context, workerID string, limit int) (worker.AuditChainBackfillResult, error) { + if limit <= 0 { + limit = 100 + } + if limit > auditChainBackfillMax { + limit = auditChainBackfillMax + } + result := worker.AuditChainBackfillResult{} + tx, err := s.pool.Begin(ctx) + if err != nil { + return result, err + } + defer rollback(ctx, tx) var exists bool - if err := s.pool.QueryRow(ctx, "SELECT to_regclass('audit_chain_entries') IS NOT NULL").Scan(&exists); err != nil { - return err + if err := tx.QueryRow(ctx, "SELECT to_regclass('audit_chain_entries') IS NOT NULL").Scan(&exists); err 
!= nil { + return result, err } if !exists { - return nil + return result, tx.Commit(ctx) } - rows, err := s.pool.Query(ctx, `SELECT DISTINCT tenant_id FROM audit_events ORDER BY tenant_id`) + acquired, err := tryAcquireWorkerLease(ctx, tx, auditChainBackfillLeaseID, workerID, time.Minute) if err != nil { - return err + return result, err } - defer rows.Close() + result.LeaseAcquired = acquired + if !acquired { + return result, tx.Commit(ctx) + } + rows, err := tx.Query(ctx, ` + SELECT a.tenant_id + FROM audit_events a + WHERE NOT EXISTS ( + SELECT 1 FROM audit_chain_entries c + WHERE c.tenant_id=a.tenant_id AND c.audit_event_id=a.id + ) + GROUP BY a.tenant_id + ORDER BY min(a.occurred_at) ASC, min(a.id) ASC, a.tenant_id ASC + LIMIT $1`, limit) + if err != nil { + return result, err + } + var tenantIDs []string for rows.Next() { var tenantID string if err := rows.Scan(&tenantID); err != nil { - return err + rows.Close() + return result, err } - if err := s.backfillTenantAuditChain(ctx, tenantID); err != nil { - return err + tenantIDs = append(tenantIDs, tenantID) + } + if err := rows.Err(); err != nil { + rows.Close() + return result, err + } + rows.Close() + remaining := limit + now := time.Now().UTC() + for _, tenantID := range tenantIDs { + if remaining <= 0 { + break } + backfilled, err := s.backfillTenantAuditChain(ctx, tx, tenantID, remaining, now) + if err != nil { + return result, err + } + result.TenantsScanned++ + result.EventsBackfilled += backfilled + remaining -= backfilled } - return rows.Err() + if err := tx.QueryRow(ctx, ` + SELECT EXISTS ( + SELECT 1 + FROM audit_events a + WHERE NOT EXISTS ( + SELECT 1 FROM audit_chain_entries c + WHERE c.tenant_id=a.tenant_id AND c.audit_event_id=a.id + ) + )`).Scan(&result.More); err != nil { + return result, err + } + return result, tx.Commit(ctx) } -func (s *Store) backfillTenantAuditChain(ctx context.Context, tenantID string) error { - tx, err := s.pool.Begin(ctx) - if err != nil { - return err - } - defer 
rollback(ctx, tx) +func (s *Store) backfillTenantAuditChain(ctx context.Context, tx pgx.Tx, tenantID string, limit int, now time.Time) (int, error) { sequence, previousHash, err := ensureAuditChainHead(ctx, tx, tenantID) if err != nil { - return err + return 0, err } rows, err := tx.Query(ctx, ` SELECT a.id, a.tenant_id, a.actor_id, a.action, a.resource, a.resource_id, a.reason, a.occurred_at @@ -159,38 +215,46 @@ func (s *Store) backfillTenantAuditChain(ctx context.Context, tenantID string) e SELECT 1 FROM audit_chain_entries c WHERE c.tenant_id=a.tenant_id AND c.audit_event_id=a.id ) - ORDER BY a.occurred_at ASC, a.id ASC`, tenantID) + ORDER BY a.occurred_at ASC, a.id ASC + LIMIT $2`, tenantID, limit) if err != nil { - return err + return 0, err } - defer rows.Close() - now := time.Now().UTC() - lastAuditEventID := "" + var events []domain.AuditEvent for rows.Next() { var event domain.AuditEvent if err := rows.Scan(&event.ID, &event.TenantID, &event.ActorID, &event.Action, &event.Resource, &event.ResourceID, &event.Reason, &event.OccurredAt); err != nil { - return err + rows.Close() + return 0, err } + events = append(events, event) + } + if err := rows.Err(); err != nil { + rows.Close() + return 0, err + } + rows.Close() + lastAuditEventID := "" + backfilled := 0 + for _, event := range events { sequence++ entry, err := auditchain.ComputeEntry(mustID("ace"), event, sequence, previousHash, domain.AuditChainEntrySourceBackfill, now) if err != nil { - return err + return 0, err } if err := insertAuditChainEntry(ctx, tx, entry); err != nil { - return err + return 0, err } previousHash = entry.ChainHash lastAuditEventID = event.ID - } - if err := rows.Err(); err != nil { - return err + backfilled++ } if lastAuditEventID != "" { if _, err := tx.Exec(ctx, `UPDATE audit_chain_heads SET sequence=$2, chain_hash=$3, last_audit_event_id=$4, updated_at=now() WHERE tenant_id=$1`, tenantID, sequence, previousHash, lastAuditEventID); err != nil { - return err + return 0, err } 
} - return tx.Commit(ctx) + return backfilled, nil } func ensureAuditChainHead(ctx context.Context, tx pgx.Tx, tenantID string) (int64, string, error) { @@ -380,8 +444,13 @@ func (s *Store) ListAPIKeys(ctx context.Context, tenantID string, limit int) ([] } func (s *Store) RevokeAPIKey(ctx context.Context, tenantID, apiKeyID, actorID, reason string) (domain.APIKey, error) { + tx, err := s.pool.Begin(ctx) + if err != nil { + return domain.APIKey{}, err + } + defer rollback(ctx, tx) var item domain.APIKey - err := s.pool.QueryRow(ctx, ` + err = tx.QueryRow(ctx, ` UPDATE api_keys SET state='revoked', revoked_at=now() WHERE tenant_id=$1 AND id=$2 AND state <> 'revoked' @@ -394,7 +463,12 @@ func (s *Store) RevokeAPIKey(ctx context.Context, tenantID, apiKeyID, actorID, r if err != nil { return domain.APIKey{}, err } - _ = s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "api_key.revoked", Resource: "api_key", ResourceID: apiKeyID, Reason: reason}) + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "api_key.revoked", Resource: "api_key", ResourceID: apiKeyID, Reason: reason}); err != nil { + return domain.APIKey{}, err + } + if err := tx.Commit(ctx); err != nil { + return domain.APIKey{}, err + } return item, nil } @@ -2368,136 +2442,6 @@ func (s *Store) RotateEndpointSecret(ctx context.Context, tenantID, endpointID, return normalizeEndpointSecretVersion(item), nil } -func (s *Store) CaptureInbound(ctx context.Context, input app.CaptureInboundInput) (app.CaptureInboundResult, error) { - eventID := mustID("evt") - rawID := mustID("raw") - receiptID := mustID("rcp") - outboxID := mustID("out") - storage, bodyForDB, err := s.prepareRawPayloadStorage(ctx, input.Source.TenantID, rawID, input.RawPayload) - if err != nil { - return app.CaptureInboundResult{}, err - } - objectWritten := storage.backend == domain.RawStorageS3 - tx, err := s.pool.Begin(ctx) - if err != nil { - if objectWritten 
{ - _ = s.objectStore.Delete(ctx, storage.bucket, storage.key) - } - return app.CaptureInboundResult{}, err - } - defer rollback(ctx, tx) - - if _, err := tx.Exec(ctx, "INSERT INTO tenants(id, name) VALUES($1, $1) ON CONFLICT (id) DO NOTHING", input.Source.TenantID); err != nil { - return app.CaptureInboundResult{}, err - } - if _, err := tx.Exec(ctx, ` - INSERT INTO raw_payloads(id, tenant_id, sha256, content_type, size_bytes, body, storage_backend, object_bucket, object_key, storage_status, created_at) - VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)`, - rawID, input.Source.TenantID, input.RawPayload.SHA256, input.RawPayload.ContentType, input.RawPayload.SizeBytes, bodyForDB, - storage.backend, storage.bucket, storage.key, domain.StorageStatusStored, input.RawPayload.CreatedAt, - ); err != nil { - if objectWritten { - _ = s.objectStore.Delete(ctx, storage.bucket, storage.key) - } - return app.CaptureInboundResult{}, err - } - - var existingEventID string - err = tx.QueryRow(ctx, `SELECT id FROM events WHERE tenant_id=$1 AND dedupe_key=$2`, input.Source.TenantID, input.Event.DedupeKey).Scan(&existingEventID) - if err != nil && !errors.Is(err, pgx.ErrNoRows) { - return app.CaptureInboundResult{}, err - } - dedupeStatus := domain.DedupeUnique - if existingEventID != "" { - eventID = existingEventID - dedupeStatus = domain.DedupeDuplicateSuppressed - } else { - if input.Event.Type == "" { - input.Event.Type = "unknown" - } - if _, err := tx.Exec(ctx, ` - INSERT INTO events(id, tenant_id, source_id, provider, type, provider_event_id, raw_payload_id, raw_payload_hash, - signature_verified, verification_reason, dedupe_key, dedupe_status, received_at, trace_id) - VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14)`, - eventID, input.Source.TenantID, input.Source.ID, input.Source.Provider, input.Event.Type, input.Event.ProviderID, - rawID, input.RawPayload.SHA256, input.VerificationOK, input.VerifyReason, input.Event.DedupeKey, dedupeStatus, - input.Event.ReceivedAt, 
input.Event.TraceID, - ); err != nil { - return app.CaptureInboundResult{}, err - } - if len(input.Normalized.Envelope) > 0 { - adapterVersionID, err := s.lookupAdapterVersionID(ctx, tx, firstNonEmpty(input.Source.Adapter, input.Source.Provider)) - if err != nil { - return app.CaptureInboundResult{}, err - } - normalizedID := mustID("nenv") - if _, err := tx.Exec(ctx, ` - INSERT INTO normalized_envelopes(id, tenant_id, event_id, adapter_version_id, provider, provider_event_id, type, source, subject, - envelope_json, data_json, metadata_json, envelope_sha256, data_sha256, metadata_sha256, storage_status, created_at) - VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10::jsonb,$11::jsonb,$12::jsonb,$13,$14,$15,$16,$17)`, - normalizedID, input.Source.TenantID, eventID, adapterVersionID, input.Normalized.Provider, input.Normalized.ProviderEventID, - input.Normalized.Type, input.Normalized.Source, input.Normalized.Subject, string(input.Normalized.Envelope), - string(input.Normalized.Data), string(input.Normalized.Metadata), input.Normalized.EnvelopeSHA256, input.Normalized.DataSHA256, - input.Normalized.MetadataSHA256, domain.StorageStatusStored, input.Normalized.CreatedAt, - ); err != nil { - return app.CaptureInboundResult{}, err - } - } - payload, _ := json.Marshal(map[string]any{"event_id": eventID}) - if _, err := tx.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,$3,$4,$5)`, outboxID, input.Source.TenantID, "route_event", eventID, payload); err != nil { - return app.CaptureInboundResult{}, err - } - } - - if _, err := tx.Exec(ctx, `UPDATE raw_payloads SET event_id=$1 WHERE id=$2`, eventID, rawID); err != nil { - return app.CaptureInboundResult{}, err - } - - headersJSON, err := json.Marshal(input.Receipt.RawHeaders) - if err != nil { - return app.CaptureInboundResult{}, err - } - if _, err := tx.Exec(ctx, ` - INSERT INTO provider_receipts(id, tenant_id, source_id, event_id, raw_payload_id, raw_headers, remote_ip, verification_ok, 
verification_reason, received_at) - VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)`, - receiptID, input.Source.TenantID, input.Source.ID, eventID, rawID, headersJSON, input.Receipt.RemoteIP, - input.VerificationOK, input.VerifyReason, input.Receipt.ReceivedAt, - ); err != nil { - return app.CaptureInboundResult{}, err - } - if input.Event.DedupeKey != "" { - if _, err := tx.Exec(ctx, ` - INSERT INTO idempotency_records(tenant_id, dedupe_key, resource_type, resource_id, status_code) - VALUES($1,$2,'event',$3,202) - ON CONFLICT (tenant_id, dedupe_key) DO NOTHING`, - input.Source.TenantID, input.Event.DedupeKey, eventID, - ); err != nil { - return app.CaptureInboundResult{}, err - } - if _, err := tx.Exec(ctx, ` - INSERT INTO dedupe_records(tenant_id, source_id, dedupe_key, first_event_id, last_receipt_id, status) - VALUES($1,$2,$3,$4,$5,$6) - ON CONFLICT (tenant_id, dedupe_key) DO UPDATE - SET last_receipt_id=EXCLUDED.last_receipt_id, status=EXCLUDED.status, last_seen_at=now()`, - input.Source.TenantID, input.Source.ID, input.Event.DedupeKey, eventID, receiptID, dedupeStatus, - ); err != nil { - return app.CaptureInboundResult{}, err - } - } - if !input.VerificationOK { - if _, err := tx.Exec(ctx, `INSERT INTO quarantine_entries(id, tenant_id, event_id, reason) VALUES($1,$2,$3,$4)`, mustID("qua"), input.Source.TenantID, eventID, input.VerifyReason); err != nil { - return app.CaptureInboundResult{}, err - } - } - if err := tx.Commit(ctx); err != nil { - if objectWritten { - _ = s.objectStore.Delete(ctx, storage.bucket, storage.key) - } - return app.CaptureInboundResult{}, err - } - return app.CaptureInboundResult{EventID: eventID, ReceiptID: receiptID, RawPayloadID: rawID, DedupeStatus: dedupeStatus}, nil -} - type rawPayloadStorage struct { backend string bucket string @@ -2593,8 +2537,9 @@ func (s *Store) requireActiveSource(ctx context.Context, tx pgx.Tx, tenantID, so return nil } -func (s *Store) ListEvents(ctx context.Context, tenantID string, limit int) ([]domain.Event, 
error) { - rows, err := s.pool.Query(ctx, `SELECT id, tenant_id, source_id, provider, type, provider_event_id, raw_payload_id, raw_payload_hash, signature_verified, verification_reason, dedupe_key, dedupe_status, received_at, trace_id FROM events WHERE tenant_id=$1 ORDER BY received_at DESC LIMIT $2`, tenantID, limit) +func (s *Store) ListEvents(ctx context.Context, tenantID string, req app.EventSearchRequest) ([]domain.Event, error) { + query, args := eventSearchQuery(tenantID, req) + rows, err := s.pool.Query(ctx, query, args...) if err != nil { return nil, err } @@ -2610,6 +2555,46 @@ func (s *Store) ListEvents(ctx context.Context, tenantID string, limit int) ([]d return out, rows.Err() } +func eventSearchQuery(tenantID string, req app.EventSearchRequest) (string, []any) { + args := []any{tenantID} + where := []string{"e.tenant_id=$1"} + add := func(clause string, value any) { + args = append(args, value) + where = append(where, fmt.Sprintf(clause, len(args))) + } + if req.Provider != "" { + add("e.provider=$%d", req.Provider) + } + if req.ExternalID != "" { + add("e.provider_event_id=$%d", req.ExternalID) + } + if req.DeliveryID != "" { + add("EXISTS (SELECT 1 FROM deliveries d WHERE d.tenant_id=e.tenant_id AND d.event_id=e.id AND d.id=$%d)", req.DeliveryID) + } + switch req.Status { + case "dlq", "dead_lettered": + where = append(where, "EXISTS (SELECT 1 FROM dead_letter_entries dlq WHERE dlq.tenant_id=e.tenant_id AND dlq.event_id=e.id AND dlq.state='open')") + } + switch req.Verification { + case "valid": + where = append(where, "e.signature_verified=true") + case "invalid": + where = append(where, "e.signature_verified=false") + } + if !req.ReceivedAfter.IsZero() { + add("e.received_at >= $%d", req.ReceivedAfter) + } + if req.RouteID != "" { + add("EXISTS (SELECT 1 FROM deliveries d WHERE d.tenant_id=e.tenant_id AND d.event_id=e.id AND d.route_id=$%d)", req.RouteID) + } + limit := req.Limit + if limit <= 0 { + limit = 50 + } + args = append(args, limit) + 
return `SELECT e.id, e.tenant_id, e.source_id, e.provider, e.type, e.provider_event_id, e.raw_payload_id, e.raw_payload_hash, e.signature_verified, e.verification_reason, e.dedupe_key, e.dedupe_status, e.received_at, e.trace_id FROM events e WHERE ` + strings.Join(where, " AND ") + fmt.Sprintf(" ORDER BY e.received_at DESC LIMIT $%d", len(args)), args +} + func (s *Store) GetEvent(ctx context.Context, tenantID, eventID string) (domain.Event, error) { row := s.pool.QueryRow(ctx, `SELECT id, tenant_id, source_id, provider, type, provider_event_id, raw_payload_id, raw_payload_hash, signature_verified, verification_reason, dedupe_key, dedupe_status, received_at, trace_id FROM events WHERE tenant_id=$1 AND id=$2`, tenantID, eventID) item, err := scanEvent(row) @@ -2619,7 +2604,7 @@ func (s *Store) GetEvent(ctx context.Context, tenantID, eventID string) (domain. return item, err } -func (s *Store) GetRawPayload(ctx context.Context, tenantID, eventID, actorID string) (domain.RawPayload, error) { +func (s *Store) GetRawPayload(ctx context.Context, tenantID, eventID, actorID, reason string) (domain.RawPayload, error) { var raw domain.RawPayload err := s.pool.QueryRow(ctx, ` SELECT id, tenant_id, event_id, sha256, content_type, size_bytes, body, @@ -2649,14 +2634,16 @@ func (s *Store) GetRawPayload(ctx context.Context, tenantID, eventID, actorID st if errors.Is(err, blobstore.ErrNotFound) { return domain.RawPayload{}, app.ErrGone } - return domain.RawPayload{}, err + return domain.RawPayload{}, errObjectStoreReadFailed } if domain.HashSHA256(body) != raw.SHA256 { return domain.RawPayload{}, errors.New("raw payload object hash mismatch") } raw.Body = body } - _ = s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "raw_payload.read", Resource: "event", ResourceID: eventID}) + if err := s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "raw_payload.read", Resource: "event", ResourceID: eventID, Reason: 
reason}); err != nil { + return domain.RawPayload{}, err + } return raw, nil } @@ -2689,7 +2676,7 @@ func (s *Store) GetNormalizedEvent(ctx context.Context, tenantID, eventID, actor return item, nil } -func (s *Store) ListEventTimeline(ctx context.Context, tenantID, eventID string, limit int) ([]map[string]any, error) { +func (s *Store) ListEventTimeline(ctx context.Context, tenantID, eventID string, limit int) ([]app.EventTimelineEntry, error) { var exists bool if err := s.pool.QueryRow(ctx, `SELECT EXISTS (SELECT 1 FROM events WHERE tenant_id=$1 AND id=$2)`, tenantID, eventID).Scan(&exists); err != nil { return nil, err @@ -2697,8 +2684,8 @@ func (s *Store) ListEventTimeline(ctx context.Context, tenantID, eventID string, if !exists { return nil, app.ErrNotFound } - return listRows(ctx, s.pool, ` - SELECT kind, ref_id, state, detail, occurred_at FROM ( + rows, err := s.pool.Query(ctx, ` + SELECT 'webhookery.event_timeline.v1' AS schema_version, kind, ref_id, state, detail, occurred_at FROM ( SELECT 'event' AS kind, id AS ref_id, dedupe_status AS state, verification_reason AS detail, received_at AS occurred_at FROM events WHERE tenant_id=$1 AND id=$2 UNION ALL @@ -2760,11 +2747,53 @@ func (s *Store) ListEventTimeline(ctx context.Context, tenantID, eventID string, FROM reconciliation_items WHERE tenant_id=$1 AND (local_event_id=$2 OR recovered_event_id=$2) UNION ALL + SELECT 'replay' AS kind, r.id AS ref_id, r.state, + 'reason_code=' || COALESCE(NULLIF(r.reason_code,''),'operator_requested') || + ' reason=' || r.reason || + ' config_mode=' || COALESCE(NULLIF(r.config_mode,''),'current') || + ' event_id=' || COALESCE(NULLIF(r.scope_json->>'event_id',''),'none') || + ' delivery_id=' || COALESCE(NULLIF(r.scope_json->>'delivery_id',''),'none') || + ' endpoint_id=' || COALESCE(NULLIF(r.scope_json->>'endpoint_id',''),'none') || + ' approval_required=' || r.approval_required::text || + ' approval_expires_at=' || COALESCE(r.approval_expires_at::text,'none') || + ' 
approved_by=' || COALESCE(NULLIF(r.approved_by,''),'none') || + ' approved_at=' || COALESCE(r.approved_at::text,'none') AS detail, + r.created_at AS occurred_at + FROM replay_jobs r + WHERE r.tenant_id=$1 + AND ( + r.scope_json->>'event_id'=$2 + OR EXISTS ( + SELECT 1 FROM deliveries d + WHERE d.tenant_id=r.tenant_id AND d.id=r.scope_json->>'delivery_id' AND d.event_id=$2 + ) + OR EXISTS ( + SELECT 1 FROM replay_items ri + WHERE ri.tenant_id=r.tenant_id AND ri.replay_job_id=r.id AND ri.event_id=$2 + ) + ) + UNION ALL SELECT 'audit' AS kind, id AS ref_id, action AS state, reason AS detail, occurred_at FROM audit_events WHERE tenant_id=$1 AND resource_id=$2 ) timeline ORDER BY occurred_at ASC LIMIT $3`, tenantID, eventID, limit) + if err != nil { + return nil, err + } + defer rows.Close() + var out []app.EventTimelineEntry + sequence := 1 + for rows.Next() { + var item app.EventTimelineEntry + if err := rows.Scan(&item.SchemaVersion, &item.Kind, &item.RefID, &item.State, &item.Detail, &item.OccurredAt); err != nil { + return nil, err + } + item.Sequence = sequence + sequence++ + out = append(out, item) + } + return out, rows.Err() } func (s *Store) ListDeliveries(ctx context.Context, tenantID string, limit int) ([]domain.Delivery, error) { @@ -2817,7 +2846,12 @@ func (s *Store) GetDeliveryAttempt(ctx context.Context, tenantID, attemptID stri } func (s *Store) RetryDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.Delivery, error) { - row := s.pool.QueryRow(ctx, `UPDATE deliveries SET state='scheduled', next_attempt_at=now(), locked_by=NULL, lock_expires_at=NULL WHERE tenant_id=$1 AND id=$2 RETURNING id, tenant_id, event_id, endpoint_id, COALESCE(route_id,''), COALESCE(route_version_id,''), COALESCE(subscription_id,''), COALESCE(subscription_version_id,''), COALESCE(retry_policy_id,''), COALESCE(replay_job_id,''), COALESCE(adapter_version_id,''), COALESCE(normalized_envelope_id,''), COALESCE(transformation_version_id,''), 
COALESCE(delivery_payload_id,''), COALESCE((SELECT p.sha256 FROM delivery_payloads p WHERE p.tenant_id=deliveries.tenant_id AND p.id=deliveries.delivery_payload_id), ''), COALESCE(retry_seed,''), state, attempt_count, COALESCE(next_attempt_at, 'epoch'::timestamptz)`, tenantID, deliveryID) + tx, err := s.pool.Begin(ctx) + if err != nil { + return domain.Delivery{}, err + } + defer rollback(ctx, tx) + row := tx.QueryRow(ctx, `UPDATE deliveries SET state='scheduled', next_attempt_at=now(), locked_by=NULL, lock_expires_at=NULL WHERE tenant_id=$1 AND id=$2 RETURNING id, tenant_id, event_id, endpoint_id, COALESCE(route_id,''), COALESCE(route_version_id,''), COALESCE(subscription_id,''), COALESCE(subscription_version_id,''), COALESCE(retry_policy_id,''), COALESCE(replay_job_id,''), COALESCE(adapter_version_id,''), COALESCE(normalized_envelope_id,''), COALESCE(transformation_version_id,''), COALESCE(delivery_payload_id,''), COALESCE((SELECT p.sha256 FROM delivery_payloads p WHERE p.tenant_id=deliveries.tenant_id AND p.id=deliveries.delivery_payload_id), ''), COALESCE(retry_seed,''), state, attempt_count, COALESCE(next_attempt_at, 'epoch'::timestamptz)`, tenantID, deliveryID) item, err := scanDelivery(row) if errors.Is(err, pgx.ErrNoRows) { return domain.Delivery{}, app.ErrNotFound @@ -2825,12 +2859,22 @@ func (s *Store) RetryDelivery(ctx context.Context, tenantID, deliveryID, actorID if err != nil { return domain.Delivery{}, err } - _ = s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "delivery.retry_requested", Resource: "delivery", ResourceID: deliveryID, Reason: reason}) + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "delivery.retry_requested", Resource: "delivery", ResourceID: deliveryID, Reason: reason}); err != nil { + return domain.Delivery{}, err + } + if err := tx.Commit(ctx); err != nil { + return domain.Delivery{}, err + } return item, nil } func (s *Store) 
CancelDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.Delivery, error) { - row := s.pool.QueryRow(ctx, `UPDATE deliveries SET state='canceled', locked_by=NULL, lock_expires_at=NULL WHERE tenant_id=$1 AND id=$2 AND state NOT IN ('succeeded','dead_lettered','canceled') RETURNING id, tenant_id, event_id, endpoint_id, COALESCE(route_id,''), COALESCE(route_version_id,''), COALESCE(subscription_id,''), COALESCE(subscription_version_id,''), COALESCE(retry_policy_id,''), COALESCE(replay_job_id,''), COALESCE(adapter_version_id,''), COALESCE(normalized_envelope_id,''), COALESCE(transformation_version_id,''), COALESCE(delivery_payload_id,''), COALESCE((SELECT p.sha256 FROM delivery_payloads p WHERE p.tenant_id=deliveries.tenant_id AND p.id=deliveries.delivery_payload_id), ''), COALESCE(retry_seed,''), state, attempt_count, COALESCE(next_attempt_at, 'epoch'::timestamptz)`, tenantID, deliveryID) + tx, err := s.pool.Begin(ctx) + if err != nil { + return domain.Delivery{}, err + } + defer rollback(ctx, tx) + row := tx.QueryRow(ctx, `UPDATE deliveries SET state='canceled', locked_by=NULL, lock_expires_at=NULL WHERE tenant_id=$1 AND id=$2 AND state NOT IN ('succeeded','dead_lettered','canceled') RETURNING id, tenant_id, event_id, endpoint_id, COALESCE(route_id,''), COALESCE(route_version_id,''), COALESCE(subscription_id,''), COALESCE(subscription_version_id,''), COALESCE(retry_policy_id,''), COALESCE(replay_job_id,''), COALESCE(adapter_version_id,''), COALESCE(normalized_envelope_id,''), COALESCE(transformation_version_id,''), COALESCE(delivery_payload_id,''), COALESCE((SELECT p.sha256 FROM delivery_payloads p WHERE p.tenant_id=deliveries.tenant_id AND p.id=deliveries.delivery_payload_id), ''), COALESCE(retry_seed,''), state, attempt_count, COALESCE(next_attempt_at, 'epoch'::timestamptz)`, tenantID, deliveryID) item, err := scanDelivery(row) if errors.Is(err, pgx.ErrNoRows) { return domain.Delivery{}, app.ErrNotFound @@ -2838,7 +2882,12 @@ func 
(s *Store) CancelDelivery(ctx context.Context, tenantID, deliveryID, actorI if err != nil { return domain.Delivery{}, err } - _ = s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "delivery.canceled", Resource: "delivery", ResourceID: deliveryID, Reason: reason}) + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "delivery.canceled", Resource: "delivery", ResourceID: deliveryID, Reason: reason}); err != nil { + return domain.Delivery{}, err + } + if err := tx.Commit(ctx); err != nil { + return domain.Delivery{}, err + } return item, nil } @@ -4913,67 +4962,6 @@ func (s *Store) RevokeProviderConnection(ctx context.Context, tenantID, connecti return normalizeProviderConnection(out), nil } -func (s *Store) DryRunReconciliation(ctx context.Context, tenantID string, req app.ReconciliationJobRequest) (domain.ReconciliationJob, error) { - conn, credential, err := s.getProviderConnectionSecret(ctx, tenantID, req.ConnectionID) - if err != nil { - return domain.ReconciliationJob{}, err - } - adapter, ok := reconcile.BuiltInRegistry(nil).Adapter(conn.Provider) - if !ok { - return domain.ReconciliationJob{}, app.ErrInvalidInput - } - caps := adapter.Capabilities(conn.Config) - job := domain.ReconciliationJob{ - ID: "dry_run", - TenantID: tenantID, - ConnectionID: conn.ID, - Provider: conn.Provider, - State: domain.ReconciliationJobStateCompleted, - DryRun: true, - CaptureMissing: req.CaptureMissing, - RouteRecovered: req.RouteRecovered, - RedeliverFailed: req.RedeliverFailed, - ScopeObjectID: req.ScopeObjectID, - WindowStart: req.WindowStart, - WindowEnd: req.WindowEnd, - Reason: req.Reason, - CreatedAt: time.Now().UTC(), - CompletedAt: time.Now().UTC(), - } - if !caps.CanScanEvents { - job.TotalItems = 1 - job.UnrecoverableItems = 1 - job.Error = strings.Join(caps.Limitations, "; ") - return job, nil - } - scan, err := adapter.Scan(ctx, reconcile.ScanRequest{ - Connection: 
reconcile.Connection{ID: conn.ID, Provider: conn.Provider, CredentialType: conn.CredentialType, Credential: credential, Config: conn.Config}, - WindowStart: req.WindowStart, WindowEnd: req.WindowEnd, ScopeObjectID: req.ScopeObjectID, - CaptureMissing: req.CaptureMissing, RedeliverFailed: req.RedeliverFailed, - }) - if err != nil { - job.State = domain.ReconciliationJobStateFailed - job.Error = providerErrorForDB(err) - return job, nil - } - for _, object := range scan.Objects { - job.TotalItems++ - localID, err := s.findLocalProviderEvent(ctx, tenantID, conn, object.ID) - if err != nil { - return domain.ReconciliationJob{}, err - } - if localID != "" { - job.MatchedItems++ - } else { - job.MissingItems++ - } - if object.Failed && req.RedeliverFailed && object.Redeliverable { - job.RedeliveredItems++ - } - } - return normalizeReconciliationJob(job), nil -} - func (s *Store) CreateReconciliationJob(ctx context.Context, tenantID, actorID string, req app.ReconciliationJobRequest) (domain.ReconciliationJob, error) { conn, err := s.getProviderConnectionPublic(ctx, tenantID, req.ConnectionID) if err != nil { @@ -5004,7 +4992,7 @@ func (s *Store) CreateReconciliationJob(ctx context.Context, tenantID, actorID s } if !req.DryRun { payload, _ := json.Marshal(map[string]any{"job_id": id}) - if _, err := s.pool.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,'reconciliation_job',$3,$4)`, mustID("out"), tenantID, id, payload); err != nil { + if _, err := s.pool.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,$3,$4,$5)`, mustID("out"), tenantID, app.OutboxKindReconciliationJob, id, payload); err != nil { return domain.ReconciliationJob{}, err } } @@ -5134,12 +5122,17 @@ func (s *Store) CreateAuditExport(ctx context.Context, tenantID, actorID string, return domain.EvidenceExport{}, err } files["audit_chain_proof.jsonl"] = chainProof + eventIDs, err := s.eventIDsForExport(ctx, tenantID, req.From, req.To) + 
if err != nil { + return domain.EvidenceExport{}, err + } bundle, err := evidence.BuildTarGzipBundle(evidence.Manifest{ ExportID: id, TenantID: tenantID, CreatedAt: now, - From: req.From, - To: req.To, + From: manifestTime(req.From), + To: manifestTime(req.To), + IncludedEvents: eventIDs, IncludeRawPayloads: req.IncludeRawPayloads, IncludeTimelines: req.IncludeTimelines, IncludePayloadBodies: req.IncludePayloadBodies, @@ -5289,7 +5282,7 @@ func (s *Store) DownloadAuditExport(ctx context.Context, tenantID, exportID, act if errors.Is(err, blobstore.ErrNotFound) { return app.EvidenceExportDownload{}, app.ErrGone } - return app.EvidenceExportDownload{}, err + return app.EvidenceExportDownload{}, errObjectStoreReadFailed } } if len(body) == 0 { @@ -5298,7 +5291,9 @@ func (s *Store) DownloadAuditExport(ctx context.Context, tenantID, exportID, act if evidence.SHA256(body) != out.SHA256 { return app.EvidenceExportDownload{}, errors.New("audit export bundle hash mismatch") } - _ = s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "audit_export.downloaded", Resource: "audit_export", ResourceID: exportID}) + if err := s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "audit_export.downloaded", Resource: "audit_export", ResourceID: exportID}); err != nil { + return app.EvidenceExportDownload{}, err + } out = normalizeEvidenceExportTimes(out) return app.EvidenceExportDownload{ Export: out, @@ -5338,7 +5333,7 @@ func (s *Store) ListDeadLetter(ctx context.Context, tenantID string, limit int) return listRows(ctx, s.pool, `SELECT id, delivery_id, event_id, reason, state, created_at FROM dead_letter_entries WHERE tenant_id=$1 ORDER BY created_at DESC LIMIT $2`, tenantID, limit) } -func (s *Store) ReleaseDeadLetter(ctx context.Context, tenantID, entryID, actorID, reason string) (app.ReplayJob, error) { +func (s *Store) ReleaseDeadLetter(ctx context.Context, tenantID, entryID, actorID, reasonCode, reason string) 
(app.ReplayJob, error) { var deliveryID, eventID string err := s.pool.QueryRow(ctx, `SELECT COALESCE(delivery_id,''), COALESCE(event_id,'') FROM dead_letter_entries WHERE tenant_id=$1 AND id=$2 AND state='open'`, tenantID, entryID).Scan(&deliveryID, &eventID) if errors.Is(err, pgx.ErrNoRows) { @@ -5347,19 +5342,29 @@ func (s *Store) ReleaseDeadLetter(ctx context.Context, tenantID, entryID, actorI if err != nil { return app.ReplayJob{}, err } - req := app.ReplayRequest{DeliveryID: deliveryID, EventID: eventID, Reason: reason} + req := app.ReplayRequest{DeliveryID: deliveryID, EventID: eventID, ReasonCode: reasonCode, Reason: reason} job, err := s.CreateReplay(ctx, tenantID, actorID, req) if err != nil { return app.ReplayJob{}, err } - if _, err := s.pool.Exec(ctx, `UPDATE dead_letter_entries SET state='released' WHERE tenant_id=$1 AND id=$2`, tenantID, entryID); err != nil { + tx, err := s.pool.Begin(ctx) + if err != nil { + return app.ReplayJob{}, err + } + defer rollback(ctx, tx) + if _, err := tx.Exec(ctx, `UPDATE dead_letter_entries SET state='released' WHERE tenant_id=$1 AND id=$2`, tenantID, entryID); err != nil { + return app.ReplayJob{}, err + } + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "dead_letter.released", Resource: "dead_letter_entry", ResourceID: entryID, Reason: reason}); err != nil { + return app.ReplayJob{}, err + } + if err := tx.Commit(ctx); err != nil { return app.ReplayJob{}, err } - _ = s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "dead_letter.released", Resource: "dead_letter_entry", ResourceID: entryID, Reason: reason}) return job, nil } -func (s *Store) BulkReleaseDeadLetter(ctx context.Context, tenantID string, entryIDs []string, actorID, reason string) ([]app.ReplayJob, error) { +func (s *Store) BulkReleaseDeadLetter(ctx context.Context, tenantID string, entryIDs []string, actorID, reasonCode, reason string) ([]app.ReplayJob, error) { if 
len(entryIDs) == 0 { rows, err := s.pool.Query(ctx, `SELECT id FROM dead_letter_entries WHERE tenant_id=$1 AND state='open' ORDER BY created_at ASC LIMIT 100`, tenantID) if err != nil { @@ -5380,7 +5385,7 @@ func (s *Store) BulkReleaseDeadLetter(ctx context.Context, tenantID string, entr } jobs := make([]app.ReplayJob, 0, len(entryIDs)) for _, entryID := range entryIDs { - job, err := s.ReleaseDeadLetter(ctx, tenantID, entryID, actorID, reason) + job, err := s.ReleaseDeadLetter(ctx, tenantID, entryID, actorID, reasonCode, reason) if err != nil { return jobs, err } @@ -5394,36 +5399,260 @@ func (s *Store) ListQuarantine(ctx context.Context, tenantID string, limit int) } func (s *Store) ApproveQuarantine(ctx context.Context, tenantID, entryID, actorID, reason string, routeAfterRelease bool) (map[string]any, error) { + tx, err := s.pool.Begin(ctx) + if err != nil { + return nil, err + } + defer rollback(ctx, tx) var eventID string - err := s.pool.QueryRow(ctx, `UPDATE quarantine_entries SET state='approved' WHERE tenant_id=$1 AND id=$2 AND state='open' RETURNING COALESCE(event_id,'')`, tenantID, entryID).Scan(&eventID) + err = tx.QueryRow(ctx, `UPDATE quarantine_entries SET state='approved' WHERE tenant_id=$1 AND id=$2 AND state='open' RETURNING COALESCE(event_id,'')`, tenantID, entryID).Scan(&eventID) if errors.Is(err, pgx.ErrNoRows) { return nil, app.ErrNotFound } if err != nil { return nil, err } - _ = s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "quarantine.approved", Resource: "quarantine_entry", ResourceID: entryID, Reason: reason}) + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "quarantine.approved", Resource: "quarantine_entry", ResourceID: entryID, Reason: reason}); err != nil { + return nil, err + } if routeAfterRelease && eventID != "" { - if _, err := s.createDeliveriesForEvent(ctx, tenantID, eventID); err != nil { + payload, _ := 
json.Marshal(map[string]any{"event_id": eventID, "allow_recovered": false}) + if _, err := tx.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,$3,$4,$5)`, mustID("out"), tenantID, app.OutboxKindRouteEvent, eventID, payload); err != nil { return nil, err } } + if err := tx.Commit(ctx); err != nil { + return nil, err + } return map[string]any{"id": entryID, "event_id": eventID, "state": "approved"}, nil } func (s *Store) RejectQuarantine(ctx context.Context, tenantID, entryID, actorID, reason string) (map[string]any, error) { + tx, err := s.pool.Begin(ctx) + if err != nil { + return nil, err + } + defer rollback(ctx, tx) var eventID string - err := s.pool.QueryRow(ctx, `UPDATE quarantine_entries SET state='rejected' WHERE tenant_id=$1 AND id=$2 AND state='open' RETURNING COALESCE(event_id,'')`, tenantID, entryID).Scan(&eventID) + err = tx.QueryRow(ctx, `UPDATE quarantine_entries SET state='rejected' WHERE tenant_id=$1 AND id=$2 AND state='open' RETURNING COALESCE(event_id,'')`, tenantID, entryID).Scan(&eventID) if errors.Is(err, pgx.ErrNoRows) { return nil, app.ErrNotFound } if err != nil { return nil, err } - _ = s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "quarantine.rejected", Resource: "quarantine_entry", ResourceID: entryID, Reason: reason}) + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "quarantine.rejected", Resource: "quarantine_entry", ResourceID: entryID, Reason: reason}); err != nil { + return nil, err + } + if err := tx.Commit(ctx); err != nil { + return nil, err + } return map[string]any{"id": entryID, "event_id": eventID, "state": "rejected"}, nil } +const replayApprovalPolicyColumns = `id, tenant_id, scope_type, scope_id, require_approval, default_expiry_seconds, state, reason, created_by, created_at, updated_at` + +func scanReplayApprovalPolicy(scanner rowScanner) (domain.ReplayApprovalPolicy, error) { + var item 
domain.ReplayApprovalPolicy + if err := scanner.Scan(&item.ID, &item.TenantID, &item.ScopeType, &item.ScopeID, &item.RequireApproval, &item.DefaultExpirySeconds, &item.State, &item.Reason, &item.CreatedBy, &item.CreatedAt, &item.UpdatedAt); err != nil { + return domain.ReplayApprovalPolicy{}, err + } + item.CreatedAt = item.CreatedAt.UTC() + item.UpdatedAt = item.UpdatedAt.UTC() + return item, nil +} + +func (s *Store) CreateReplayApprovalPolicy(ctx context.Context, tenantID, actorID string, req app.CreateReplayApprovalPolicyRequest) (domain.ReplayApprovalPolicy, error) { + scopeType := strings.TrimSpace(req.ScopeType) + scopeID := strings.TrimSpace(req.ScopeID) + if scopeType == app.ReplayApprovalScopeTenant { + scopeID = "" + } + if req.DefaultExpirySeconds == 0 { + req.DefaultExpirySeconds = int(app.ReplayApprovalDefaultExpiry / time.Second) + } + if !req.RequireApproval { + req.RequireApproval = true + } + id := mustID("rap") + tx, err := s.pool.Begin(ctx) + if err != nil { + return domain.ReplayApprovalPolicy{}, err + } + defer rollback(ctx, tx) + item, err := scanReplayApprovalPolicy(tx.QueryRow(ctx, ` + INSERT INTO replay_approval_policies(id, tenant_id, scope_type, scope_id, require_approval, default_expiry_seconds, state, reason, created_by) + VALUES($1,$2,$3,$4,$5,$6,'active',$7,$8) + ON CONFLICT (tenant_id, scope_type, scope_id) DO UPDATE + SET require_approval=EXCLUDED.require_approval, + default_expiry_seconds=EXCLUDED.default_expiry_seconds, + state='active', + reason=EXCLUDED.reason, + updated_at=now() + RETURNING `+replayApprovalPolicyColumns, + id, tenantID, scopeType, scopeID, req.RequireApproval, req.DefaultExpirySeconds, strings.TrimSpace(req.Reason), actorID, + )) + if err != nil { + return domain.ReplayApprovalPolicy{}, err + } + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "replay_approval_policy.upserted", Resource: "replay_approval_policy", ResourceID: item.ID, Reason: req.Reason}); 
err != nil { + return domain.ReplayApprovalPolicy{}, err + } + return item, tx.Commit(ctx) +} + +func (s *Store) ListReplayApprovalPolicies(ctx context.Context, tenantID string, limit int) ([]domain.ReplayApprovalPolicy, error) { + rows, err := s.pool.Query(ctx, ` + SELECT `+replayApprovalPolicyColumns+` + FROM replay_approval_policies + WHERE tenant_id=$1 + ORDER BY scope_type, scope_id, updated_at DESC + LIMIT $2`, tenantID, limit) + if err != nil { + return nil, err + } + defer rows.Close() + var out []domain.ReplayApprovalPolicy + for rows.Next() { + item, err := scanReplayApprovalPolicy(rows) + if err != nil { + return nil, err + } + out = append(out, item) + } + return out, rows.Err() +} + +func (s *Store) DisableReplayApprovalPolicy(ctx context.Context, tenantID, policyID, actorID, reason string) (domain.ReplayApprovalPolicy, error) { + tx, err := s.pool.Begin(ctx) + if err != nil { + return domain.ReplayApprovalPolicy{}, err + } + defer rollback(ctx, tx) + item, err := scanReplayApprovalPolicy(tx.QueryRow(ctx, ` + UPDATE replay_approval_policies + SET state='disabled', reason=$3, updated_at=now() + WHERE tenant_id=$1 AND id=$2 + RETURNING `+replayApprovalPolicyColumns, + tenantID, policyID, strings.TrimSpace(reason), + )) + if errors.Is(err, pgx.ErrNoRows) { + return domain.ReplayApprovalPolicy{}, app.ErrNotFound + } + if err != nil { + return domain.ReplayApprovalPolicy{}, err + } + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "replay_approval_policy.disabled", Resource: "replay_approval_policy", ResourceID: policyID, Reason: reason}); err != nil { + return domain.ReplayApprovalPolicy{}, err + } + return item, tx.Commit(ctx) +} + +func (s *Store) replayApprovalPolicyForReplay(ctx context.Context, tenantID string, req app.ReplayRequest) (domain.ReplayApprovalPolicy, bool, error) { + var sourceID string + var routeIDs []string + if req.DeliveryID != "" { + var routeID string + err := 
s.pool.QueryRow(ctx, ` + SELECT COALESCE(d.route_id,''), e.source_id + FROM deliveries d + JOIN events e ON e.tenant_id=d.tenant_id AND e.id=d.event_id + WHERE d.tenant_id=$1 AND d.id=$2`, tenantID, req.DeliveryID).Scan(&routeID, &sourceID) + if err != nil && !errors.Is(err, pgx.ErrNoRows) { + return domain.ReplayApprovalPolicy{}, false, err + } + if routeID != "" { + routeIDs = append(routeIDs, routeID) + } + } + if req.EventID != "" { + var eventType string + err := s.pool.QueryRow(ctx, `SELECT source_id, type FROM events WHERE tenant_id=$1 AND id=$2`, tenantID, req.EventID).Scan(&sourceID, &eventType) + if err != nil && !errors.Is(err, pgx.ErrNoRows) { + return domain.ReplayApprovalPolicy{}, false, err + } + if sourceID != "" { + rows, err := s.pool.Query(ctx, ` + SELECT id + FROM routes + WHERE tenant_id=$1 AND source_id=$2 AND state='active' AND $3 = ANY(event_types)`, tenantID, sourceID, eventType) + if err != nil { + return domain.ReplayApprovalPolicy{}, false, err + } + defer rows.Close() + for rows.Next() { + var routeID string + if err := rows.Scan(&routeID); err != nil { + return domain.ReplayApprovalPolicy{}, false, err + } + routeIDs = append(routeIDs, routeID) + } + if err := rows.Err(); err != nil { + return domain.ReplayApprovalPolicy{}, false, err + } + } + if req.ConfigMode == app.ReplayConfigOriginal { + rows, err := s.pool.Query(ctx, ` + SELECT DISTINCT COALESCE(route_id,'') + FROM deliveries + WHERE tenant_id=$1 AND event_id=$2 AND COALESCE(route_id,'') <> ''`, tenantID, req.EventID) + if err != nil { + return domain.ReplayApprovalPolicy{}, false, err + } + defer rows.Close() + for rows.Next() { + var routeID string + if err := rows.Scan(&routeID); err != nil { + return domain.ReplayApprovalPolicy{}, false, err + } + routeIDs = append(routeIDs, routeID) + } + if err := rows.Err(); err != nil { + return domain.ReplayApprovalPolicy{}, false, err + } + } + } + if len(routeIDs) > 0 { + item, err := scanReplayApprovalPolicy(s.pool.QueryRow(ctx, ` + 
SELECT `+replayApprovalPolicyColumns+` + FROM replay_approval_policies + WHERE tenant_id=$1 AND scope_type='route' AND scope_id=ANY($2::text[]) AND state='active' AND require_approval=true + ORDER BY updated_at DESC + LIMIT 1`, tenantID, routeIDs)) + if err == nil { + return item, true, nil + } + if !errors.Is(err, pgx.ErrNoRows) { + return domain.ReplayApprovalPolicy{}, false, err + } + } + if sourceID != "" { + item, err := scanReplayApprovalPolicy(s.pool.QueryRow(ctx, ` + SELECT `+replayApprovalPolicyColumns+` + FROM replay_approval_policies + WHERE tenant_id=$1 AND scope_type='source' AND scope_id=$2 AND state='active' AND require_approval=true + LIMIT 1`, tenantID, sourceID)) + if err == nil { + return item, true, nil + } + if !errors.Is(err, pgx.ErrNoRows) { + return domain.ReplayApprovalPolicy{}, false, err + } + } + item, err := scanReplayApprovalPolicy(s.pool.QueryRow(ctx, ` + SELECT `+replayApprovalPolicyColumns+` + FROM replay_approval_policies + WHERE tenant_id=$1 AND scope_type='tenant' AND scope_id='' AND state='active' AND require_approval=true + LIMIT 1`, tenantID)) + if err == nil { + return item, true, nil + } + if errors.Is(err, pgx.ErrNoRows) { + return domain.ReplayApprovalPolicy{}, false, nil + } + return domain.ReplayApprovalPolicy{}, false, err +} + func (s *Store) DryRunReplay(ctx context.Context, tenantID string, req app.ReplayRequest) (app.ReplayDryRun, error) { if req.ConfigMode == "" { req.ConfigMode = app.ReplayConfigCurrent @@ -5498,7 +5727,9 @@ func (s *Store) DryRunReplay(ctx context.Context, tenantID string, req app.Repla return app.ReplayDryRun{WouldReplayEvents: total, WouldCreateDeliveries: total, Warnings: warnings}, nil } -const replayJobSelectSQL = `SELECT id, state, scope_hash, config_mode, rate_limit_per_minute, total_items, processed_items, failed_items, approval_required, COALESCE(approved_by,''), approved_at FROM replay_jobs` +const replayJobColumns = `id, state, scope_hash, COALESCE(reason_code,'operator_requested'), 
reason, config_mode, rate_limit_per_minute, total_items, processed_items, failed_items, approval_required, approval_expires_at, COALESCE(approved_by,''), approved_at, created_by, created_at` + +const replayJobSelectSQL = `SELECT ` + replayJobColumns + ` FROM replay_jobs` type replayJobScanner interface { Scan(dest ...any) error @@ -5506,14 +5737,22 @@ type replayJobScanner interface { func scanReplayJob(scanner replayJobScanner) (app.ReplayJob, error) { var item app.ReplayJob + var approvalExpiresAt sql.NullTime var approvedAt sql.NullTime - if err := scanner.Scan(&item.ID, &item.State, &item.ScopeHash, &item.ConfigMode, &item.RateLimitPerMinute, &item.TotalItems, &item.ProcessedItems, &item.FailedItems, &item.ApprovalRequired, &item.ApprovedBy, &approvedAt); err != nil { + var createdAt time.Time + if err := scanner.Scan(&item.ID, &item.State, &item.ScopeHash, &item.ReasonCode, &item.Reason, &item.ConfigMode, &item.RateLimitPerMinute, &item.TotalItems, &item.ProcessedItems, &item.FailedItems, &item.ApprovalRequired, &approvalExpiresAt, &item.ApprovedBy, &approvedAt, &item.CreatedBy, &createdAt); err != nil { return app.ReplayJob{}, err } + if approvalExpiresAt.Valid { + t := approvalExpiresAt.Time.UTC() + item.ApprovalExpiresAt = &t + } if approvedAt.Valid { t := approvedAt.Time.UTC() item.ApprovedAt = &t } + created := createdAt.UTC() + item.CreatedAt = &created return item, nil } @@ -5521,6 +5760,36 @@ func (s *Store) CreateReplay(ctx context.Context, tenantID, actorID string, req if req.ConfigMode == "" { req.ConfigMode = app.ReplayConfigCurrent } + req.ReasonCode = strings.TrimSpace(req.ReasonCode) + if req.ReasonCode == "" { + req.ReasonCode = app.ReplayReasonOperatorRequested + } + req.Reason = strings.TrimSpace(req.Reason) + if policy, ok, err := s.replayApprovalPolicyForReplay(ctx, tenantID, req); err != nil { + return app.ReplayJob{}, err + } else if ok && policy.RequireApproval { + req.RequireApproval = true + if req.ApprovalExpiresAt == nil { + 
defaultExpirySeconds := policy.DefaultExpirySeconds + if defaultExpirySeconds <= 0 { + defaultExpirySeconds = int(app.ReplayApprovalDefaultExpiry / time.Second) + } + approvalExpiresAt := time.Now().UTC().Add(time.Duration(defaultExpirySeconds) * time.Second) + req.ApprovalExpiresAt = &approvalExpiresAt + } + } + if req.RequireApproval { + if req.ApprovalExpiresAt == nil { + approvalExpiresAt := time.Now().UTC().Add(app.ReplayApprovalDefaultExpiry) + req.ApprovalExpiresAt = &approvalExpiresAt + } else { + approvalExpiresAt := req.ApprovalExpiresAt.UTC() + req.ApprovalExpiresAt = &approvalExpiresAt + } + if !req.ApprovalExpiresAt.After(time.Now().UTC()) { + return app.ReplayJob{}, fmt.Errorf("%w: approval_expires_at must be in the future", app.ErrInvalidInput) + } + } id := mustID("rpl") scopeBytes, _ := json.Marshal(req) scopeHash := domain.HashSHA256(scopeBytes) @@ -5537,21 +5806,21 @@ func (s *Store) CreateReplay(ctx context.Context, tenantID, actorID string, req if req.RequireApproval { state = "pending_approval" } - if _, err := tx.Exec(ctx, `INSERT INTO replay_jobs(id, tenant_id, state, scope_hash, scope_json, reason, created_by, total_items, config_mode, rate_limit_per_minute, approval_required) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)`, id, tenantID, state, scopeHash, scopeBytes, req.Reason, actorID, dryRun.WouldCreateDeliveries, req.ConfigMode, req.RateLimitPerMinute, req.RequireApproval); err != nil { + if _, err := tx.Exec(ctx, `INSERT INTO replay_jobs(id, tenant_id, state, scope_hash, scope_json, reason_code, reason, created_by, total_items, config_mode, rate_limit_per_minute, approval_required, approval_expires_at) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13)`, id, tenantID, state, scopeHash, scopeBytes, req.ReasonCode, req.Reason, actorID, dryRun.WouldCreateDeliveries, req.ConfigMode, req.RateLimitPerMinute, req.RequireApproval, req.ApprovalExpiresAt); err != nil { return app.ReplayJob{}, err } if !req.RequireApproval { - if _, err := 
tx.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,'replay_job',$3,$4)`, mustID("out"), tenantID, id, scopeBytes); err != nil { + if _, err := tx.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,$3,$4,$5)`, mustID("out"), tenantID, app.OutboxKindReplayJob, id, scopeBytes); err != nil { return app.ReplayJob{}, err } } - if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "replay.created", Resource: "replay_job", ResourceID: id, Reason: req.Reason}); err != nil { + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "replay.created", Resource: "replay_job", ResourceID: id, Reason: replayAuditReason(req)}); err != nil { return app.ReplayJob{}, err } if err := tx.Commit(ctx); err != nil { return app.ReplayJob{}, err } - return app.ReplayJob{ID: id, State: state, ScopeHash: scopeHash, ConfigMode: req.ConfigMode, RateLimitPerMinute: req.RateLimitPerMinute, TotalItems: dryRun.WouldCreateDeliveries, ApprovalRequired: req.RequireApproval}, nil + return app.ReplayJob{ID: id, State: state, ScopeHash: scopeHash, ReasonCode: req.ReasonCode, Reason: req.Reason, ConfigMode: req.ConfigMode, RateLimitPerMinute: req.RateLimitPerMinute, TotalItems: dryRun.WouldCreateDeliveries, ApprovalRequired: req.RequireApproval, ApprovalExpiresAt: req.ApprovalExpiresAt, CreatedBy: actorID}, nil } func (s *Store) ListReplayJobs(ctx context.Context, tenantID string, limit int) ([]app.ReplayJob, error) { @@ -5571,6 +5840,33 @@ func (s *Store) ListReplayJobs(ctx context.Context, tenantID string, limit int) return out, rows.Err() } +func replayAuditReason(req app.ReplayRequest) string { + parts := []string{ + "reason_code=" + req.ReasonCode, + "reason=" + req.Reason, + "config_mode=" + req.ConfigMode, + } + if req.EventID != "" { + parts = append(parts, "event_id="+req.EventID) + } + if req.DeliveryID != "" { + parts = 
append(parts, "delivery_id="+req.DeliveryID) + } + if req.EndpointID != "" { + parts = append(parts, "endpoint_id="+req.EndpointID) + } + if req.RequireApproval { + parts = append(parts, "approval_required=true") + if req.ApprovalExpiresAt != nil { + parts = append(parts, "approval_expires_at="+req.ApprovalExpiresAt.UTC().Format(time.RFC3339)) + } + } + if req.RateLimitPerMinute > 0 { + parts = append(parts, "rate_limit_per_minute="+strconv.Itoa(req.RateLimitPerMinute)) + } + return strings.Join(parts, " ") +} + func (s *Store) ApproveReplayJob(ctx context.Context, tenantID, replayJobID, actorID, reason string) (app.ReplayJob, error) { tx, err := s.pool.Begin(ctx) if err != nil { @@ -5579,7 +5875,7 @@ func (s *Store) ApproveReplayJob(ctx context.Context, tenantID, replayJobID, act defer rollback(ctx, tx) var scopeBytes []byte - err = tx.QueryRow(ctx, `SELECT scope_json FROM replay_jobs WHERE tenant_id=$1 AND id=$2 AND state='pending_approval' AND approval_required=true FOR UPDATE`, tenantID, replayJobID).Scan(&scopeBytes) + err = tx.QueryRow(ctx, `SELECT scope_json FROM replay_jobs WHERE tenant_id=$1 AND id=$2 AND created_by<>$3 AND state='pending_approval' AND approval_required=true AND approval_expires_at > now() FOR UPDATE`, tenantID, replayJobID, actorID).Scan(&scopeBytes) if errors.Is(err, pgx.ErrNoRows) { return app.ReplayJob{}, app.ErrNotFound } @@ -5589,14 +5885,14 @@ func (s *Store) ApproveReplayJob(ctx context.Context, tenantID, replayJobID, act item, err := scanReplayJob(tx.QueryRow(ctx, ` UPDATE replay_jobs SET state='scheduled', approved_by=$1, approved_at=now(), approval_reason=$2 - WHERE tenant_id=$3 AND id=$4 AND state='pending_approval' AND approval_required=true - RETURNING id, state, scope_hash, config_mode, rate_limit_per_minute, total_items, processed_items, failed_items, approval_required, COALESCE(approved_by,''), approved_at`, + WHERE tenant_id=$3 AND id=$4 AND state='pending_approval' AND approval_required=true AND created_by<>$1 AND 
approval_expires_at > now() + RETURNING `+replayJobColumns, actorID, reason, tenantID, replayJobID, )) if err != nil { return app.ReplayJob{}, err } - if _, err := tx.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,'replay_job',$3,$4)`, mustID("out"), tenantID, replayJobID, scopeBytes); err != nil { + if _, err := tx.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,$3,$4,$5)`, mustID("out"), tenantID, app.OutboxKindReplayJob, replayJobID, scopeBytes); err != nil { return app.ReplayJob{}, err } if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: "replay.approved", Resource: "replay_job", ResourceID: replayJobID, Reason: reason}); err != nil { @@ -5632,14 +5928,24 @@ func (s *Store) updateReplayState(ctx context.Context, tenantID, replayJobID, ac if state == "scheduled" { stateGuard = "state NOT IN ('completed','canceled','pending_approval')" } - item, err := scanReplayJob(s.pool.QueryRow(ctx, `UPDATE replay_jobs SET state=$1`+extra+` WHERE tenant_id=$2 AND id=$3 AND `+stateGuard+` RETURNING id, state, scope_hash, config_mode, rate_limit_per_minute, total_items, processed_items, failed_items, approval_required, COALESCE(approved_by,''), approved_at`, state, tenantID, replayJobID)) + tx, err := s.pool.Begin(ctx) + if err != nil { + return app.ReplayJob{}, err + } + defer rollback(ctx, tx) + item, err := scanReplayJob(tx.QueryRow(ctx, `UPDATE replay_jobs SET state=$1`+extra+` WHERE tenant_id=$2 AND id=$3 AND `+stateGuard+` RETURNING `+replayJobColumns, state, tenantID, replayJobID)) if errors.Is(err, pgx.ErrNoRows) { return app.ReplayJob{}, app.ErrNotFound } if err != nil { return app.ReplayJob{}, err } - _ = s.recordAuditEvent(ctx, auditEventInput{TenantID: tenantID, ActorID: actorID, Action: action, Resource: "replay_job", ResourceID: replayJobID, Reason: reason}) + if _, err := recordAuditEventTx(ctx, tx, auditEventInput{TenantID: tenantID, 
ActorID: actorID, Action: action, Resource: "replay_job", ResourceID: replayJobID, Reason: reason}); err != nil { + return app.ReplayJob{}, err + } + if err := tx.Commit(ctx); err != nil { + return app.ReplayJob{}, err + } return item, nil } @@ -5658,7 +5964,7 @@ func (s *Store) ClaimOutbox(ctx context.Context, workerID string, limit int) ([] rows, err := tx.Query(ctx, ` WITH candidates AS ( SELECT id, tenant_id, available_at, - CASE kind WHEN 'route_event' THEN 0 WHEN 'replay_job' THEN 1 ELSE 2 END AS priority + CASE kind WHEN 'route_event' THEN 0 WHEN 'route_recovered_event' THEN 0 WHEN 'replay_job' THEN 1 ELSE 2 END AS priority FROM outbox WHERE state='pending' AND available_at <= now() ), @@ -5708,287 +6014,304 @@ func (s *Store) CompleteOutbox(ctx context.Context, outboxID string) error { return err } -func (s *Store) ProcessOutbox(ctx context.Context, item worker.OutboxItem) error { - switch item.Kind { - case "route_event": - _, err := s.createDeliveriesForEvent(ctx, item.TenantID, item.ResourceID) - return err - case "replay_job": - return s.createReplayDeliveries(ctx, item.TenantID, item.ResourceID) - case "reconciliation_job": - return s.RunReconciliationJob(ctx, item.TenantID, item.ResourceID) - default: - return nil +func (s *Store) enqueueRouteEvent(ctx context.Context, tenantID, eventID string, allowRecovered bool) error { + kind := app.OutboxKindRouteEvent + if allowRecovered { + kind = app.OutboxKindRouteRecoveredEvent } + payload, _ := json.Marshal(map[string]any{"event_id": eventID, "allow_recovered": allowRecovered}) + _, err := s.pool.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,$3,$4,$5)`, mustID("out"), tenantID, kind, eventID, payload) + return err } -func (s *Store) RunReconciliationJob(ctx context.Context, tenantID, jobID string) error { - job, err := scanReconciliationJob(s.pool.QueryRow(ctx, reconciliationJobSelectSQL()+` WHERE tenant_id=$1 AND id=$2`, tenantID, jobID)) - if errors.Is(err, 
pgx.ErrNoRows) { - return app.ErrNotFound - } +func (s *Store) ListDeliveryFanoutTargets(ctx context.Context, tenantID, sourceID, eventType string) ([]app.DeliveryFanoutTarget, error) { + subRows, err := s.pool.Query(ctx, ` + SELECT s.id, s.endpoint_id, s.active_version_id, COALESCE(NULLIF(e.retry_policy_id,''),''), COALESCE(NULLIF(s.transformation_version_id,''),'') + FROM subscriptions s + JOIN endpoints e ON e.tenant_id=s.tenant_id AND e.id=s.endpoint_id + WHERE s.tenant_id=$1 AND s.state='active' AND $2 = ANY(s.event_types)`, tenantID, eventType) if err != nil { - return err + return nil, err } - job = normalizeReconciliationJob(job) - if job.State == domain.ReconciliationJobStateCanceled || job.State == domain.ReconciliationJobStateCompleted { - return nil + var out []app.DeliveryFanoutTarget + for subRows.Next() { + var target app.DeliveryFanoutTarget + if err := subRows.Scan(&target.SubscriptionID, &target.EndpointID, &target.SubscriptionVersionID, &target.EndpointRetryPolicyID, &target.TransformationVersionID); err != nil { + subRows.Close() + return nil, err + } + out = append(out, target) } - conn, credential, err := s.getProviderConnectionSecret(ctx, tenantID, job.ConnectionID) + subRows.Close() + if err := subRows.Err(); err != nil { + return nil, err + } + routeRows, err := s.pool.Query(ctx, ` + SELECT r.id, r.endpoint_id, r.active_version_id, COALESCE(NULLIF(r.retry_policy_id,''),''), COALESCE(NULLIF(e.retry_policy_id,''),''), COALESCE(NULLIF(r.transformation_version_id,''),'') + FROM routes r + JOIN endpoints e ON e.tenant_id=r.tenant_id AND e.id=r.endpoint_id + WHERE r.tenant_id=$1 AND r.source_id=$2 AND r.state='active' AND $3 = ANY(r.event_types) + ORDER BY r.priority ASC`, tenantID, sourceID, eventType) if err != nil { - return s.failReconciliationJob(ctx, tenantID, jobID, err) + return nil, err } - adapter, ok := reconcile.BuiltInRegistry(nil).Adapter(conn.Provider) - if !ok { - return s.failReconciliationJob(ctx, tenantID, jobID, 
fmt.Errorf("unsupported provider %q", conn.Provider)) + defer routeRows.Close() + for routeRows.Next() { + var target app.DeliveryFanoutTarget + if err := routeRows.Scan(&target.RouteID, &target.EndpointID, &target.RouteVersionID, &target.RouteRetryPolicyID, &target.EndpointRetryPolicyID, &target.TransformationVersionID); err != nil { + return nil, err + } + out = append(out, target) } - if _, err := s.pool.Exec(ctx, `UPDATE reconciliation_jobs SET state='running', started_at=COALESCE(started_at, now()) WHERE tenant_id=$1 AND id=$2 AND state='scheduled'`, tenantID, jobID); err != nil { - return err + return out, routeRows.Err() +} + +func (s *Store) CreateDeliverySnapshot(ctx context.Context, req app.DeliverySnapshotRequest) (app.DeliverySnapshotResult, error) { + deliveryID := mustID("del") + retrySeed := req.RetrySeed + if retrySeed == "" { + retrySeed = deliveryRetrySeed(req.TenantID, deliveryID, req.EventID, req.EndpointID) } - caps := adapter.Capabilities(conn.Config) - if !caps.CanScanEvents { - metadata, _ := json.Marshal(map[string]any{"limitations": caps.Limitations}) - if _, err := s.insertReconciliationItem(ctx, reconciliationItemInput{ - tenantID: tenantID, jobID: jobID, provider: conn.Provider, objectID: conn.Provider + ":unsupported", objectType: "capability", - outcome: domain.ReconciliationOutcomeUnrecoverable, errText: strings.Join(caps.Limitations, "; "), metadata: metadata, - }); err != nil { - return err - } - return s.completeReconciliationJob(ctx, tenantID, jobID) + tx, err := s.pool.Begin(ctx) + if err != nil { + return app.DeliverySnapshotResult{}, err } - scan, err := adapter.Scan(ctx, reconcile.ScanRequest{ - Connection: reconcile.Connection{ - ID: conn.ID, Provider: conn.Provider, CredentialType: conn.CredentialType, Credential: credential, Config: conn.Config, - }, - WindowStart: job.WindowStart, WindowEnd: job.WindowEnd, ScopeObjectID: job.ScopeObjectID, Cursor: job.Cursor, - CaptureMissing: job.CaptureMissing, RedeliverFailed: 
job.RedeliverFailed, - }) - for _, ev := range scan.Evidence { - if _, recErr := s.insertProviderAPIEvidence(ctx, tenantID, jobID, "", conn.ID, conn.Provider, ev); recErr != nil { - return recErr - } + defer rollback(ctx, tx) + if _, err := tx.Exec(ctx, ` + INSERT INTO deliveries(id, tenant_id, event_id, endpoint_id, route_id, route_version_id, subscription_id, subscription_version_id, retry_policy_id, replay_job_id, adapter_version_id, normalized_envelope_id, transformation_version_id, retry_seed, state, next_attempt_at) + VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,'scheduled',$15)`, + deliveryID, req.TenantID, req.EventID, req.EndpointID, req.RouteID, req.RouteVersionID, + req.SubscriptionID, req.SubscriptionVersionID, req.RetryPolicyID, req.ReplayJobID, + req.AdapterVersionID, req.NormalizedEnvelopeID, req.TransformationVersionID, retrySeed, req.NextAttemptAt, + ); err != nil { + return app.DeliverySnapshotResult{}, err + } + var payloadID, normalizedID, adapterVersionID, transformationVersionID string + switch req.DeliveryPayloadMode { + case app.DeliveryPayloadClone: + payloadID, normalizedID, adapterVersionID, transformationVersionID, err = s.cloneDeliveryPayload(ctx, tx, req.TenantID, req.SourceDeliveryPayloadID, deliveryID) + default: + payloadID, normalizedID, adapterVersionID, err = s.createDeliveryPayload(ctx, tx, req.TenantID, req.EventID, deliveryID, req.TransformationVersionID) + transformationVersionID = req.TransformationVersionID } if err != nil { - return s.failReconciliationJob(ctx, tenantID, jobID, err) + return app.DeliverySnapshotResult{}, err } - for _, object := range scan.Objects { - if err := s.reconcileProviderObject(ctx, job, conn, credential, adapter, object); err != nil { - return s.failReconciliationJob(ctx, tenantID, jobID, err) - } + payloadHash, err := s.deliveryPayloadSHA256(ctx, tx, req.TenantID, payloadID) + if err != nil { + return app.DeliverySnapshotResult{}, err } - if scan.NextCursor != "" { - if _, err := 
s.pool.Exec(ctx, `UPDATE reconciliation_jobs SET cursor=$1 WHERE tenant_id=$2 AND id=$3`, scan.NextCursor, tenantID, jobID); err != nil { - return err + result := app.DeliverySnapshotResult{ + DeliveryID: deliveryID, + DeliveryPayloadID: payloadID, + DeliveryPayloadSHA256: payloadHash, + AdapterVersionID: adapterVersionID, + NormalizedEnvelopeID: normalizedID, + TransformationVersionID: transformationVersionID, + } + if req.ReplayJobID != "" { + if err := insertReplayDecisionEvidence(ctx, tx, app.ReplayDecisionEvidence{ + TenantID: req.TenantID, ReplayJobID: req.ReplayJobID, EventID: req.EventID, + OriginalDeliveryID: req.OriginalDeliveryID, NewDeliveryID: result.DeliveryID, ConfigMode: req.ConfigMode, + RouteVersionID: req.RouteVersionID, SubscriptionVersionID: req.SubscriptionVersionID, RetryPolicyID: req.RetryPolicyID, + AdapterVersionID: result.AdapterVersionID, NormalizedEnvelopeID: result.NormalizedEnvelopeID, + TransformationVersionID: result.TransformationVersionID, DeliveryPayloadID: result.DeliveryPayloadID, DeliveryPayloadSHA256: result.DeliveryPayloadSHA256, + }); err != nil { + return app.DeliverySnapshotResult{}, err } } - return s.completeReconciliationJob(ctx, tenantID, jobID) + if err := tx.Commit(ctx); err != nil { + return app.DeliverySnapshotResult{}, err + } + return result, nil } -func (s *Store) reconcileProviderObject(ctx context.Context, job domain.ReconciliationJob, conn domain.ProviderConnection, credential string, adapter reconcile.Adapter, object reconcile.ProviderObject) error { - tenantID := job.TenantID - localEventID, err := s.findLocalProviderEvent(ctx, tenantID, conn, object.ID) +func (s *Store) GetReplayJobWork(ctx context.Context, tenantID, replayJobID string) (app.ReplayJobWork, error) { + var work app.ReplayJobWork + var scopeBytes []byte + err := s.pool.QueryRow(ctx, `SELECT scope_json, state, config_mode, rate_limit_per_minute FROM replay_jobs WHERE tenant_id=$1 AND id=$2`, tenantID, replayJobID). 
+ Scan(&scopeBytes, &work.State, &work.ConfigMode, &work.RateLimitPerMinute) + if errors.Is(err, pgx.ErrNoRows) { + return app.ReplayJobWork{}, app.ErrNotFound + } if err != nil { - return err + return app.ReplayJobWork{}, err } - outcome := domain.ReconciliationOutcomeMatched - if localEventID == "" { - outcome = domain.ReconciliationOutcomeMissing - } - metadata, _ := json.Marshal(object.Metadata) - var evidenceID string - var recoveredEventID string - var errText string - if localEventID == "" && job.CaptureMissing { - lookupObject := object - lookupEvidence := []reconcile.Evidence(nil) - if len(lookupObject.RawBody) == 0 || !lookupObject.Recoverable { - lookupID := providerLookupID(object) - lookedUp, evs, lookupErr := adapter.Lookup(ctx, reconcile.Connection{ID: conn.ID, Provider: conn.Provider, CredentialType: conn.CredentialType, Credential: credential, Config: conn.Config}, lookupID) - lookupEvidence = evs - if lookupErr == nil { - lookupObject = lookedUp - } else if errors.Is(lookupErr, reconcile.ErrUnsupported) { - outcome = domain.ReconciliationOutcomeUnrecoverable - errText = "provider does not expose recoverable payload evidence for this object" - } else { - outcome = domain.ReconciliationOutcomeFailed - errText = providerErrorForDB(lookupErr) - } - } - for _, ev := range lookupEvidence { - id, recErr := s.insertProviderAPIEvidence(ctx, tenantID, job.ID, "", conn.ID, conn.Provider, ev) - if recErr != nil { - return recErr - } - evidenceID = id - } - if outcome == domain.ReconciliationOutcomeMissing && lookupObject.Recoverable && len(lookupObject.RawBody) > 0 { - recoveredEventID, err = s.captureRecoveredProviderEvent(ctx, conn, lookupObject, job.RouteRecovered) - if err != nil { - outcome = domain.ReconciliationOutcomeFailed - errText = err.Error() - } else { - outcome = domain.ReconciliationOutcomeCaptured - } - } else if outcome == domain.ReconciliationOutcomeMissing { - outcome = domain.ReconciliationOutcomeUnrecoverable - errText = "provider API 
did not include a recoverable payload body" - } - } - redeliveryRequested := false - if job.RedeliverFailed && object.Failed && object.Redeliverable { - evs, redeliverErr := adapter.RequestRedelivery(ctx, reconcile.Connection{ID: conn.ID, Provider: conn.Provider, CredentialType: conn.CredentialType, Credential: credential, Config: conn.Config}, providerLookupID(object)) - for _, ev := range evs { - id, recErr := s.insertProviderAPIEvidence(ctx, tenantID, job.ID, "", conn.ID, conn.Provider, ev) - if recErr != nil { - return recErr - } - evidenceID = id - } - if redeliverErr != nil { - outcome = domain.ReconciliationOutcomeFailed - errText = providerErrorForDB(redeliverErr) - } else { - outcome = domain.ReconciliationOutcomeRedeliveryRequested - redeliveryRequested = true - } + if err := json.Unmarshal(scopeBytes, &work.Request); err != nil { + return app.ReplayJobWork{}, err } - itemID, err := s.insertReconciliationItem(ctx, reconciliationItemInput{ - tenantID: tenantID, jobID: job.ID, provider: conn.Provider, objectID: object.ID, objectType: object.ObjectType, - outcome: outcome, localEventID: localEventID, recoveredEventID: recoveredEventID, evidenceID: evidenceID, - redeliveryRequested: redeliveryRequested, errText: errText, metadata: metadata, - }) - if err != nil { - return err + if work.Request.ConfigMode == "" { + work.Request.ConfigMode = work.ConfigMode } - if evidenceID != "" { - _, _ = s.pool.Exec(ctx, `UPDATE provider_api_evidence SET item_id=$1 WHERE tenant_id=$2 AND id=$3`, itemID, tenantID, evidenceID) + if work.Request.RateLimitPerMinute == 0 { + work.Request.RateLimitPerMinute = work.RateLimitPerMinute } - return nil + return work, nil } -func (s *Store) createDeliveriesForEvent(ctx context.Context, tenantID, eventID string) (int, error) { - return s.createDeliveriesForEventWithOptions(ctx, tenantID, eventID, deliveryCreationOptions{}) -} - -type deliveryCreationOptions struct { - ReplayJobID string - ConfigMode string - RateLimitPerMinute int - 
AllowRecovered bool +func (s *Store) StartReplayJob(ctx context.Context, tenantID, replayJobID string) (bool, error) { + tag, err := s.pool.Exec(ctx, `UPDATE replay_jobs SET state='running' WHERE tenant_id=$1 AND id=$2 AND state='scheduled'`, tenantID, replayJobID) + if err != nil { + return false, err + } + return tag.RowsAffected() > 0, nil } -func (s *Store) createDeliveriesForEventWithOptions(ctx context.Context, tenantID, eventID string, opts deliveryCreationOptions) (int, error) { - event, err := s.GetEvent(ctx, tenantID, eventID) +func (s *Store) ListOriginalDeliveryReplaySources(ctx context.Context, tenantID, eventID string) ([]app.DeliveryReplaySource, error) { + rows, err := s.pool.Query(ctx, ` + SELECT id, event_id, endpoint_id, COALESCE(route_id,''), COALESCE(route_version_id,''), COALESCE(subscription_id,''), COALESCE(subscription_version_id,''), COALESCE(retry_policy_id,''), COALESCE(adapter_version_id,''), COALESCE(normalized_envelope_id,''), COALESCE(transformation_version_id,''), COALESCE(delivery_payload_id,'') + FROM deliveries + WHERE tenant_id=$1 + AND event_id=$2 + AND COALESCE(replay_job_id,'') = '' + ORDER BY created_at ASC, id ASC`, + tenantID, eventID, + ) if err != nil { - return 0, err + return nil, err } - if !event.Verified && (!opts.AllowRecovered || event.VerifyReason != domain.VerificationReasonProviderAPIReconcile) { - return 0, nil + defer rows.Close() + var out []app.DeliveryReplaySource + for rows.Next() { + var item app.DeliveryReplaySource + if err := rows.Scan(&item.ID, &item.EventID, &item.EndpointID, &item.RouteID, &item.RouteVersionID, &item.SubscriptionID, &item.SubscriptionVersionID, &item.RetryPolicyID, &item.AdapterVersionID, &item.NormalizedEnvelopeID, &item.TransformationVersionID, &item.DeliveryPayloadID); err != nil { + return nil, err + } + out = append(out, item) } - tx, err := s.pool.Begin(ctx) - if err != nil { - return 0, err + return out, rows.Err() +} + +func (s *Store) GetDeliveryReplaySource(ctx 
context.Context, tenantID, deliveryID string) (app.DeliveryReplaySource, error) { + var item app.DeliveryReplaySource + err := s.pool.QueryRow(ctx, ` + SELECT id, event_id, endpoint_id, COALESCE(route_id,''), COALESCE(route_version_id,''), COALESCE(subscription_id,''), COALESCE(subscription_version_id,''), COALESCE(retry_policy_id,''), COALESCE(adapter_version_id,''), COALESCE(normalized_envelope_id,''), COALESCE(transformation_version_id,''), COALESCE(delivery_payload_id,'') + FROM deliveries + WHERE tenant_id=$1 AND id=$2`, tenantID, deliveryID). + Scan(&item.ID, &item.EventID, &item.EndpointID, &item.RouteID, &item.RouteVersionID, &item.SubscriptionID, &item.SubscriptionVersionID, &item.RetryPolicyID, &item.AdapterVersionID, &item.NormalizedEnvelopeID, &item.TransformationVersionID, &item.DeliveryPayloadID) + if errors.Is(err, pgx.ErrNoRows) { + return app.DeliveryReplaySource{}, app.ErrNotFound } - defer rollback(ctx, tx) - created := 0 - type deliveryDecision struct { - subscriptionID string - subscriptionVersionID string - routeID string - routeVersionID string - endpointID string - retryPolicyID string - transformationVersionID string - } - createFromDecision := func(decision deliveryDecision) error { - deliveryID := mustID("del") - retrySeed := deliveryRetrySeed(tenantID, deliveryID, eventID, decision.endpointID) - if decision.subscriptionID != "" { - if _, err := tx.Exec(ctx, `INSERT INTO deliveries(id, tenant_id, event_id, endpoint_id, subscription_id, subscription_version_id, retry_policy_id, replay_job_id, transformation_version_id, retry_seed, state, next_attempt_at) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,'scheduled',$11)`, deliveryID, tenantID, eventID, decision.endpointID, decision.subscriptionID, decision.subscriptionVersionID, decision.retryPolicyID, opts.ReplayJobID, decision.transformationVersionID, retrySeed, scheduledDeliveryAt(created, opts.RateLimitPerMinute)); err != nil { - return err - } - } else { - if _, err := tx.Exec(ctx, `INSERT INTO 
deliveries(id, tenant_id, event_id, endpoint_id, route_id, route_version_id, retry_policy_id, replay_job_id, transformation_version_id, retry_seed, state, next_attempt_at) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,'scheduled',$11)`, deliveryID, tenantID, eventID, decision.endpointID, decision.routeID, decision.routeVersionID, decision.retryPolicyID, opts.ReplayJobID, decision.transformationVersionID, retrySeed, scheduledDeliveryAt(created, opts.RateLimitPerMinute)); err != nil { - return err - } - } - payloadID, normalizedID, adapterVersionID, err := s.createDeliveryPayload(ctx, tx, tenantID, eventID, deliveryID, decision.transformationVersionID) - if err != nil { - return err - } - payloadHash, err := s.deliveryPayloadSHA256(ctx, tx, tenantID, payloadID) - if err != nil { - return err + return item, err +} + +func (s *Store) GetCurrentDeliveryFanoutTarget(ctx context.Context, tenantID, routeID, subscriptionID string) (app.DeliveryFanoutTarget, bool, error) { + var target app.DeliveryFanoutTarget + if routeID != "" { + err := s.pool.QueryRow(ctx, ` + SELECT r.id, r.endpoint_id, r.active_version_id, + COALESCE(NULLIF(r.retry_policy_id,''), ''), + COALESCE(NULLIF(e.retry_policy_id,''), ''), + COALESCE(NULLIF(r.transformation_version_id,''),'') + FROM routes r + JOIN endpoints e ON e.tenant_id=r.tenant_id AND e.id=r.endpoint_id + WHERE r.tenant_id=$1 AND r.id=$2 AND r.state='active'`, + tenantID, routeID, + ).Scan(&target.RouteID, &target.EndpointID, &target.RouteVersionID, &target.RouteRetryPolicyID, &target.EndpointRetryPolicyID, &target.TransformationVersionID) + if errors.Is(err, pgx.ErrNoRows) { + return app.DeliveryFanoutTarget{}, false, nil } - if opts.ReplayJobID != "" { - if err := insertReplayDecisionEvidence(ctx, tx, replayEvidence{ - tenantID: tenantID, replayJobID: opts.ReplayJobID, eventID: eventID, newDeliveryID: deliveryID, - configMode: opts.ConfigMode, routeVersionID: decision.routeVersionID, subscriptionVersionID: decision.subscriptionVersionID, 
retryPolicyID: decision.retryPolicyID, - adapterVersionID: adapterVersionID, normalizedEnvelopeID: normalizedID, transformationVersionID: decision.transformationVersionID, deliveryPayloadID: payloadID, deliveryPayloadSHA256: payloadHash, - }); err != nil { - return err - } + return target, err == nil, err + } + if subscriptionID != "" { + err := s.pool.QueryRow(ctx, ` + SELECT s.id, s.endpoint_id, s.active_version_id, + COALESCE(NULLIF(e.retry_policy_id,''), ''), + COALESCE(NULLIF(s.transformation_version_id,''),'') + FROM subscriptions s + JOIN endpoints e ON e.tenant_id=s.tenant_id AND e.id=s.endpoint_id + WHERE s.tenant_id=$1 AND s.id=$2 AND s.state='active'`, + tenantID, subscriptionID, + ).Scan(&target.SubscriptionID, &target.EndpointID, &target.SubscriptionVersionID, &target.EndpointRetryPolicyID, &target.TransformationVersionID) + if errors.Is(err, pgx.ErrNoRows) { + return app.DeliveryFanoutTarget{}, false, nil } - created++ - return nil + return target, err == nil, err } + return target, true, nil +} - subRows, err := tx.Query(ctx, ` - SELECT s.id, s.endpoint_id, s.active_version_id, COALESCE(NULLIF(e.retry_policy_id,''),''), COALESCE(NULLIF(s.transformation_version_id,''),'') - FROM subscriptions s - JOIN endpoints e ON e.tenant_id=s.tenant_id AND e.id=s.endpoint_id - WHERE s.tenant_id=$1 AND s.state='active' AND $2 = ANY(s.event_types)`, tenantID, event.Type) - if err != nil { - return 0, err - } - var subscriptionDecisions []deliveryDecision - for subRows.Next() { - var decision deliveryDecision - if err := subRows.Scan(&decision.subscriptionID, &decision.endpointID, &decision.subscriptionVersionID, &decision.retryPolicyID, &decision.transformationVersionID); err != nil { - subRows.Close() - return 0, err - } - subscriptionDecisions = append(subscriptionDecisions, decision) +func (s *Store) InsertReplayNoopItem(ctx context.Context, tenantID, replayJobID, eventID, configMode, errorText string) error { + _, err := s.pool.Exec(ctx, ` + INSERT INTO 
replay_items(id, tenant_id, replay_job_id, event_id, state, config_mode, error, completed_at) + VALUES($1,$2,$3,$4,'completed',$5,$6,now())`, + mustID("rpi"), tenantID, replayJobID, eventID, configMode, errorText) + return err +} + +func (s *Store) CompleteReplayJob(ctx context.Context, tenantID, replayJobID string, processedItems int) error { + _, err := s.pool.Exec(ctx, `UPDATE replay_jobs SET state='completed', processed_items=$3, completed_at=now() WHERE tenant_id=$1 AND id=$2 AND state <> 'canceled'`, tenantID, replayJobID, processedItems) + return err +} + +func (s *Store) GetReconciliationConnection(ctx context.Context, tenantID, connectionID string) (domain.ProviderConnection, string, error) { + return s.getProviderConnectionSecret(ctx, tenantID, connectionID) +} + +func (s *Store) GetReconciliationWork(ctx context.Context, tenantID, jobID string) (app.ReconciliationWork, error) { + job, err := scanReconciliationJob(s.pool.QueryRow(ctx, reconciliationJobSelectSQL()+` WHERE tenant_id=$1 AND id=$2`, tenantID, jobID)) + if errors.Is(err, pgx.ErrNoRows) { + return app.ReconciliationWork{}, app.ErrNotFound } - subRows.Close() - if err := subRows.Err(); err != nil { - return 0, err + if err != nil { + return app.ReconciliationWork{}, err } - for _, decision := range subscriptionDecisions { - if err := createFromDecision(decision); err != nil { - return 0, err - } + conn, credential, err := s.getProviderConnectionSecret(ctx, tenantID, job.ConnectionID) + if err != nil { + return app.ReconciliationWork{}, err } + return app.ReconciliationWork{Job: normalizeReconciliationJob(job), Connection: conn, Credential: credential}, nil +} - routeRows, err := tx.Query(ctx, ` - SELECT r.id, r.endpoint_id, r.active_version_id, COALESCE(NULLIF(r.retry_policy_id,''), NULLIF(e.retry_policy_id,''), ''), COALESCE(NULLIF(r.transformation_version_id,''),'') - FROM routes r - JOIN endpoints e ON e.tenant_id=r.tenant_id AND e.id=r.endpoint_id - WHERE r.tenant_id=$1 AND r.source_id=$2 
AND r.state='active' AND $3 = ANY(r.event_types) - ORDER BY r.priority ASC`, tenantID, event.SourceID, event.Type) +func (s *Store) StartReconciliationJob(ctx context.Context, tenantID, jobID string) (bool, error) { + tag, err := s.pool.Exec(ctx, `UPDATE reconciliation_jobs SET state='running', started_at=COALESCE(started_at, now()) WHERE tenant_id=$1 AND id=$2 AND state='scheduled'`, tenantID, jobID) if err != nil { - return 0, err - } - var routeDecisions []deliveryDecision - for routeRows.Next() { - var decision deliveryDecision - if err := routeRows.Scan(&decision.routeID, &decision.endpointID, &decision.routeVersionID, &decision.retryPolicyID, &decision.transformationVersionID); err != nil { - routeRows.Close() - return 0, err - } - routeDecisions = append(routeDecisions, decision) - } - routeRows.Close() - if err := routeRows.Err(); err != nil { - return 0, err - } - for _, decision := range routeDecisions { - if err := createFromDecision(decision); err != nil { - return 0, err - } + return false, err } - return created, tx.Commit(ctx) + return tag.RowsAffected() > 0, nil +} + +func (s *Store) RecordProviderAPIEvidence(ctx context.Context, record app.ProviderAPIEvidenceRecord) (string, error) { + return s.insertProviderAPIEvidence(ctx, record.TenantID, record.JobID, record.ItemID, record.ConnectionID, record.Provider, reconcile.Evidence{ + Method: record.Evidence.Method, URL: record.Evidence.URL, StatusCode: record.Evidence.StatusCode, + Body: record.Evidence.Body, Error: record.Evidence.Error, + }) +} + +func (s *Store) FindLocalProviderEvent(ctx context.Context, tenantID string, conn domain.ProviderConnection, providerObjectID string) (string, error) { + return s.findLocalProviderEvent(ctx, tenantID, conn, providerObjectID) +} + +func (s *Store) InsertReconciliationItem(ctx context.Context, input app.ReconciliationItemRecord) (string, error) { + return s.insertReconciliationItem(ctx, reconciliationItemInput{ + tenantID: input.TenantID, jobID: input.JobID, 
provider: input.Provider, objectID: input.ObjectID, objectType: input.ObjectType, + outcome: input.Outcome, localEventID: input.LocalEventID, recoveredEventID: input.RecoveredEventID, evidenceID: input.EvidenceID, + redeliveryRequested: input.RedeliveryRequested, errText: input.Error, metadata: input.Metadata, + }) +} + +func (s *Store) AttachProviderEvidenceToItem(ctx context.Context, tenantID, itemID, evidenceID string) error { + _, err := s.pool.Exec(ctx, `UPDATE provider_api_evidence SET item_id=$1 WHERE tenant_id=$2 AND id=$3`, itemID, tenantID, evidenceID) + return err +} + +func (s *Store) UpdateReconciliationCursor(ctx context.Context, tenantID, jobID, cursor string) error { + _, err := s.pool.Exec(ctx, `UPDATE reconciliation_jobs SET cursor=$1 WHERE tenant_id=$2 AND id=$3`, cursor, tenantID, jobID) + return err +} + +func (s *Store) CompleteReconciliationJob(ctx context.Context, tenantID, jobID string) error { + return s.completeReconciliationJob(ctx, tenantID, jobID) +} + +func (s *Store) FailReconciliationJob(ctx context.Context, tenantID, jobID, errorText string) error { + _, err := s.pool.Exec(ctx, ` + UPDATE reconciliation_jobs + SET state='failed', error=$3, completed_at=now() + WHERE tenant_id=$1 AND id=$2 AND state <> 'canceled'`, + tenantID, jobID, errorText, + ) + return err } func (s *Store) createDeliveryPayload(ctx context.Context, tx pgx.Tx, tenantID, eventID, deliveryID, transformationVersionID string) (payloadID, normalizedID, adapterVersionID string, err error) { @@ -6049,7 +6372,8 @@ func (s *Store) createDeliveryPayload(ctx context.Context, tx pgx.Tx, tenantID, return payloadID, normalizedID, adapterVersionID, nil } -func (s *Store) captureRecoveredProviderEvent(ctx context.Context, conn domain.ProviderConnection, object reconcile.ProviderObject, routeRecovered bool) (string, error) { +func (s *Store) CaptureRecoveredProviderEvent(ctx context.Context, input app.RecoveredProviderEventCapture) (string, error) { + conn := input.Connection 
sourceID := strings.TrimSpace(conn.Config["source_id"]) if sourceID == "" { return "", errors.New("provider connection config source_id is required for recovered capture") @@ -6067,14 +6391,14 @@ func (s *Store) captureRecoveredProviderEvent(ctx context.Context, conn domain.P eventID := mustID("evt") rawID := mustID("raw") receiptID := mustID("rcp") - rawHash := domain.HashSHA256(object.RawBody) - dedupeKey := "reconcile:" + conn.Provider + ":" + source.ID + ":" + object.ID + rawHash := domain.HashSHA256(input.RawBody) + dedupeKey := "reconcile:" + conn.Provider + ":" + source.ID + ":" + input.ObjectID raw := domain.RawPayload{ TenantID: conn.TenantID, SHA256: rawHash, ContentType: "application/json", - SizeBytes: int64(len(object.RawBody)), - Body: append([]byte(nil), object.RawBody...), + SizeBytes: int64(len(input.RawBody)), + Body: append([]byte(nil), input.RawBody...), CreatedAt: now, } storage, bodyForDB, err := s.prepareRawPayloadStorage(ctx, conn.TenantID, rawID, raw) @@ -6111,12 +6435,12 @@ func (s *Store) captureRecoveredProviderEvent(ctx context.Context, conn domain.P return "", err } } else { - eventType := firstNonEmpty(object.EventType, "unknown") + eventType := firstNonEmpty(input.EventType, "unknown") if _, err := tx.Exec(ctx, ` INSERT INTO events(id, tenant_id, source_id, provider, type, provider_event_id, raw_payload_id, raw_payload_hash, signature_verified, verification_reason, dedupe_key, dedupe_status, received_at, trace_id) VALUES($1,$2,$3,$4,$5,$6,$7,$8,false,$9,$10,$11,$12,$13)`, - eventID, conn.TenantID, source.ID, source.Provider, eventType, object.ID, rawID, rawHash, + eventID, conn.TenantID, source.ID, source.Provider, eventType, input.ObjectID, rawID, rawHash, domain.VerificationReasonProviderAPIReconcile, dedupeKey, domain.DedupeUnique, now, "", ); err != nil { return "", err @@ -6124,10 +6448,10 @@ func (s *Store) captureRecoveredProviderEvent(ctx context.Context, conn domain.P if _, err := tx.Exec(ctx, `UPDATE raw_payloads SET 
event_id=$1 WHERE id=$2`, eventID, rawID); err != nil { return "", err } - headers := headerPairsFromMap(object.RequestHeaders) + headers := headerPairsFromMap(input.RequestHeaders) normalized, err := provider.Normalize(provider.NormalizeInput{ Adapter: source.Adapter, Provider: source.Provider, TenantID: conn.TenantID, SourceID: source.ID, - RawBody: object.RawBody, Headers: domain.CanonicalHeaders(headers), Verified: false, + RawBody: input.RawBody, Headers: domain.CanonicalHeaders(headers), Verified: false, VerifyReason: domain.VerificationReasonProviderAPIReconcile, RawHash: rawHash, }) if err == nil { @@ -6164,7 +6488,7 @@ func (s *Store) captureRecoveredProviderEvent(ctx context.Context, conn domain.P return "", err } } - headersJSON, _ := json.Marshal(headerPairsFromMap(object.RequestHeaders)) + headersJSON, _ := json.Marshal(headerPairsFromMap(input.RequestHeaders)) if _, err := tx.Exec(ctx, ` INSERT INTO provider_receipts(id, tenant_id, source_id, event_id, raw_payload_id, raw_headers, remote_ip, verification_ok, verification_reason, received_at) VALUES($1,$2,$3,$4,$5,$6,'provider-api',false,$7,$8)`, @@ -6181,8 +6505,8 @@ func (s *Store) captureRecoveredProviderEvent(ctx context.Context, conn domain.P } return "", err } - if routeRecovered { - if _, err := s.createDeliveriesForEventWithOptions(ctx, conn.TenantID, eventID, deliveryCreationOptions{AllowRecovered: true}); err != nil { + if input.RouteRecovered { + if err := s.enqueueRouteEvent(ctx, conn.TenantID, eventID, true); err != nil { return "", err } } @@ -6267,317 +6591,29 @@ func (s *Store) legacyDeliveryEnvelope(ctx context.Context, tx pgx.Tx, tenantID, }) } -func (s *Store) createReplayDeliveries(ctx context.Context, tenantID, replayJobID string) error { - var scopeBytes []byte - var state, configMode string - var rateLimitPerMinute int - err := s.pool.QueryRow(ctx, `SELECT scope_json, state, config_mode, rate_limit_per_minute FROM replay_jobs WHERE tenant_id=$1 AND id=$2`, tenantID, 
replayJobID).Scan(&scopeBytes, &state, &configMode, &rateLimitPerMinute) - if errors.Is(err, pgx.ErrNoRows) { - return app.ErrNotFound - } - if err != nil { - return err - } - if state == "paused" || state == "pending_approval" { - return worker.ErrDeferred - } - if state != "scheduled" { - return nil - } - tag, err := s.pool.Exec(ctx, `UPDATE replay_jobs SET state='running' WHERE tenant_id=$1 AND id=$2 AND state='scheduled'`, tenantID, replayJobID) - if err != nil { - return err - } - if tag.RowsAffected() == 0 { - return worker.ErrDeferred - } - var req app.ReplayRequest - if err := json.Unmarshal(scopeBytes, &req); err != nil { - return err - } - created := 0 - if req.EventID != "" { - var count int - var err error - if configMode == app.ReplayConfigOriginal { - count, err = s.createDeliveriesFromOriginalEvent(ctx, tenantID, req.EventID, deliveryCreationOptions{ReplayJobID: replayJobID, ConfigMode: configMode, RateLimitPerMinute: rateLimitPerMinute}) - if err != nil { - return err - } - created += count - if count == 0 { - if _, err := s.pool.Exec(ctx, `INSERT INTO replay_items(id, tenant_id, replay_job_id, event_id, state, config_mode, error, completed_at) VALUES($1,$2,$3,$4,'completed',$5,'no original deliveries found',now())`, mustID("rpi"), tenantID, replayJobID, req.EventID, configMode); err != nil { - return err - } - } - } else { - count, err = s.createDeliveriesForEventWithOptions(ctx, tenantID, req.EventID, deliveryCreationOptions{ReplayJobID: replayJobID, ConfigMode: configMode, RateLimitPerMinute: rateLimitPerMinute}) - if err != nil { - return err - } - created += count - if count == 0 { - if _, err := s.pool.Exec(ctx, `INSERT INTO replay_items(id, tenant_id, replay_job_id, event_id, state, config_mode, error, completed_at) VALUES($1,$2,$3,$4,'completed',$5,'no current route or subscription matched',now())`, mustID("rpi"), tenantID, replayJobID, req.EventID, configMode); err != nil { - return err - } - } - } - } - if req.DeliveryID != "" { - count, 
evidence, err := s.createDeliveryFromExisting(ctx, tenantID, req.DeliveryID, deliveryCreationOptions{ReplayJobID: replayJobID, ConfigMode: configMode, RateLimitPerMinute: rateLimitPerMinute}) - if err != nil { - return err - } - created += count - if count > 0 { - tx, err := s.pool.Begin(ctx) - if err != nil { - return err - } - if err := insertReplayDecisionEvidence(ctx, tx, evidence); err != nil { - _ = tx.Rollback(ctx) - return err - } - if err := tx.Commit(ctx); err != nil { - return err - } - } - } - _, err = s.pool.Exec(ctx, `UPDATE replay_jobs SET state='completed', processed_items=$3, completed_at=now() WHERE tenant_id=$1 AND id=$2 AND state <> 'canceled'`, tenantID, replayJobID, created) - return err -} - -func (s *Store) createDeliveryFromExisting(ctx context.Context, tenantID, deliveryID string, opts deliveryCreationOptions) (int, replayEvidence, error) { - var eventID, endpointID, routeID, routeVersionID, subscriptionID, subscriptionVersionID, retryPolicyID, adapterVersionID, normalizedID, transformationVersionID, deliveryPayloadID string - err := s.pool.QueryRow(ctx, `SELECT event_id, endpoint_id, COALESCE(route_id,''), COALESCE(route_version_id,''), COALESCE(subscription_id,''), COALESCE(subscription_version_id,''), COALESCE(retry_policy_id,''), COALESCE(adapter_version_id,''), COALESCE(normalized_envelope_id,''), COALESCE(transformation_version_id,''), COALESCE(delivery_payload_id,'') FROM deliveries WHERE tenant_id=$1 AND id=$2`, tenantID, deliveryID). 
- Scan(&eventID, &endpointID, &routeID, &routeVersionID, &subscriptionID, &subscriptionVersionID, &retryPolicyID, &adapterVersionID, &normalizedID, &transformationVersionID, &deliveryPayloadID) - if errors.Is(err, pgx.ErrNoRows) { - return 0, replayEvidence{}, app.ErrNotFound - } - if err != nil { - return 0, replayEvidence{}, err - } - if opts.ConfigMode != app.ReplayConfigOriginal && (routeID != "" || subscriptionID != "") { - current, ok, err := s.currentDeliveryReplayConfig(ctx, tenantID, routeID, subscriptionID) - if err != nil { - return 0, replayEvidence{}, err - } - if !ok { - return 0, replayEvidence{}, nil - } - endpointID = current.endpointID - routeVersionID = current.routeVersionID - subscriptionVersionID = current.subscriptionVersionID - retryPolicyID = current.retryPolicyID - transformationVersionID = current.transformationVersionID - adapterVersionID = "" - normalizedID = "" - deliveryPayloadID = "" - } - newDeliveryID := mustID("del") - tx, err := s.pool.Begin(ctx) - if err != nil { - return 0, replayEvidence{}, err - } - defer rollback(ctx, tx) - retrySeed := deliveryRetrySeed(tenantID, newDeliveryID, eventID, endpointID) - if _, err = tx.Exec(ctx, `INSERT INTO deliveries(id, tenant_id, event_id, endpoint_id, route_id, route_version_id, subscription_id, subscription_version_id, retry_policy_id, replay_job_id, adapter_version_id, normalized_envelope_id, transformation_version_id, retry_seed, state, next_attempt_at) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,'scheduled',$15)`, newDeliveryID, tenantID, eventID, endpointID, routeID, routeVersionID, subscriptionID, subscriptionVersionID, retryPolicyID, opts.ReplayJobID, adapterVersionID, normalizedID, transformationVersionID, retrySeed, scheduledDeliveryAt(0, opts.RateLimitPerMinute)); err != nil { - return 0, replayEvidence{}, err - } - newPayloadID := "" - if deliveryPayloadID != "" && opts.ConfigMode == app.ReplayConfigOriginal { - newPayloadID, normalizedID, adapterVersionID, 
transformationVersionID, err = s.cloneDeliveryPayload(ctx, tx, tenantID, deliveryPayloadID, newDeliveryID) - } else { - newPayloadID, normalizedID, adapterVersionID, err = s.createDeliveryPayload(ctx, tx, tenantID, eventID, newDeliveryID, transformationVersionID) - } - if err != nil { - return 0, replayEvidence{}, err - } - payloadHash, err := s.deliveryPayloadSHA256(ctx, tx, tenantID, newPayloadID) - if err != nil { - return 0, replayEvidence{}, err - } - if err := tx.Commit(ctx); err != nil { - return 0, replayEvidence{}, err - } - return 1, replayEvidence{ - tenantID: tenantID, replayJobID: opts.ReplayJobID, eventID: eventID, originalDeliveryID: deliveryID, newDeliveryID: newDeliveryID, - configMode: opts.ConfigMode, routeVersionID: routeVersionID, subscriptionVersionID: subscriptionVersionID, retryPolicyID: retryPolicyID, - adapterVersionID: adapterVersionID, normalizedEnvelopeID: normalizedID, transformationVersionID: transformationVersionID, deliveryPayloadID: newPayloadID, deliveryPayloadSHA256: payloadHash, - }, nil -} - -type currentReplayConfig struct { - endpointID string - routeVersionID string - subscriptionVersionID string - retryPolicyID string - transformationVersionID string -} - -func (s *Store) currentDeliveryReplayConfig(ctx context.Context, tenantID, routeID, subscriptionID string) (currentReplayConfig, bool, error) { - var out currentReplayConfig - if routeID != "" { - err := s.pool.QueryRow(ctx, ` - SELECT r.endpoint_id, r.active_version_id, - COALESCE(NULLIF(r.retry_policy_id,''), NULLIF(e.retry_policy_id,''), ''), - COALESCE(NULLIF(r.transformation_version_id,''),'') - FROM routes r - JOIN endpoints e ON e.tenant_id=r.tenant_id AND e.id=r.endpoint_id - WHERE r.tenant_id=$1 AND r.id=$2 AND r.state='active'`, - tenantID, routeID, - ).Scan(&out.endpointID, &out.routeVersionID, &out.retryPolicyID, &out.transformationVersionID) - if errors.Is(err, pgx.ErrNoRows) { - return currentReplayConfig{}, false, nil - } - if err != nil { - return 
currentReplayConfig{}, false, err - } - return out, true, nil - } - if subscriptionID != "" { - err := s.pool.QueryRow(ctx, ` - SELECT s.endpoint_id, s.active_version_id, - COALESCE(NULLIF(e.retry_policy_id,''), ''), - COALESCE(NULLIF(s.transformation_version_id,''),'') - FROM subscriptions s - JOIN endpoints e ON e.tenant_id=s.tenant_id AND e.id=s.endpoint_id - WHERE s.tenant_id=$1 AND s.id=$2 AND s.state='active'`, - tenantID, subscriptionID, - ).Scan(&out.endpointID, &out.subscriptionVersionID, &out.retryPolicyID, &out.transformationVersionID) - if errors.Is(err, pgx.ErrNoRows) { - return currentReplayConfig{}, false, nil - } - if err != nil { - return currentReplayConfig{}, false, err - } - return out, true, nil - } - return out, true, nil -} - -func (s *Store) createDeliveriesFromOriginalEvent(ctx context.Context, tenantID, eventID string, opts deliveryCreationOptions) (int, error) { - rows, err := s.pool.Query(ctx, ` - SELECT id, endpoint_id, COALESCE(route_id,''), COALESCE(route_version_id,''), COALESCE(subscription_id,''), COALESCE(subscription_version_id,''), COALESCE(retry_policy_id,''), COALESCE(adapter_version_id,''), COALESCE(normalized_envelope_id,''), COALESCE(transformation_version_id,''), COALESCE(delivery_payload_id,'') - FROM deliveries - WHERE tenant_id=$1 - AND event_id=$2 - AND COALESCE(replay_job_id,'') = '' - ORDER BY created_at ASC, id ASC`, - tenantID, eventID, - ) - if err != nil { - return 0, err - } - defer rows.Close() - type originalDelivery struct { - id string - endpointID string - routeID string - routeVersionID string - subscriptionID string - subscriptionVersionID string - retryPolicyID string - adapterVersionID string - normalizedEnvelopeID string - transformationVersionID string - deliveryPayloadID string - } - var originals []originalDelivery - for rows.Next() { - var item originalDelivery - if err := rows.Scan(&item.id, &item.endpointID, &item.routeID, &item.routeVersionID, &item.subscriptionID, &item.subscriptionVersionID, 
&item.retryPolicyID, &item.adapterVersionID, &item.normalizedEnvelopeID, &item.transformationVersionID, &item.deliveryPayloadID); err != nil { - return 0, err - } - originals = append(originals, item) - } - if err := rows.Err(); err != nil { - return 0, err - } - if len(originals) == 0 { - return 0, nil - } - tx, err := s.pool.Begin(ctx) - if err != nil { - return 0, err - } - defer rollback(ctx, tx) - for i, original := range originals { - newDeliveryID := mustID("del") - retrySeed := deliveryRetrySeed(tenantID, newDeliveryID, eventID, original.endpointID) - if _, err := tx.Exec(ctx, ` - INSERT INTO deliveries(id, tenant_id, event_id, endpoint_id, route_id, route_version_id, subscription_id, subscription_version_id, retry_policy_id, replay_job_id, adapter_version_id, normalized_envelope_id, transformation_version_id, retry_seed, state, next_attempt_at) - VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,'scheduled',$15)`, - newDeliveryID, tenantID, eventID, original.endpointID, original.routeID, original.routeVersionID, - original.subscriptionID, original.subscriptionVersionID, original.retryPolicyID, opts.ReplayJobID, - original.adapterVersionID, original.normalizedEnvelopeID, original.transformationVersionID, - retrySeed, - scheduledDeliveryAt(i, opts.RateLimitPerMinute), - ); err != nil { - return 0, err - } - newPayloadID, normalizedID, adapterVersionID, transformationVersionID, err := s.cloneDeliveryPayload(ctx, tx, tenantID, original.deliveryPayloadID, newDeliveryID) - if err != nil { - return 0, err - } - payloadHash, err := s.deliveryPayloadSHA256(ctx, tx, tenantID, newPayloadID) - if err != nil { - return 0, err - } - if err := insertReplayDecisionEvidence(ctx, tx, replayEvidence{ - tenantID: tenantID, replayJobID: opts.ReplayJobID, eventID: eventID, originalDeliveryID: original.id, newDeliveryID: newDeliveryID, - configMode: opts.ConfigMode, routeVersionID: original.routeVersionID, subscriptionVersionID: original.subscriptionVersionID, retryPolicyID: 
original.retryPolicyID, - adapterVersionID: adapterVersionID, normalizedEnvelopeID: normalizedID, transformationVersionID: transformationVersionID, deliveryPayloadID: newPayloadID, deliveryPayloadSHA256: payloadHash, - }); err != nil { - return 0, err - } - } - if err := tx.Commit(ctx); err != nil { - return 0, err - } - return len(originals), nil -} - -type replayEvidence struct { - tenantID string - replayJobID string - eventID string - originalDeliveryID string - newDeliveryID string - configMode string - routeVersionID string - subscriptionVersionID string - retryPolicyID string - adapterVersionID string - normalizedEnvelopeID string - transformationVersionID string - deliveryPayloadID string - deliveryPayloadSHA256 string -} - -func insertReplayDecisionEvidence(ctx context.Context, tx pgx.Tx, ev replayEvidence) error { - if ev.configMode == "" { - ev.configMode = app.ReplayConfigCurrent +func insertReplayDecisionEvidence(ctx context.Context, tx pgx.Tx, ev app.ReplayDecisionEvidence) error { + if ev.ConfigMode == "" { + ev.ConfigMode = app.ReplayConfigCurrent } if _, err := tx.Exec(ctx, ` INSERT INTO replay_items(id, tenant_id, replay_job_id, event_id, original_delivery_id, new_delivery_id, state, config_mode, route_version_id, subscription_version_id, retry_policy_id, adapter_version_id, normalized_envelope_id, transformation_version_id, delivery_payload_id, delivery_payload_sha256, completed_at) VALUES($1,$2,$3,$4,$5,$6,'completed',$7,$8,$9,$10,$11,$12,$13,$14,$15,now())`, - mustID("rpi"), ev.tenantID, ev.replayJobID, ev.eventID, ev.originalDeliveryID, ev.newDeliveryID, ev.configMode, - ev.routeVersionID, ev.subscriptionVersionID, ev.retryPolicyID, ev.adapterVersionID, ev.normalizedEnvelopeID, ev.transformationVersionID, ev.deliveryPayloadID, ev.deliveryPayloadSHA256, + mustID("rpi"), ev.TenantID, ev.ReplayJobID, ev.EventID, ev.OriginalDeliveryID, ev.NewDeliveryID, ev.ConfigMode, + ev.RouteVersionID, ev.SubscriptionVersionID, ev.RetryPolicyID, 
ev.AdapterVersionID, ev.NormalizedEnvelopeID, ev.TransformationVersionID, ev.DeliveryPayloadID, ev.DeliveryPayloadSHA256, ); err != nil { return err } - receiptDeliveryID := ev.originalDeliveryID + receiptDeliveryID := ev.OriginalDeliveryID if receiptDeliveryID == "" { - receiptDeliveryID = ev.newDeliveryID + receiptDeliveryID = ev.NewDeliveryID } _, err := tx.Exec(ctx, ` INSERT INTO replay_receipts(id, tenant_id, replay_job_id, event_id, delivery_id, config_mode, route_version_id, subscription_version_id, retry_policy_id, adapter_version_id, normalized_envelope_id, transformation_version_id, delivery_payload_id, delivery_payload_sha256) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14)`, - mustID("rrc"), ev.tenantID, ev.replayJobID, ev.eventID, receiptDeliveryID, ev.configMode, - ev.routeVersionID, ev.subscriptionVersionID, ev.retryPolicyID, ev.adapterVersionID, ev.normalizedEnvelopeID, ev.transformationVersionID, ev.deliveryPayloadID, ev.deliveryPayloadSHA256, + mustID("rrc"), ev.TenantID, ev.ReplayJobID, ev.EventID, receiptDeliveryID, ev.ConfigMode, + ev.RouteVersionID, ev.SubscriptionVersionID, ev.RetryPolicyID, ev.AdapterVersionID, ev.NormalizedEnvelopeID, ev.TransformationVersionID, ev.DeliveryPayloadID, ev.DeliveryPayloadSHA256, ) return err } @@ -7330,6 +7366,22 @@ func (s *Store) coveringAuditChainAnchors(ctx context.Context, tenantID string, } func (s *Store) timelineJSONLForExport(ctx context.Context, tenantID string, from, to time.Time) ([]byte, error) { + eventIDs, err := s.eventIDsForExport(ctx, tenantID, from, to) + if err != nil { + return nil, err + } + var lines []any + for _, eventID := range eventIDs { + items, err := s.ListEventTimeline(ctx, tenantID, eventID, 100) + if err != nil { + return nil, err + } + lines = append(lines, map[string]any{"event_id": eventID, "timeline": items}) + } + return evidence.JSONLines(lines) +} + +func (s *Store) eventIDsForExport(ctx context.Context, tenantID string, from, to time.Time) ([]string, error) { 
query := `SELECT id FROM events WHERE tenant_id=$1` args := []any{tenantID} if !from.IsZero() { @@ -7346,22 +7398,18 @@ func (s *Store) timelineJSONLForExport(ctx context.Context, tenantID string, fro return nil, err } defer rows.Close() - var lines []any + var eventIDs []string for rows.Next() { var eventID string if err := rows.Scan(&eventID); err != nil { return nil, err } - items, err := s.ListEventTimeline(ctx, tenantID, eventID, 100) - if err != nil { - return nil, err - } - lines = append(lines, map[string]any{"event_id": eventID, "timeline": items}) + eventIDs = append(eventIDs, eventID) } if err := rows.Err(); err != nil { return nil, err } - return evidence.JSONLines(lines) + return eventIDs, nil } func (s *Store) rawPayloadsJSONLForExport(ctx context.Context, tenantID string, from, to time.Time) ([]byte, error) { @@ -7415,7 +7463,7 @@ func (s *Store) rawPayloadsJSONLForExport(ctx context.Context, tenantID string, if errors.Is(err, blobstore.ErrNotFound) { bodyAvailable = false } else { - return nil, err + return nil, errObjectStoreReadFailed } } else { raw.Body = body @@ -7730,6 +7778,14 @@ func zeroTimeOmit(value time.Time) any { return value } +func manifestTime(value time.Time) *time.Time { + if value.IsZero() { + return nil + } + out := value.UTC() + return &out +} + func (s *Store) getAuditExportWithBundle(ctx context.Context, tenantID, exportID string) (domain.EvidenceExport, []byte, error) { var out domain.EvidenceExport var body []byte @@ -8318,35 +8374,6 @@ func (s *Store) findLocalProviderEvent(ctx context.Context, tenantID string, con return id, err } -func providerLookupID(object reconcile.ProviderObject) string { - if value, ok := object.Metadata["delivery_id"]; ok && fmt.Sprint(value) != "" { - return fmt.Sprint(value) - } - return object.ID -} - -func providerErrorForDB(err error) string { - if err == nil { - return "" - } - var providerErr reconcile.ProviderError - if errors.As(err, &providerErr) { - if providerErr.Class != "" { - return 
providerErr.Class - } - } - if errors.Is(err, reconcile.ErrUnsupported) { - return reconcile.ErrorUnsupported - } - msg := err.Error() - for _, marker := range []string{"sk_", "ghp_", "github_pat_", "xoxb-", "shpat_"} { - if strings.Contains(msg, marker) { - return "provider request failed" - } - } - return msg -} - func headerPairsFromMap(values map[string]string) []domain.HeaderPair { headers := []domain.HeaderPair{{Name: "Webhookery-Recovered-By", Value: "provider-api-reconciliation"}} for name, value := range values { @@ -8399,16 +8426,6 @@ func (s *Store) completeReconciliationJob(ctx context.Context, tenantID, jobID s return err } -func (s *Store) failReconciliationJob(ctx context.Context, tenantID, jobID string, cause error) error { - _, err := s.pool.Exec(ctx, ` - UPDATE reconciliation_jobs - SET state='failed', error=$3, completed_at=now() - WHERE tenant_id=$1 AND id=$2 AND state <> 'canceled'`, - tenantID, jobID, providerErrorForDB(cause), - ) - return err -} - func scanEvent(row rowScanner) (domain.Event, error) { var item domain.Event err := row.Scan(&item.ID, &item.TenantID, &item.SourceID, &item.Provider, &item.Type, &item.ProviderID, &item.RawPayloadID, &item.RawPayloadHash, &item.Verified, &item.VerifyReason, &item.DedupeKey, &item.DedupeStatus, &item.ReceivedAt, &item.TraceID) @@ -8763,25 +8780,10 @@ func (s *Store) nextConfigVersion(ctx context.Context, tx pgx.Tx, tenantID, reso return version, err } -func scheduledDeliveryAt(index, rateLimitPerMinute int) time.Time { - return time.Now().UTC().Add(replayScheduleDelay(index, rateLimitPerMinute)) -} - func deliveryRetrySeed(tenantID, deliveryID, eventID, endpointID string) string { return retry.Seed(tenantID, deliveryID, eventID, endpointID) } -func replayScheduleDelay(index, rateLimitPerMinute int) time.Duration { - if index <= 0 || rateLimitPerMinute <= 0 { - return 0 - } - interval := time.Minute / time.Duration(rateLimitPerMinute) - if interval <= 0 { - return 0 - } - return time.Duration(index) 
* interval -} - func firstNonEmpty(values ...string) string { for _, value := range values { if strings.TrimSpace(value) != "" { @@ -8935,6 +8937,35 @@ func upsertWorkerLease(ctx context.Context, tx pgx.Tx, workerID string) error { return err } +func tryAcquireWorkerLease(ctx context.Context, tx pgx.Tx, leaseID, workerID string, ttl time.Duration) (bool, error) { + leaseID = strings.TrimSpace(leaseID) + if leaseID == "" { + leaseID = "worker" + } + workerID = strings.TrimSpace(workerID) + if workerID == "" { + workerID = "worker" + } + seconds := int64(ttl.Seconds()) + if seconds <= 0 { + seconds = 60 + } + var acquired bool + err := tx.QueryRow(ctx, ` + WITH acquired AS ( + INSERT INTO worker_leases(id, worker_id, expires_at) + VALUES($1,$2,now() + ($3 * interval '1 second')) + ON CONFLICT (id) DO UPDATE + SET worker_id=EXCLUDED.worker_id, expires_at=EXCLUDED.expires_at, updated_at=now() + WHERE worker_leases.expires_at <= now() OR worker_leases.worker_id=EXCLUDED.worker_id + RETURNING 1 + ) + SELECT EXISTS(SELECT 1 FROM acquired)`, + leaseID, workerID, seconds, + ).Scan(&acquired) + return acquired, err +} + func mustID(prefix string) string { id, err := random.Token(prefix, 18) if err != nil { @@ -8945,9 +8976,11 @@ func mustID(prefix string) string { var _ app.IngestStore = (*Store)(nil) var _ app.ControlStore = (*Store)(nil) +var _ app.DeliveryFanoutStore = (*Store)(nil) +var _ app.ReconciliationWorkStore = (*Store)(nil) var _ app.APIKeyLookup = (*Store)(nil) var _ worker.OutboxStore = (*Store)(nil) -var _ worker.OutboxProcessor = (*Store)(nil) var _ worker.DeliveryStore = (*Store)(nil) var _ worker.RetentionStore = (*Store)(nil) +var _ worker.AuditChainBackfillStore = (*Store)(nil) var _ = time.Now diff --git a/internal/adapters/postgres/store_ingest.go b/internal/adapters/postgres/store_ingest.go new file mode 100644 index 0000000..8469ba7 --- /dev/null +++ b/internal/adapters/postgres/store_ingest.go @@ -0,0 +1,150 @@ +package postgres + +import ( + 
"context" + "encoding/json" + "errors" + + "webhookery/internal/app" + "webhookery/internal/domain" + + "github.com/jackc/pgx/v5" +) + +func (s *Store) CaptureInbound(ctx context.Context, input app.CaptureInboundInput) (app.CaptureInboundResult, error) { + eventID := mustID("evt") + rawID := mustID("raw") + receiptID := mustID("rcp") + outboxID := mustID("out") + storage, bodyForDB, err := s.prepareRawPayloadStorage(ctx, input.Source.TenantID, rawID, input.RawPayload) + if err != nil { + return app.CaptureInboundResult{}, err + } + objectWritten := storage.backend == domain.RawStorageS3 + tx, err := s.pool.Begin(ctx) + if err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, storage.bucket, storage.key) + } + return app.CaptureInboundResult{}, err + } + defer rollback(ctx, tx) + + if _, err := tx.Exec(ctx, "INSERT INTO tenants(id, name) VALUES($1, $1) ON CONFLICT (id) DO NOTHING", input.Source.TenantID); err != nil { + return app.CaptureInboundResult{}, err + } + if _, err := tx.Exec(ctx, ` + INSERT INTO raw_payloads(id, tenant_id, sha256, content_type, size_bytes, body, storage_backend, object_bucket, object_key, storage_status, created_at) + VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)`, + rawID, input.Source.TenantID, input.RawPayload.SHA256, input.RawPayload.ContentType, input.RawPayload.SizeBytes, bodyForDB, + storage.backend, storage.bucket, storage.key, domain.StorageStatusStored, input.RawPayload.CreatedAt, + ); err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, storage.bucket, storage.key) + } + return app.CaptureInboundResult{}, err + } + + if input.Event.Type == "" { + input.Event.Type = "unknown" + } + dedupeStatus := domain.DedupeUnique + var insertedEventID string + err = tx.QueryRow(ctx, ` + INSERT INTO events(id, tenant_id, source_id, provider, type, provider_event_id, raw_payload_id, raw_payload_hash, + signature_verified, verification_reason, dedupe_key, dedupe_status, received_at, trace_id) + 
VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14) + ON CONFLICT (tenant_id, dedupe_key) DO NOTHING + RETURNING id`, + eventID, input.Source.TenantID, input.Source.ID, input.Source.Provider, input.Event.Type, input.Event.ProviderID, + rawID, input.RawPayload.SHA256, input.VerificationOK, input.VerifyReason, input.Event.DedupeKey, dedupeStatus, + input.Event.ReceivedAt, input.Event.TraceID, + ).Scan(&insertedEventID) + if err != nil && !errors.Is(err, pgx.ErrNoRows) { + return app.CaptureInboundResult{}, err + } + if insertedEventID == "" { + dedupeStatus = domain.DedupeDuplicateSuppressed + err = tx.QueryRow(ctx, ` + SELECT id + FROM events + WHERE tenant_id=$1 AND dedupe_key=$2`, + input.Source.TenantID, input.Event.DedupeKey, + ).Scan(&eventID) + if err != nil { + return app.CaptureInboundResult{}, err + } + } else { + eventID = insertedEventID + if len(input.Normalized.Envelope) > 0 { + adapterVersionID, err := s.lookupAdapterVersionID(ctx, tx, firstNonEmpty(input.Source.Adapter, input.Source.Provider)) + if err != nil { + return app.CaptureInboundResult{}, err + } + normalizedID := mustID("nenv") + if _, err := tx.Exec(ctx, ` + INSERT INTO normalized_envelopes(id, tenant_id, event_id, adapter_version_id, provider, provider_event_id, type, source, subject, + envelope_json, data_json, metadata_json, envelope_sha256, data_sha256, metadata_sha256, storage_status, created_at) + VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10::jsonb,$11::jsonb,$12::jsonb,$13,$14,$15,$16,$17)`, + normalizedID, input.Source.TenantID, eventID, adapterVersionID, input.Normalized.Provider, input.Normalized.ProviderEventID, + input.Normalized.Type, input.Normalized.Source, input.Normalized.Subject, string(input.Normalized.Envelope), + string(input.Normalized.Data), string(input.Normalized.Metadata), input.Normalized.EnvelopeSHA256, input.Normalized.DataSHA256, + input.Normalized.MetadataSHA256, domain.StorageStatusStored, input.Normalized.CreatedAt, + ); err != nil { + return 
app.CaptureInboundResult{}, err + } + } + payload, _ := json.Marshal(map[string]any{"event_id": eventID}) + if _, err := tx.Exec(ctx, `INSERT INTO outbox(id, tenant_id, kind, resource_id, payload) VALUES($1,$2,$3,$4,$5)`, outboxID, input.Source.TenantID, app.OutboxKindRouteEvent, eventID, payload); err != nil { + return app.CaptureInboundResult{}, err + } + } + + if _, err := tx.Exec(ctx, `UPDATE raw_payloads SET event_id=$1 WHERE id=$2`, eventID, rawID); err != nil { + return app.CaptureInboundResult{}, err + } + + headersJSON, err := json.Marshal(input.Receipt.RawHeaders) + if err != nil { + return app.CaptureInboundResult{}, err + } + if _, err := tx.Exec(ctx, ` + INSERT INTO provider_receipts(id, tenant_id, source_id, event_id, raw_payload_id, raw_headers, remote_ip, verification_ok, verification_reason, received_at) + VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)`, + receiptID, input.Source.TenantID, input.Source.ID, eventID, rawID, headersJSON, input.Receipt.RemoteIP, + input.VerificationOK, input.VerifyReason, input.Receipt.ReceivedAt, + ); err != nil { + return app.CaptureInboundResult{}, err + } + if input.Event.DedupeKey != "" { + if _, err := tx.Exec(ctx, ` + INSERT INTO idempotency_records(tenant_id, dedupe_key, resource_type, resource_id, status_code) + VALUES($1,$2,'event',$3,202) + ON CONFLICT (tenant_id, dedupe_key) DO NOTHING`, + input.Source.TenantID, input.Event.DedupeKey, eventID, + ); err != nil { + return app.CaptureInboundResult{}, err + } + if _, err := tx.Exec(ctx, ` + INSERT INTO dedupe_records(tenant_id, source_id, dedupe_key, first_event_id, last_receipt_id, status) + VALUES($1,$2,$3,$4,$5,$6) + ON CONFLICT (tenant_id, dedupe_key) DO UPDATE + SET last_receipt_id=EXCLUDED.last_receipt_id, status=EXCLUDED.status, last_seen_at=now()`, + input.Source.TenantID, input.Source.ID, input.Event.DedupeKey, eventID, receiptID, dedupeStatus, + ); err != nil { + return app.CaptureInboundResult{}, err + } + } + if !input.VerificationOK { + if _, err := 
tx.Exec(ctx, `INSERT INTO quarantine_entries(id, tenant_id, event_id, reason) VALUES($1,$2,$3,$4)`, mustID("qua"), input.Source.TenantID, eventID, input.VerifyReason); err != nil { + return app.CaptureInboundResult{}, err + } + } + if err := tx.Commit(ctx); err != nil { + if objectWritten { + _ = s.objectStore.Delete(ctx, storage.bucket, storage.key) + } + return app.CaptureInboundResult{}, err + } + return app.CaptureInboundResult{EventID: eventID, ReceiptID: receiptID, RawPayloadID: rawID, DedupeStatus: dedupeStatus}, nil +} diff --git a/internal/adapters/postgres/store_integration_test.go b/internal/adapters/postgres/store_integration_test.go index c2c41af..4a749a1 100644 --- a/internal/adapters/postgres/store_integration_test.go +++ b/internal/adapters/postgres/store_integration_test.go @@ -7,11 +7,13 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "io" "net/netip" "os" "path/filepath" "strings" + "sync" "testing" "time" @@ -19,6 +21,8 @@ import ( "webhookery/internal/app" "webhookery/internal/authz" "webhookery/internal/domain" + "webhookery/internal/evidence" + "webhookery/internal/reconcile" "webhookery/internal/ssrf" "webhookery/internal/worker" "webhookery/pkg/verifier" @@ -27,7 +31,7 @@ import ( func TestPostgresMigrationAndAPIKeyAuthentication(t *testing.T) { databaseURL := os.Getenv("WEBHOOKERY_TEST_DATABASE_URL") if databaseURL == "" { - t.Skip("WEBHOOKERY_TEST_DATABASE_URL is required") + t.Skip("WEBHOOKERY_TEST_DATABASE_URL is required to prove live Postgres migrations and API-key authentication") } ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() @@ -82,13 +86,14 @@ func TestPostgresWorkerLeaseRecoveryAndLivePriority(t *testing.T) { defer store.Close() now := time.Date(2026, 5, 26, 16, 0, 0, 0, time.UTC) - if _, err := store.pool.Exec(ctx, `UPDATE outbox SET state='completed', locked_by=NULL, lock_expires_at=NULL WHERE tenant_id LIKE 'ten_it_%' AND state <> 'completed'`); err != nil { + if _, 
err := store.pool.Exec(ctx, `UPDATE outbox SET state='completed', locked_by=NULL, lock_expires_at=NULL WHERE (tenant_id LIKE 'ten_it_%' OR tenant_id LIKE 'ten_rc_%') AND state <> 'completed'`); err != nil { t.Fatalf("clear prior integration outbox work: %v", err) } - if _, err := store.pool.Exec(ctx, `UPDATE deliveries SET state='succeeded', locked_by=NULL, lock_expires_at=NULL WHERE tenant_id LIKE 'ten_it_%' AND state IN ('scheduled','in_progress')`); err != nil { + if _, err := store.pool.Exec(ctx, `UPDATE deliveries SET state='succeeded', locked_by=NULL, lock_expires_at=NULL WHERE (tenant_id LIKE 'ten_it_%' OR tenant_id LIKE 'ten_rc_%') AND state IN ('scheduled','in_progress')`); err != nil { t.Fatalf("clear prior integration delivery work: %v", err) } control := app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{"receiver.example.com": {netip.MustParseAddr("93.184.216.34")}}}) + fanout := app.NewDeliveryFanoutService(store, app.SystemClock{}) source, _ := createPostgresIntegrationRoute(t, ctx, control, actor, "invoice.created") first := ingestPostgresIntegrationEvent(t, ctx, store, actor, source.ID, "invoice.created", "evt_it_recovery_"+time.Now().UTC().Format("150405.000000000"), now) @@ -109,7 +114,7 @@ func TestPostgresWorkerLeaseRecoveryAndLivePriority(t *testing.T) { if len(recoveredOutbox) != 1 || recoveredOutbox[0].ID != stuckOutbox[0].ID { t.Fatalf("expected expired outbox to be reclaimed, got %+v", recoveredOutbox) } - if err := store.ProcessOutbox(ctx, recoveredOutbox[0]); err != nil { + if err := fanout.ProcessOutbox(ctx, recoveredOutbox[0]); err != nil { t.Fatalf("process recovered outbox: %v", err) } if err := store.CompleteOutbox(ctx, recoveredOutbox[0].ID); err != nil { @@ -149,7 +154,7 @@ func TestPostgresWorkerLeaseRecoveryAndLivePriority(t *testing.T) { t.Fatalf("expected live and replay outbox work, got %+v", outboxItems) } for _, item := range outboxItems { - if err := store.ProcessOutbox(ctx, item); err != nil { + if 
err := fanout.ProcessOutbox(ctx, item); err != nil { t.Fatalf("process priority outbox item %+v: %v", item, err) } if err := store.CompleteOutbox(ctx, item.ID); err != nil { @@ -224,8 +229,8 @@ func TestPostgresDuplicateRawPayloadEvidenceRemainsLinkedAndExported(t *testing. } rawTimelineByID := map[string]string{} for _, item := range timeline { - if item["kind"] == "raw_payload" { - rawTimelineByID[item["ref_id"].(string)] = item["detail"].(string) + if item.Kind == "raw_payload" { + rawTimelineByID[item.RefID] = item.Detail } } if len(rawTimelineByID) != 2 { @@ -316,11 +321,1546 @@ func TestPostgresDuplicateRawPayloadEvidenceRemainsLinkedAndExported(t *testing. } } +func TestPostgresConcurrentDuplicateCapturePreservesEvidence(t *testing.T) { + ctx, store, actor := openPostgresIntegrationStore(t) + defer store.Close() + + now := time.Now().UTC().Truncate(time.Second) + control := app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{"receiver.example.com": {netip.MustParseAddr("93.184.216.34")}}}) + source, _ := createPostgresIntegrationRoute(t, ctx, control, actor, "invoice.concurrent_duplicate") + providerID := "evt_it_concurrent_duplicate_" + now.Format("150405.000000000") + body := []byte(`{"id":"` + providerID + `","type":"invoice.concurrent_duplicate","account":"acct_it"}`) + signature := verifier.TimestampedHeader("v1", now, []byte("whsec_it"), body) + + const attempts = 8 + results := make([]app.IngestResult, attempts) + errs := make([]error, attempts) + var wg sync.WaitGroup + wg.Add(attempts) + for i := 0; i < attempts; i++ { + i := i + go func() { + defer wg.Done() + results[i], errs[i] = app.NewIngestService(store, fixedIntegrationClock{now: now}).Ingest(ctx, app.IngestRequest{ + TenantID: actor.TenantID, + SourceID: source.ID, + Provider: "stripe", + RawBody: body, + Headers: []domain.HeaderPair{{Name: "Stripe-Signature", Value: signature}}, + ContentType: "application/json", + RemoteIP: "198.51.100.20", + }) + }() + } + wg.Wait() 
+ + eventID := "" + uniqueCount := 0 + duplicateCount := 0 + for i, err := range errs { + if err != nil { + t.Fatalf("concurrent duplicate capture %d failed: %v", i, err) + } + if !results[i].Accepted || results[i].EventID == "" { + t.Fatalf("concurrent duplicate capture %d was not accepted: %+v", i, results[i]) + } + if eventID == "" { + eventID = results[i].EventID + } + if results[i].EventID != eventID { + t.Fatalf("duplicate capture %d linked to %s, want canonical event %s", i, results[i].EventID, eventID) + } + switch results[i].DedupeStatus { + case domain.DedupeUnique: + uniqueCount++ + case domain.DedupeDuplicateSuppressed: + duplicateCount++ + default: + t.Fatalf("unexpected dedupe status for capture %d: %s", i, results[i].DedupeStatus) + } + } + if uniqueCount != 1 || duplicateCount != attempts-1 { + t.Fatalf("expected one unique and %d duplicates, got unique=%d duplicate=%d", attempts-1, uniqueCount, duplicateCount) + } + + var eventRows, rawRows, receiptRows, distinctReceiptRawRows, outboxRows int + if err := store.pool.QueryRow(ctx, `SELECT count(*) FROM events WHERE tenant_id=$1 AND source_id=$2 AND provider_event_id=$3`, actor.TenantID, source.ID, providerID).Scan(&eventRows); err != nil { + t.Fatal(err) + } + if err := store.pool.QueryRow(ctx, `SELECT count(*) FROM raw_payloads WHERE tenant_id=$1 AND event_id=$2`, actor.TenantID, eventID).Scan(&rawRows); err != nil { + t.Fatal(err) + } + if err := store.pool.QueryRow(ctx, `SELECT count(*) FROM provider_receipts WHERE tenant_id=$1 AND event_id=$2`, actor.TenantID, eventID).Scan(&receiptRows); err != nil { + t.Fatal(err) + } + if err := store.pool.QueryRow(ctx, `SELECT count(DISTINCT raw_payload_id) FROM provider_receipts WHERE tenant_id=$1 AND event_id=$2`, actor.TenantID, eventID).Scan(&distinctReceiptRawRows); err != nil { + t.Fatal(err) + } + if err := store.pool.QueryRow(ctx, `SELECT count(*) FROM outbox WHERE tenant_id=$1 AND kind=$2 AND resource_id=$3`, actor.TenantID, app.OutboxKindRouteEvent, 
eventID).Scan(&outboxRows); err != nil { + t.Fatal(err) + } + if eventRows != 1 || rawRows != attempts || receiptRows != attempts || distinctReceiptRawRows != attempts || outboxRows != 1 { + t.Fatalf("unexpected concurrent duplicate evidence counts: events=%d raw=%d receipts=%d distinct_receipt_raw=%d outbox=%d", eventRows, rawRows, receiptRows, distinctReceiptRawRows, outboxRows) + } +} + +func TestPostgresEvidenceExportIncludesBodyArtifactsAndProofs(t *testing.T) { + ctx, store, actor := openPostgresIntegrationStore(t) + defer store.Close() + + windowStart := time.Now().UTC().Add(-time.Minute) + now := time.Now().UTC().Truncate(time.Second) + control := app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{"receiver.example.com": {netip.MustParseAddr("93.184.216.34")}}}) + source, _ := createPostgresIntegrationRoute(t, ctx, control, actor, "invoice.exported") + providerID := "evt_it_export_" + now.Format("150405.000000000") + first := ingestPostgresIntegrationEvent(t, ctx, store, actor, source.ID, "invoice.exported", providerID, now) + duplicate := ingestPostgresIntegrationEvent(t, ctx, store, actor, source.ID, "invoice.exported", providerID, now.Add(time.Second)) + if duplicate.EventID != first.EventID || duplicate.DedupeStatus != domain.DedupeDuplicateSuppressed { + t.Fatalf("expected duplicate evidence linked to %s, got %+v", first.EventID, duplicate) + } + fanout := app.NewDeliveryFanoutService(store, app.SystemClock{}) + if created, err := fanout.CreateDeliveriesForEvent(ctx, actor.TenantID, first.EventID, app.DeliveryFanoutOptions{}); err != nil { + t.Fatal(err) + } else if created == 0 { + t.Fatal("expected route fanout to create a delivery payload") + } + + connection, err := store.CreateProviderConnection(ctx, actor.TenantID, actor.ID, app.CreateProviderConnectionRequest{ + Name: "export evidence connection", + Provider: "stripe", + CredentialType: "api_key", + Credential: "sk_test_placeholder", + Config: 
map[string]string{"source_id": source.ID}, + }) + if err != nil { + t.Fatal(err) + } + reconciliationJob, err := store.CreateReconciliationJob(ctx, actor.TenantID, actor.ID, app.ReconciliationJobRequest{ + ConnectionID: connection.ID, + DryRun: true, + CaptureMissing: true, + WindowStart: windowStart, + WindowEnd: now.Add(time.Minute), + Reason: "export evidence regression", + }) + if err != nil { + t.Fatal(err) + } + providerEvidenceID, err := store.insertProviderAPIEvidence(ctx, actor.TenantID, reconciliationJob.ID, "", connection.ID, connection.Provider, reconcile.Evidence{ + Method: "GET", + URL: "https://api.stripe.com/v1/events/" + providerID, + StatusCode: 200, + Body: []byte(`{"id":"` + providerID + `","object":"event"}`), + }) + if err != nil { + t.Fatal(err) + } + if _, err := store.insertReconciliationItem(ctx, reconciliationItemInput{ + tenantID: actor.TenantID, jobID: reconciliationJob.ID, provider: connection.Provider, objectID: providerID, objectType: "event", + outcome: domain.ReconciliationOutcomeMatched, localEventID: first.EventID, evidenceID: providerEvidenceID, metadata: []byte(`{"test":"export"}`), + }); err != nil { + t.Fatal(err) + } + + limitedActor := authz.Actor{ID: "usr_export_limited", TenantID: actor.TenantID, Role: authz.RoleAdmin, Scopes: []string{"audit:read"}} + if _, err := control.CreateAuditExport(ctx, limitedActor, app.CreateAuditExportRequest{IncludeRawPayloads: true, Reason: "permission regression"}); !errors.Is(err, app.ErrForbidden) { + t.Fatalf("expected raw-inclusive export to require events:raw, got %v", err) + } + if _, err := control.CreateAuditExport(ctx, limitedActor, app.CreateAuditExportRequest{IncludePayloadBodies: true, Reason: "permission regression"}); !errors.Is(err, app.ErrForbidden) { + t.Fatalf("expected payload-inclusive export to require events:raw, got %v", err) + } + + export, err := control.CreateAuditExport(ctx, actor, app.CreateAuditExportRequest{ + From: windowStart, + To: now.Add(time.Minute), + 
IncludeRawPayloads: true, + IncludeTimelines: true, + IncludePayloadBodies: true, + Reason: "body-inclusive export regression", + }) + if err != nil { + t.Fatal(err) + } + download, err := control.DownloadAuditExport(ctx, actor, export.ID) + if err != nil { + t.Fatal(err) + } + verification, err := evidence.VerifyTarGzipBundle(download.Body) + if err != nil { + t.Fatal(err) + } + if !verification.Valid || verification.CheckedChainEntries == 0 { + t.Fatalf("expected valid bundle with audit chain proof, got %+v", verification) + } + files := readTestTarGzipFiles(t, download.Body) + rawEntries := decodeTestJSONLines(t, files["raw_payloads.jsonl"]) + rawBodies, receiptRows := 0, 0 + for _, entry := range rawEntries { + if entry["event_id"] != first.EventID { + continue + } + if body, ok := entry["body_base64"].(string); ok && body != "" { + rawBodies++ + } + if receiptIDs, ok := entry["receipt_ids"].([]any); ok && len(receiptIDs) == 1 { + receiptRows++ + } + } + if rawBodies != 2 || receiptRows != 2 { + t.Fatalf("expected two duplicate raw bodies and receipt links, bodies=%d receipts=%d entries=%+v", rawBodies, receiptRows, rawEntries) + } + + payloadEntries := decodeTestJSONLines(t, files["payload_evidence.jsonl"]) + var normalizedWithBody, deliveryWithBody bool + for _, entry := range payloadEntries { + switch entry["resource_type"] { + case "normalized_envelope": + _, hasEnvelope := entry["envelope"] + _, hasData := entry["data"] + normalizedWithBody = entry["event_id"] == first.EventID && entry["body_included"] == true && hasEnvelope && hasData + case "delivery_payload": + body, _ := entry["body_base64"].(string) + deliveryWithBody = entry["event_id"] == first.EventID && entry["body_included"] == true && body != "" + } + } + if !normalizedWithBody || !deliveryWithBody { + t.Fatalf("expected normalized and delivery payload bodies in export, normalized=%v delivery=%v entries=%+v", normalizedWithBody, deliveryWithBody, payloadEntries) + } + + reconciliationEntries := 
decodeTestJSONLines(t, files["reconciliation_evidence.jsonl"]) + providerBodyIncluded := false + for _, entry := range reconciliationEntries { + if entry["id"] != reconciliationJob.ID { + continue + } + for _, rawEvidence := range entry["provider_api_evidence"].([]any) { + apiEvidence := rawEvidence.(map[string]any) + body, _ := apiEvidence["response_body_base64"].(string) + if apiEvidence["id"] == providerEvidenceID && apiEvidence["body_included"] == true && body != "" { + providerBodyIncluded = true + } + } + } + if !providerBodyIncluded { + t.Fatalf("expected provider API evidence body in export, entries=%+v", reconciliationEntries) + } + + if _, err := store.pool.Exec(ctx, `UPDATE raw_payloads SET body='', storage_status='deleted', storage_deleted_at=now() WHERE tenant_id=$1 AND event_id=$2`, actor.TenantID, first.EventID); err != nil { + t.Fatal(err) + } + if _, err := control.GetRawPayload(ctx, actor, first.EventID, "verify retention tombstone"); !errors.Is(err, app.ErrGone) { + t.Fatalf("expected retained raw body read to return gone after deletion, got %v", err) + } +} + +func TestPostgresAuditChainBackfillIsBoundedAndIdempotent(t *testing.T) { + ctx, store, _ := openPostgresIntegrationStore(t) + defer store.Close() + for _, prefix := range []string{"ten_it_backfill_%", "ten_it_migration_%"} { + if _, err := store.pool.Exec(ctx, `DELETE FROM audit_chain_entries WHERE tenant_id LIKE $1`, prefix); err != nil { + t.Fatal(err) + } + if _, err := store.pool.Exec(ctx, `DELETE FROM audit_chain_heads WHERE tenant_id LIKE $1`, prefix); err != nil { + t.Fatal(err) + } + if _, err := store.pool.Exec(ctx, `DELETE FROM audit_events WHERE tenant_id LIKE $1`, prefix); err != nil { + t.Fatal(err) + } + } + + suffix := time.Now().UTC().Format("150405.000000000") + tenantID := "ten_it_backfill_" + suffix + base := time.Date(2026, 5, 26, 18, 0, 0, 0, time.UTC) + if _, err := store.pool.Exec(ctx, `INSERT INTO tenants(id, name) VALUES($1, 'backfill integration') ON CONFLICT 
(id) DO NOTHING`, tenantID); err != nil { + t.Fatal(err) + } + events := []struct { + id string + occurredAt time.Time + }{ + {id: "aud_it_backfill_b_" + suffix, occurredAt: base}, + {id: "aud_it_backfill_a_" + suffix, occurredAt: base}, + {id: "aud_it_backfill_c_" + suffix, occurredAt: base.Add(time.Second)}, + } + for _, event := range events { + if _, err := store.pool.Exec(ctx, ` + INSERT INTO audit_events(id, tenant_id, actor_id, action, resource, resource_id, reason, occurred_at) + VALUES($1,$2,'usr_it','integration.backfill','test',$1,'backfill integration',$3)`, + event.id, tenantID, event.occurredAt); err != nil { + t.Fatal(err) + } + } + + first, err := store.BackfillAuditChain(ctx, "it-backfill", 2) + if err != nil { + t.Fatal(err) + } + if !first.LeaseAcquired || first.EventsBackfilled != 2 || !first.More { + t.Fatalf("expected first bounded backfill to claim two events and report more work, got %+v", first) + } + second, err := store.BackfillAuditChain(ctx, "it-backfill", 10) + if err != nil { + t.Fatal(err) + } + if !second.LeaseAcquired || second.EventsBackfilled != 1 || second.More { + t.Fatalf("expected second backfill to finish remaining event, got %+v", second) + } + third, err := store.BackfillAuditChain(ctx, "it-backfill", 10) + if err != nil { + t.Fatal(err) + } + if !third.LeaseAcquired || third.EventsBackfilled != 0 || third.More { + t.Fatalf("expected idempotent empty backfill, got %+v", third) + } + + rows, err := store.pool.Query(ctx, ` + SELECT audit_event_id, sequence, source + FROM audit_chain_entries + WHERE tenant_id=$1 + ORDER BY sequence ASC`, tenantID) + if err != nil { + t.Fatal(err) + } + defer rows.Close() + var orderedIDs []string + var sequences []int64 + var sources []string + for rows.Next() { + var id, source string + var sequence int64 + if err := rows.Scan(&id, &sequence, &source); err != nil { + t.Fatal(err) + } + orderedIDs = append(orderedIDs, id) + sequences = append(sequences, sequence) + sources = append(sources, 
source) + } + if err := rows.Err(); err != nil { + t.Fatal(err) + } + expectedOrder := []string{events[1].id, events[0].id, events[2].id} + if strings.Join(orderedIDs, ",") != strings.Join(expectedOrder, ",") { + t.Fatalf("expected deterministic occurred_at/id order %v, got %v", expectedOrder, orderedIDs) + } + if strings.Join(sources, ",") != "backfill,backfill,backfill" { + t.Fatalf("expected backfill chain entry sources, got %v", sources) + } + if len(sequences) != 3 || sequences[0] != 1 || sequences[1] != 2 || sequences[2] != 3 { + t.Fatalf("expected sequential chain entries, got %v", sequences) + } +} + +func TestPostgresControlResourcesTenantIsolationAndEvidence(t *testing.T) { + ctx, store, actor := openPostgresIntegrationStore(t) + defer store.Close() + + suffix := time.Now().UTC().Format("150405.000000000") + other := authz.Actor{ID: "usr_it_other_" + suffix, TenantID: "ten_it_other_" + suffix, Role: authz.RoleOwner, Scopes: []string{"*"}} + if _, err := store.CreateAPIKey(ctx, app.APIKeyCreateInput{ + Key: domain.APIKey{ + TenantID: other.TenantID, + UserID: other.ID, + Name: "integration other owner", + Prefix: "it-other", + Last4: "test", + Hash: app.HashToken("integration-other-" + suffix), + Scopes: []string{"*"}, + State: domain.StateActive, + }, + Role: authz.RoleOwner, + ActorID: other.ID, + }); err != nil { + t.Fatal(err) + } + control := app.NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{ + "receiver.example.com": {netip.MustParseAddr("93.184.216.34")}, + "signals.example.com": {netip.MustParseAddr("93.184.216.34")}, + }}) + + source, err := control.CreateSource(ctx, actor, app.CreateSourceRequest{Name: "tenant source", Provider: "stripe", Adapter: "stripe", VerificationSecret: "whsec_it"}) + if err != nil { + t.Fatal(err) + } + endpoint, _, err := control.CreateEndpoint(ctx, actor, app.CreateEndpointRequest{Name: "tenant endpoint", URL: "https://receiver.example.com/webhook"}) + if err != nil { + t.Fatal(err) + } + 
retryPolicy, err := control.CreateRetryPolicy(ctx, actor, app.CreateRetryPolicyRequest{Name: "tenant retry", MaxAttempts: 3, MaxDurationSeconds: 3600, InitialDelaySeconds: 1, MaxDelaySeconds: 60}) + if err != nil { + t.Fatal(err) + } + route, err := control.CreateRoute(ctx, actor, app.CreateRouteRequest{SourceID: source.ID, Name: "tenant route", Priority: 10, EventTypes: []string{"invoice.created"}, EndpointID: endpoint.ID, RetryPolicyID: retryPolicy.ID, State: domain.StateActive}) + if err != nil { + t.Fatal(err) + } + subscription, err := control.CreateSubscription(ctx, actor, app.CreateSubscriptionRequest{EndpointID: endpoint.ID, EventTypes: []string{"invoice.created"}, PayloadFormat: "canonical_json"}) + if err != nil { + t.Fatal(err) + } + channel, _, err := control.CreateNotificationChannel(ctx, actor, app.CreateNotificationChannelRequest{Name: "tenant notification", URL: "https://signals.example.com/notify", SigningSecret: "notify-secret-value"}) + if err != nil { + t.Fatal(err) + } + alert, err := control.CreateAlertRule(ctx, actor, app.CreateAlertRuleRequest{Name: "tenant alert", RuleType: domain.AlertRuleDeadLetterOpen, Threshold: 1, ChannelIDs: []string{channel.ID}}) + if err != nil { + t.Fatal(err) + } + sink, _, err := control.CreateSIEMSink(ctx, actor, app.CreateSIEMSinkRequest{Name: "tenant siem", URL: "https://signals.example.com/siem", SigningSecret: "siem-secret-value"}) + if err != nil { + t.Fatal(err) + } + + sourceName := "tenant source updated" + if _, err := control.UpdateSource(ctx, actor, source.ID, app.UpdateSourceRequest{Name: &sourceName, Reason: "integration update"}); err != nil { + t.Fatal(err) + } + endpointName := "tenant endpoint updated" + if _, _, err := control.UpdateEndpoint(ctx, actor, endpoint.ID, app.UpdateEndpointRequest{Name: &endpointName, Reason: "integration update"}); err != nil { + t.Fatal(err) + } + routeName := "tenant route updated" + if _, err := control.UpdateRoute(ctx, actor, route.ID, 
app.UpdateRouteRequest{Name: &routeName, Reason: "integration update"}); err != nil { + t.Fatal(err) + } + subscriptionFormat := "canonical_json" + if _, err := control.UpdateSubscription(ctx, actor, subscription.ID, app.UpdateSubscriptionRequest{PayloadFormat: &subscriptionFormat, Reason: "integration update"}); err != nil { + t.Fatal(err) + } + retryName := "tenant retry updated" + updatedRetryPolicy, err := control.UpdateRetryPolicy(ctx, actor, retryPolicy.ID, app.UpdateRetryPolicyRequest{Name: &retryName, Reason: "integration update"}) + if err != nil { + t.Fatal(err) + } + retryPolicy = updatedRetryPolicy + alertName := "tenant alert updated" + if _, err := control.UpdateAlertRule(ctx, actor, alert.ID, app.UpdateAlertRuleRequest{Name: &alertName, Reason: "integration update"}); err != nil { + t.Fatal(err) + } + channelName := "tenant notification updated" + if _, _, err := control.UpdateNotificationChannel(ctx, actor, channel.ID, app.UpdateNotificationChannelRequest{Name: &channelName, Reason: "integration update"}); err != nil { + t.Fatal(err) + } + sinkName := "tenant siem updated" + if _, _, err := control.UpdateSIEMSink(ctx, actor, sink.ID, app.UpdateSIEMSinkRequest{Name: &sinkName, Reason: "integration update"}); err != nil { + t.Fatal(err) + } + + _, err = control.GetSource(ctx, other, source.ID) + assertPostgresNotFound(t, err) + _, err = control.GetEndpoint(ctx, other, endpoint.ID) + assertPostgresNotFound(t, err) + _, err = control.GetRoute(ctx, other, route.ID) + assertPostgresNotFound(t, err) + _, err = control.GetSubscription(ctx, other, subscription.ID) + assertPostgresNotFound(t, err) + _, err = control.GetRetryPolicy(ctx, other, retryPolicy.ID) + assertPostgresNotFound(t, err) + _, err = control.GetAlertRule(ctx, other, alert.ID) + assertPostgresNotFound(t, err) + _, err = control.GetNotificationChannel(ctx, other, channel.ID) + assertPostgresNotFound(t, err) + _, err = control.GetSIEMSink(ctx, other, sink.ID) + assertPostgresNotFound(t, err) + 
+ for _, item := range []struct { + resourceType string + resourceID string + }{ + {domain.ConfigResourceSource, source.ID}, + {domain.ConfigResourceEndpoint, endpoint.ID}, + {domain.ConfigResourceRoute, route.ID}, + {domain.ConfigResourceSubscription, subscription.ID}, + {domain.ConfigResourceRetryPolicy, retryPolicy.ID}, + } { + assertPostgresConfigVersion(t, ctx, store, actor.TenantID, item.resourceType, item.resourceID) + } + for _, item := range []struct { + action string + resource string + resourceID string + }{ + {"source.updated", "source", source.ID}, + {"endpoint.updated", "endpoint", endpoint.ID}, + {"route.updated", "route", route.ID}, + {"subscription.updated", "subscription", subscription.ID}, + {"retry_policy.updated", "retry_policy", retryPolicy.ID}, + {"alert_rule.updated", "alert_rule", alert.ID}, + {"notification_channel.updated", "notification_channel", channel.ID}, + {"siem_sink.updated", "siem_sink", sink.ID}, + } { + assertPostgresAuditEvent(t, ctx, store, actor.TenantID, item.action, item.resource, item.resourceID) + } + + sources, err := control.ListSources(ctx, actor, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresSource(sources, source.ID, domain.StateActive) { + t.Fatalf("expected source in tenant list, got %+v", sources) + } + if found, err := store.FindSourceByProviderPath(ctx, source.Provider, source.ID); err != nil || found.ID != source.ID { + t.Fatalf("expected provider path source lookup to find %s, found=%+v err=%v", source.ID, found, err) + } + endpoints, err := control.ListEndpoints(ctx, actor, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresEndpoint(endpoints, endpoint.ID, domain.StateActive) { + t.Fatalf("expected endpoint in tenant list, got %+v", endpoints) + } + testDelivery, err := control.TestEndpoint(ctx, actor, endpoint.ID, app.TestEndpointRequest{Reason: "integration endpoint test"}) + if err != nil { + t.Fatal(err) + } + if testDelivery.ID == "" || testDelivery.EndpointID != endpoint.ID 
|| testDelivery.State != "scheduled" { + t.Fatalf("unexpected endpoint test delivery: %+v", testDelivery) + } + subscriptions, err := control.ListSubscriptions(ctx, actor, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresSubscription(subscriptions, subscription.ID, domain.StateActive) { + t.Fatalf("expected subscription in tenant list, got %+v", subscriptions) + } + routes, err := control.ListRoutes(ctx, actor, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresRoute(routes, route.ID, domain.StateActive) { + t.Fatalf("expected route in tenant list, got %+v", routes) + } + activatedRoute, err := control.ActivateRoute(ctx, actor, route.ID, "integration activation") + if err != nil { + t.Fatal(err) + } + if activatedRoute.State != domain.StateActive { + t.Fatalf("expected active route, got %+v", activatedRoute) + } + routeVersions, err := control.ListRouteVersions(ctx, actor, route.ID, 10) + if err != nil { + t.Fatal(err) + } + if len(routeVersions) == 0 { + t.Fatal("expected route versions") + } + retryPolicies, err := control.ListRetryPolicies(ctx, actor, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresRetryPolicy(retryPolicies, retryPolicy.ID, domain.StateActive) { + t.Fatalf("expected retry policy in tenant list, got %+v", retryPolicies) + } + alerts, err := control.ListAlertRules(ctx, actor, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresAlertRule(alerts, alert.ID, domain.StateActive) { + t.Fatalf("expected alert rule in tenant list, got %+v", alerts) + } + channels, err := control.ListNotificationChannels(ctx, actor, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresNotificationChannel(channels, channel.ID, domain.StateActive) { + t.Fatalf("expected notification channel in tenant list, got %+v", channels) + } + notificationDelivery, err := control.TestNotificationChannel(ctx, actor, channel.ID, app.StateChangeRequest{Reason: "integration notification test"}) + if err != nil { + t.Fatal(err) + 
} + if notificationDelivery.ID == "" || notificationDelivery.ChannelID != channel.ID { + t.Fatalf("unexpected notification test delivery: %+v", notificationDelivery) + } + sinks, err := control.ListSIEMSinks(ctx, actor, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresSIEMSink(sinks, sink.ID, domain.StateActive) { + t.Fatalf("expected SIEM sink in tenant list, got %+v", sinks) + } + siemDelivery, err := control.TestSIEMSink(ctx, actor, sink.ID, app.StateChangeRequest{Reason: "integration siem test"}) + if err != nil { + t.Fatal(err) + } + if siemDelivery.ID == "" || siemDelivery.SinkID != sink.ID { + t.Fatalf("unexpected SIEM test delivery: %+v", siemDelivery) + } + if _, err := control.ListEndpointHealth(ctx, actor, 20); err != nil { + t.Fatal(err) + } + if _, err := control.ListWorkers(ctx, actor, 20); err != nil { + t.Fatal(err) + } + if _, err := control.ListQueues(ctx, actor); err != nil { + t.Fatal(err) + } + + if deleted, err := control.DeleteSubscription(ctx, actor, subscription.ID, app.StateChangeRequest{Reason: "integration disable"}); err != nil || deleted.State != domain.StateDisabled { + t.Fatalf("expected disabled subscription, got %+v err=%v", deleted, err) + } + if deleted, err := control.DeleteRoute(ctx, actor, route.ID, app.StateChangeRequest{Reason: "integration inactivate"}); err != nil || deleted.State != domain.StateInactive { + t.Fatalf("expected inactive route, got %+v err=%v", deleted, err) + } + if deleted, err := control.DeleteRetryPolicy(ctx, actor, retryPolicy.ID, app.StateChangeRequest{Reason: "integration disable"}); err != nil || deleted.State != domain.StateDisabled { + t.Fatalf("expected disabled retry policy, got %+v err=%v", deleted, err) + } + if deleted, err := control.DeleteAlertRule(ctx, actor, alert.ID, app.StateChangeRequest{Reason: "integration disable"}); err != nil || deleted.State != domain.StateDisabled { + t.Fatalf("expected disabled alert rule, got %+v err=%v", deleted, err) + } + if deleted, err := 
control.DeleteNotificationChannel(ctx, actor, channel.ID, app.StateChangeRequest{Reason: "integration disable"}); err != nil || deleted.State != domain.StateDisabled { + t.Fatalf("expected disabled notification channel, got %+v err=%v", deleted, err) + } + if deleted, err := control.DeleteSIEMSink(ctx, actor, sink.ID, app.StateChangeRequest{Reason: "integration disable"}); err != nil || deleted.State != domain.StateDisabled { + t.Fatalf("expected disabled SIEM sink, got %+v err=%v", deleted, err) + } + if deleted, err := control.DeleteEndpoint(ctx, actor, endpoint.ID, app.StateChangeRequest{Reason: "integration disable"}); err != nil || deleted.State != domain.StateDisabled { + t.Fatalf("expected disabled endpoint, got %+v err=%v", deleted, err) + } + if deleted, err := control.DeleteSource(ctx, actor, source.ID, app.StateChangeRequest{Reason: "integration disable"}); err != nil || deleted.State != domain.StateDisabled { + t.Fatalf("expected disabled source, got %+v err=%v", deleted, err) + } +} + +func TestPostgresEnterpriseIdentitySessionsAndProviderLifecycle(t *testing.T) { + ctx, store, actor := openPostgresIntegrationStore(t) + defer store.Close() + + suffix := time.Now().UTC().Format("150405.000000000") + idp, err := store.CreateIdentityProvider(ctx, actor.TenantID, actor.ID, app.CreateIdentityProviderRequest{ + Name: " Integration OIDC ", + IssuerURL: " https://issuer.example.com ", + AuthorizationURL: " https://issuer.example.com/authorize ", + TokenURL: " https://issuer.example.com/token ", + JWKSURL: " https://issuer.example.com/jwks.json ", + ClientID: " client-" + suffix + " ", + ClientSecret: " oidc-secret-" + suffix + " ", + RedirectURI: " https://webhookery.example.com/auth/callback ", + AllowedEmailDomains: []string{" Example.COM ", "example.com", "", "Ops.Example.com"}, + }) + if err != nil { + t.Fatal(err) + } + if idp.ProviderType != app.IdentityProviderOIDC || idp.Name != "Integration OIDC" { + t.Fatalf("expected default OIDC provider with 
trimmed name, got %+v", idp) + } + if strings.Join(idp.AllowedEmailDomains, ",") != "example.com,ops.example.com" { + t.Fatalf("expected normalized allowed domains, got %v", idp.AllowedEmailDomains) + } + assertPostgresAuditEvent(t, ctx, store, actor.TenantID, "identity_provider.created", "identity_provider", idp.ID) + idps, err := store.ListIdentityProviders(ctx, actor.TenantID, 10) + if err != nil { + t.Fatal(err) + } + if !containsPostgresIdentityProvider(idps, idp.ID, domain.StateActive) { + t.Fatalf("expected identity provider in tenant list, got %+v", idps) + } + + gotIDP, err := store.GetIdentityProvider(ctx, actor.TenantID, idp.ID) + if err != nil { + t.Fatal(err) + } + if string(gotIDP.ClientSecret) != " oidc-secret-"+suffix+" " { + t.Fatalf("expected decrypted client secret to round trip") + } + if _, err := store.GetIdentityProvider(ctx, "ten_it_wrong_"+suffix, idp.ID); !errors.Is(err, app.ErrNotFound) { + t.Fatalf("wrong-tenant identity provider lookup must be hidden, got %v", err) + } + testedIDP, err := store.TestIdentityProvider(ctx, actor.TenantID, idp.ID, actor.ID, "integration smoke") + if err != nil { + t.Fatal(err) + } + if len(testedIDP.ClientSecret) != 0 { + t.Fatal("identity provider test result must not expose the client secret") + } + + stateHash := app.HashToken("state-" + suffix) + if err := store.CreateOIDCLoginState(ctx, domain.OIDCLoginState{ + TenantID: actor.TenantID, + IdentityProviderID: idp.ID, + StateHash: stateHash, + NonceHash: app.HashToken("nonce-" + suffix), + PKCEVerifier: []byte("pkce-verifier-" + suffix), + RedirectAfter: "/events", + ExpiresAt: time.Now().UTC().Add(time.Hour), + }); err != nil { + t.Fatal(err) + } + consumed, consumedIDP, err := store.ConsumeOIDCLoginState(ctx, stateHash) + if err != nil { + t.Fatal(err) + } + if consumed.IdentityProviderID != idp.ID || consumedIDP.ID != idp.ID || string(consumed.PKCEVerifier) != "pkce-verifier-"+suffix { + t.Fatalf("unexpected consumed OIDC state/provider: state=%+v 
idp=%+v", consumed, consumedIDP) + } + if _, _, err := store.ConsumeOIDCLoginState(ctx, stateHash); !errors.Is(err, app.ErrUnauthorized) { + t.Fatalf("OIDC login state must be one-time use, got %v", err) + } + + sessionHash := app.HashToken("session-" + suffix) + session, sessionActor, err := store.CreateOIDCSession(ctx, app.OIDCSessionInput{ + TenantID: actor.TenantID, + IdentityProviderID: idp.ID, + ExternalSubject: "sub-" + suffix, + Email: "User+" + suffix + "@Example.com", + EmailVerified: true, + DisplayName: "OIDC User", + SessionHash: sessionHash, + UserAgentHash: app.HashToken("ua-" + suffix), + IPHash: app.HashToken("ip-" + suffix), + ExpiresAt: time.Now().UTC().Add(time.Hour), + }) + if err != nil { + t.Fatal(err) + } + if session.ExternalIdentityID == "" || sessionActor.TenantID != actor.TenantID || sessionActor.Role != authz.RoleSupport { + t.Fatalf("unexpected OIDC session/actor: session=%+v actor=%+v", session, sessionActor) + } + authenticated, err := store.AuthenticateSession(ctx, sessionHash) + if err != nil { + t.Fatal(err) + } + if authenticated.ID != sessionActor.ID || authenticated.TenantID != actor.TenantID { + t.Fatalf("unexpected authenticated actor: %+v", authenticated) + } + current, err := store.CurrentAuthSession(ctx, actor.TenantID, sessionActor.ID, sessionHash) + if err != nil { + t.Fatal(err) + } + if current.ID != session.ID { + t.Fatalf("expected current session %s, got %+v", session.ID, current) + } + sessions, err := store.ListAuthSessions(ctx, actor.TenantID, 10) + if err != nil { + t.Fatal(err) + } + if !containsPostgresAuthSession(sessions, session.ID, domain.StateActive) { + t.Fatalf("expected active session in tenant list, got %+v", sessions) + } + revoked, err := store.RevokeAuthSessionByID(ctx, actor.TenantID, session.ID, actor.ID, "integration revoke") + if err != nil { + t.Fatal(err) + } + if revoked.State != "revoked" { + t.Fatalf("expected revoked session, got %+v", revoked) + } + if _, err := 
store.AuthenticateSession(ctx, sessionHash); !errors.Is(err, app.ErrUnauthorized) { + t.Fatalf("revoked session must not authenticate, got %v", err) + } + assertPostgresAuditEvent(t, ctx, store, actor.TenantID, "auth_session.revoked", "auth_session", session.ID) + + logoutSessionHash := app.HashToken("session-logout-" + suffix) + if _, _, err := store.CreateOIDCSession(ctx, app.OIDCSessionInput{ + TenantID: actor.TenantID, + IdentityProviderID: idp.ID, + ExternalSubject: "sub-logout-" + suffix, + Email: "logout+" + suffix + "@example.com", + DisplayName: "Logout User", + SessionHash: logoutSessionHash, + ExpiresAt: time.Now().UTC().Add(time.Hour), + }); err != nil { + t.Fatal(err) + } + if err := store.RevokeAuthSession(ctx, actor.TenantID, actor.ID, logoutSessionHash, "integration logout"); err != nil { + t.Fatal(err) + } + if _, err := store.AuthenticateSession(ctx, logoutSessionHash); !errors.Is(err, app.ErrUnauthorized) { + t.Fatalf("logged-out session must not authenticate, got %v", err) + } + + secondSessionHash := app.HashToken("session-disabled-idp-" + suffix) + secondSession, _, err := store.CreateOIDCSession(ctx, app.OIDCSessionInput{ + TenantID: actor.TenantID, + IdentityProviderID: idp.ID, + ExternalSubject: "sub-disabled-" + suffix, + Email: "disabled+" + suffix + "@example.com", + DisplayName: "Disabled IDP User", + SessionHash: secondSessionHash, + ExpiresAt: time.Now().UTC().Add(time.Hour), + }) + if err != nil { + t.Fatal(err) + } + disabled, err := store.DisableIdentityProvider(ctx, actor.TenantID, idp.ID, actor.ID, "integration disable") + if err != nil { + t.Fatal(err) + } + if disabled.State != domain.StateDisabled { + t.Fatalf("expected disabled identity provider, got %+v", disabled) + } + if _, err := store.AuthenticateSession(ctx, secondSessionHash); !errors.Is(err, app.ErrUnauthorized) { + t.Fatalf("sessions from disabled identity providers must not authenticate, got %v", err) + } + sessions, err = store.ListAuthSessions(ctx, 
actor.TenantID, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresAuthSession(sessions, secondSession.ID, "revoked") { + t.Fatalf("disabling identity provider should revoke active sessions, got %+v", sessions) + } +} + +func TestPostgresSCIMAndPolicyLifecycle(t *testing.T) { + ctx, store, actor := openPostgresIntegrationStore(t) + defer store.Close() + + suffix := time.Now().UTC().Format("150405.000000000") + tokenValue := "scim-token-" + suffix + scimToken, err := store.CreateSCIMToken(ctx, actor.TenantID, actor.ID, domain.SCIMToken{ + Name: "SCIM integration token", + Hash: app.HashToken(tokenValue), + Prefix: "scim", + Last4: "test", + }) + if err != nil { + t.Fatal(err) + } + scimActor, err := store.AuthenticateSCIMTokenHash(ctx, app.HashToken(tokenValue)) + if err != nil { + t.Fatal(err) + } + if scimActor.TenantID != actor.TenantID || scimActor.Role != authz.RoleSecurity || scimActor.ID != "scim:"+scimToken.ID { + t.Fatalf("unexpected SCIM actor: %+v", scimActor) + } + tokens, err := store.ListSCIMTokens(ctx, actor.TenantID, 10) + if err != nil { + t.Fatal(err) + } + if !containsPostgresSCIMToken(tokens, scimToken.ID, domain.StateActive) { + t.Fatalf("expected active SCIM token in tenant list, got %+v", tokens) + } + + user, err := store.SCIMCreateOrReplaceUser(ctx, actor.TenantID, scimActor.ID, app.SCIMUserRequest{ + ExternalID: "scim-user-" + suffix, + UserName: "Scim.User+" + suffix + "@Example.com", + DisplayName: "SCIM User", + }, false) + if err != nil { + t.Fatal(err) + } + if user.ID == "" || user.UserName != "Scim.User+"+suffix+"@Example.com" || !user.Active { + t.Fatalf("unexpected provisioned SCIM user: %+v", user) + } + patchedUser, err := store.SCIMPatchUser(ctx, actor.TenantID, scimActor.ID, user.ID, app.SCIMPatchRequest{Operations: []app.SCIMOperation{{ + Op: "replace", + Path: "displayName", + Value: json.RawMessage(`"SCIM User Patched"`), + }}}) + if err != nil { + t.Fatal(err) + } + if patchedUser.DisplayName != "SCIM User 
Patched" { + t.Fatalf("expected patched display name, got %+v", patchedUser) + } + if _, err := store.SCIMGetUser(ctx, "ten_it_wrong_"+suffix, user.ID); !errors.Is(err, app.ErrNotFound) { + t.Fatalf("wrong-tenant SCIM user lookup must be hidden, got %v", err) + } + users, err := store.SCIMListUsers(ctx, actor.TenantID, 10) + if err != nil { + t.Fatal(err) + } + if !containsPostgresSCIMUser(users, user.ID, true) { + t.Fatalf("expected active SCIM user in tenant list, got %+v", users) + } + + group, err := store.SCIMCreateOrReplaceGroup(ctx, actor.TenantID, scimActor.ID, app.SCIMGroupRequest{ + ExternalID: "scim-group-" + suffix, + DisplayName: "SCIM Operators", + Members: []app.SCIMGroupMember{{Value: user.ID}}, + }, false) + if err != nil { + t.Fatal(err) + } + if group.ID == "" || len(group.Members) != 1 || group.Members[0].Value != user.ID { + t.Fatalf("unexpected SCIM group: %+v", group) + } + patchedGroup, err := store.SCIMPatchGroup(ctx, actor.TenantID, scimActor.ID, group.ID, app.SCIMPatchRequest{Operations: []app.SCIMOperation{{ + Op: "replace", + Path: "displayName", + Value: json.RawMessage(`"SCIM Security"`), + }}}) + if err != nil { + t.Fatal(err) + } + if patchedGroup.DisplayName != "SCIM Security" { + t.Fatalf("expected patched group display name, got %+v", patchedGroup) + } + if groups, err := store.SCIMListGroups(ctx, actor.TenantID, 10); err != nil { + t.Fatal(err) + } else if !containsPostgresSCIMGroup(groups, group.ID, true) { + t.Fatalf("expected active SCIM group in tenant list, got %+v", groups) + } + + binding, err := store.CreateRoleBinding(ctx, actor.TenantID, actor.ID, app.CreateRoleBindingRequest{ + PrincipalType: "group", + PrincipalID: group.ID, + Role: authz.RoleOwner, + ResourceFamily: "secrets", + ResourceID: "secret-" + suffix, + Environment: "prod", + Reason: "integration group elevation", + }) + if err != nil { + t.Fatal(err) + } + bindings, err := store.ListRoleBindings(ctx, actor.TenantID, 10) + if err != nil { + t.Fatal(err) + } 
+ if !containsPostgresRoleBinding(bindings, binding.ID, domain.StateActive) { + t.Fatalf("expected active role binding in tenant list, got %+v", bindings) + } + decision, err := store.ExplainAuthorization(ctx, actor.TenantID, user.ID, app.AuthzExplainRequest{ + Action: "security:write", + ResourceFamily: "secrets", + ResourceID: "secret-" + suffix, + Environment: "prod", + }) + if err != nil { + t.Fatal(err) + } + if !decision.Allowed || decision.MatchedRoleBindingID != binding.ID { + t.Fatalf("expected group role binding to allow security write, got %+v", decision) + } + + policy, err := store.CreateAccessPolicyRule(ctx, actor.TenantID, actor.ID, app.CreateAccessPolicyRuleRequest{ + Name: "deny integration secret writes", + Action: "security:write", + Effect: app.PolicyEffectDeny, + ResourceFamily: "secrets", + Environment: "prod", + Conditions: json.RawMessage(`{"reason":"integration"}`), + Reason: "integration deny override", + }) + if err != nil { + t.Fatal(err) + } + policies, err := store.ListAccessPolicyRules(ctx, actor.TenantID, 10) + if err != nil { + t.Fatal(err) + } + if !containsPostgresAccessPolicyRule(policies, policy.ID, domain.StateActive) { + t.Fatalf("expected active access policy in tenant list, got %+v", policies) + } + denied, err := store.ExplainAuthorization(ctx, actor.TenantID, user.ID, app.AuthzExplainRequest{ + Action: "security:write", + ResourceFamily: "secrets", + ResourceID: "secret-" + suffix, + Environment: "prod", + }) + if err != nil { + t.Fatal(err) + } + if denied.Allowed || denied.MatchedPolicyRuleID != policy.ID || denied.Reason != "denied by access policy" { + t.Fatalf("expected deny policy to override role binding, got %+v", denied) + } + if _, err := store.UpdateRoleBinding(ctx, actor.TenantID, binding.ID, actor.ID, app.UpdateRoleBindingRequest{Reason: "integration binding update"}); err != nil { + t.Fatal(err) + } + if _, err := store.DisableRoleBinding(ctx, actor.TenantID, binding.ID, actor.ID, "integration binding 
disable"); err != nil { + t.Fatal(err) + } + if _, err := store.UpdateAccessPolicyRule(ctx, actor.TenantID, policy.ID, actor.ID, app.UpdateAccessPolicyRuleRequest{Reason: "integration policy update"}); err != nil { + t.Fatal(err) + } + if _, err := store.DisableAccessPolicyRule(ctx, actor.TenantID, policy.ID, actor.ID, "integration policy disable"); err != nil { + t.Fatal(err) + } + if _, err := store.SCIMDeactivateGroup(ctx, actor.TenantID, scimActor.ID, group.ID); err != nil { + t.Fatal(err) + } + deactivatedUser, err := store.SCIMDeactivateUser(ctx, actor.TenantID, scimActor.ID, user.ID) + if err != nil { + t.Fatal(err) + } + if deactivatedUser.Active { + t.Fatalf("expected deactivated SCIM user, got %+v", deactivatedUser) + } + revokedToken, err := store.RevokeSCIMToken(ctx, actor.TenantID, scimToken.ID, actor.ID, "integration revoke") + if err != nil { + t.Fatal(err) + } + if revokedToken.State != "revoked" { + t.Fatalf("expected revoked SCIM token, got %+v", revokedToken) + } + if _, err := store.AuthenticateSCIMTokenHash(ctx, app.HashToken(tokenValue)); !errors.Is(err, app.ErrUnauthorized) { + t.Fatalf("revoked SCIM token must not authenticate, got %v", err) + } + + assertPostgresAuditEvent(t, ctx, store, actor.TenantID, "scim_token.revoked", "scim_token", scimToken.ID) + assertPostgresAuditEvent(t, ctx, store, actor.TenantID, "role_binding.updated", "role_binding", binding.ID) + assertPostgresAuditEvent(t, ctx, store, actor.TenantID, "access_policy.updated", "access_policy", policy.ID) +} + +func TestPostgresSchemaAndTransformationLifecycle(t *testing.T) { + ctx, store, actor := openPostgresIntegrationStore(t) + defer store.Close() + + control := app.NewControlService(store, ssrf.Validator{}) + suffix := strings.ReplaceAll(time.Now().UTC().Format("150405.000000000"), ".", "_") + eventTypeName := "invoice.schema_" + suffix + eventType, err := control.CreateEventType(ctx, actor, app.CreateEventTypeRequest{ + Name: eventTypeName, + Description: "schema 
lifecycle integration", + }) + if err != nil { + t.Fatal(err) + } + if eventType.Name != eventTypeName || eventType.State != domain.StateActive { + t.Fatalf("unexpected event type: %+v", eventType) + } + eventTypes, err := control.ListEventTypes(ctx, actor, 20) + if err != nil { + t.Fatal(err) + } + if !containsPostgresEventType(eventTypes, eventTypeName, domain.StateActive) { + t.Fatalf("expected event type in tenant list, got %+v", eventTypes) + } + if got, err := control.GetEventType(ctx, actor, eventTypeName); err != nil || got.Name != eventTypeName { + t.Fatalf("expected event type lookup to round trip, got=%+v err=%v", got, err) + } + updatedDescription := "schema lifecycle integration updated" + updatedEventType, err := control.UpdateEventType(ctx, actor, eventTypeName, app.UpdateEventTypeRequest{ + Description: &updatedDescription, + Reason: "integration update", + }) + if err != nil { + t.Fatal(err) + } + if updatedEventType.Description != updatedDescription { + t.Fatalf("expected updated event type description, got %+v", updatedEventType) + } + + schema, err := control.CreateEventSchema(ctx, actor, eventTypeName, app.CreateEventSchemaRequest{ + Version: "1", + Schema: `{"type":"object","required":["id"],"properties":{"id":{"type":"string"}}}`, + }) + if err != nil { + t.Fatal(err) + } + if schema.ID == "" || schema.State != domain.StateActive { + t.Fatalf("unexpected event schema: %+v", schema) + } + schemas, err := control.ListEventSchemas(ctx, actor, eventTypeName, 10) + if err != nil { + t.Fatal(err) + } + if !containsPostgresEventSchema(schemas, schema.ID, domain.StateActive) { + t.Fatalf("expected event schema in tenant list, got %+v", schemas) + } + if got, err := control.GetEventSchema(ctx, actor, eventTypeName, "1"); err != nil || got.ID != schema.ID { + t.Fatalf("expected event schema lookup to round trip, got=%+v err=%v", got, err) + } + deprecated := domain.StateDeprecated + updatedSchema, err := control.UpdateEventSchema(ctx, actor, 
eventTypeName, "1", app.UpdateEventSchemaRequest{ + State: &deprecated, + Reason: "integration deprecation", + }) + if err != nil { + t.Fatal(err) + } + if updatedSchema.State != domain.StateDeprecated { + t.Fatalf("expected deprecated schema, got %+v", updatedSchema) + } + + transformation, err := control.CreateTransformation(ctx, actor, app.CreateTransformationRequest{ + Name: "integration transformation", + Operations: json.RawMessage(`[{"op":"set","path":"/metadata/integration","value":"created"}]`), + }) + if err != nil { + t.Fatal(err) + } + if transformation.ID == "" || transformation.ActiveVersionID == "" { + t.Fatalf("expected transformation with active version, got %+v", transformation) + } + transformations, err := control.ListTransformations(ctx, actor, 10) + if err != nil { + t.Fatal(err) + } + if !containsPostgresTransformation(transformations, transformation.ID, domain.StateActive) { + t.Fatalf("expected transformation in tenant list, got %+v", transformations) + } + if got, err := control.GetTransformation(ctx, actor, transformation.ID); err != nil || got.ID != transformation.ID { + t.Fatalf("expected transformation lookup to round trip, got=%+v err=%v", got, err) + } + version, err := control.CreateTransformationVersion(ctx, actor, transformation.ID, app.CreateTransformationVersionRequest{ + Operations: json.RawMessage(`[{"op":"set","path":"/metadata/integration","value":"version2"}]`), + }) + if err != nil { + t.Fatal(err) + } + if version.State != "draft" { + t.Fatalf("expected draft transformation version, got %+v", version) + } + versions, err := control.ListTransformationVersions(ctx, actor, transformation.ID, 10) + if err != nil { + t.Fatal(err) + } + if !containsPostgresTransformationVersion(versions, version.ID, "draft") { + t.Fatalf("expected draft transformation version in tenant list, got %+v", versions) + } + activated, err := control.ActivateTransformationVersion(ctx, actor, transformation.ID, version.ID, 
app.ActivateTransformationVersionRequest{Reason: "integration activation"}) + if err != nil { + t.Fatal(err) + } + if activated.State != domain.StateActive { + t.Fatalf("expected activated transformation version, got %+v", activated) + } + retiredSchema, err := control.DeleteEventSchema(ctx, actor, eventTypeName, "1", app.StateChangeRequest{Reason: "integration retire"}) + if err != nil { + t.Fatal(err) + } + if retiredSchema.State != domain.StateRetired { + t.Fatalf("expected retired schema, got %+v", retiredSchema) + } + disabledType, err := control.DeleteEventType(ctx, actor, eventTypeName, app.StateChangeRequest{Reason: "integration disable"}) + if err != nil { + t.Fatal(err) + } + if disabledType.State != domain.StateDisabled { + t.Fatalf("expected disabled event type, got %+v", disabledType) + } + + assertPostgresAuditEvent(t, ctx, store, actor.TenantID, "event_type.updated", "event_type", eventTypeName) + assertPostgresAuditEvent(t, ctx, store, actor.TenantID, "event_schema.retired", "event_schema", schema.ID) + assertPostgresAuditEvent(t, ctx, store, actor.TenantID, "transformation_version.activated", "transformation", transformation.ID) +} + +func TestPostgresMigrationsAreIdempotentAndEnforceKeyConstraints(t *testing.T) { + ctx, store, _ := openPostgresIntegrationStore(t) + defer store.Close() + + migrationsDir := filepath.Join("..", "..", "..", "migrations") + if err := MigrateUp(ctx, os.Getenv("WEBHOOKERY_TEST_DATABASE_URL"), migrationsDir); err != nil { + t.Fatal(err) + } + files, err := filepath.Glob(filepath.Join(migrationsDir, "*.up.sql")) + if err != nil { + t.Fatal(err) + } + if len(files) == 0 { + t.Fatal("expected migration files") + } + for _, file := range files { + version := strings.TrimSuffix(filepath.Base(file), ".up.sql") + var count int + if err := store.pool.QueryRow(ctx, `SELECT count(*) FROM schema_migrations WHERE version=$1`, version).Scan(&count); err != nil { + t.Fatal(err) + } + if count != 1 { + t.Fatalf("expected migration %s to 
be recorded once after rerun, got %d", version, count) + } + } + + checksumDrillSuffix := strings.ReplaceAll(time.Now().UTC().Format("150405.000000000"), ".", "_") + checksumDrillDir := t.TempDir() + checksumDrillVersion := "999_checksum_drill_" + checksumDrillSuffix + checksumDrillFile := filepath.Join(checksumDrillDir, checksumDrillVersion+".up.sql") + checksumDrillBody := "CREATE TABLE IF NOT EXISTS checksum_drill_" + checksumDrillSuffix + "(id integer);\n" + if err := os.WriteFile(checksumDrillFile, []byte(checksumDrillBody), 0o600); err != nil { + t.Fatal(err) + } + if err := MigrateUp(ctx, os.Getenv("WEBHOOKERY_TEST_DATABASE_URL"), checksumDrillDir); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(checksumDrillFile, []byte(checksumDrillBody+"-- altered after apply\n"), 0o600); err != nil { + t.Fatal(err) + } + err = MigrateUp(ctx, os.Getenv("WEBHOOKERY_TEST_DATABASE_URL"), checksumDrillDir) + if err == nil || !strings.Contains(err.Error(), "checksum mismatch") { + t.Fatalf("expected migration checksum mismatch, got %v", err) + } + + suffix := time.Now().UTC().Format("150405.000000000") + tenantID := "ten_it_migration_" + suffix + if _, err := store.pool.Exec(ctx, `INSERT INTO tenants(id, name) VALUES($1, 'migration constraints') ON CONFLICT (id) DO NOTHING`, tenantID); err != nil { + t.Fatal(err) + } + if _, err := store.pool.Exec(ctx, ` + INSERT INTO sources(id, tenant_id, name, provider, adapter, state, encrypted_secret) + VALUES($1,$2,'migration source','stripe','stripe','active',$3)`, + "src_it_migration_"+suffix, tenantID, []byte("secret")); err != nil { + t.Fatal(err) + } + if _, err := store.pool.Exec(ctx, ` + INSERT INTO raw_payloads(id, tenant_id, sha256, content_type, size_bytes, body) + VALUES($1,$2,'sha256:migration','application/json',2,'{}')`, + "raw_it_migration_"+suffix, tenantID); err != nil { + t.Fatal(err) + } + insertEvent := func(id string) error { + _, err := store.pool.Exec(ctx, ` + INSERT INTO events(id, tenant_id, source_id, 
provider, type, provider_event_id, raw_payload_id, raw_payload_hash, + signature_verified, verification_reason, dedupe_key, dedupe_status, received_at) + VALUES($1,$2,$3,'stripe','invoice.created',$1,$4,'sha256:migration',true,'valid','same-dedupe-key','new',now())`, + id, tenantID, "src_it_migration_"+suffix, "raw_it_migration_"+suffix) + return err + } + if err := insertEvent("evt_it_migration_a_" + suffix); err != nil { + t.Fatal(err) + } + expectPostgresSQLFailure(t, insertEvent("evt_it_migration_b_"+suffix), "duplicate event dedupe key") + + auditA := "aud_it_migration_a_" + suffix + auditB := "aud_it_migration_b_" + suffix + if _, err := store.pool.Exec(ctx, ` + INSERT INTO audit_events(id, tenant_id, actor_id, action, resource, resource_id, reason, occurred_at) + VALUES($1,$2,'usr_it','migration.constraint','test',$1,'constraint',now()), + ($3,$2,'usr_it','migration.constraint','test',$3,'constraint',now())`, + auditA, tenantID, auditB); err != nil { + t.Fatal(err) + } + if _, err := store.pool.Exec(ctx, ` + INSERT INTO audit_chain_entries(id, tenant_id, sequence, audit_event_id, event_hash, previous_chain_hash, chain_hash, + canonicalization_version, source, state) + VALUES($1,$2,1,$3,'sha256:event-a','','sha256:chain-a','audit-chain-v1','backfill','active')`, + "ace_it_migration_a_"+suffix, tenantID, auditA); err != nil { + t.Fatal(err) + } + _, err = store.pool.Exec(ctx, ` + INSERT INTO audit_chain_entries(id, tenant_id, sequence, audit_event_id, event_hash, previous_chain_hash, chain_hash, + canonicalization_version, source, state) + VALUES($1,$2,1,$3,'sha256:event-b','sha256:chain-a','sha256:chain-b','audit-chain-v1','backfill','active')`, + "ace_it_migration_b_"+suffix, tenantID, auditB) + expectPostgresSQLFailure(t, err, "duplicate audit chain sequence") + + fingerprint := "sha256:migration-fingerprint-" + suffix + if _, err := store.pool.Exec(ctx, ` + INSERT INTO producer_mtls_identities(id, tenant_id, name, certificate_fingerprint_sha256, 
cert_subject, not_before, not_after, state) + VALUES($1,$2,'migration mTLS',$3,'CN=migration',now(),now() + interval '1 hour','active')`, + "mtls_it_migration_a_"+suffix, tenantID, fingerprint); err != nil { + t.Fatal(err) + } + _, err = store.pool.Exec(ctx, ` + INSERT INTO producer_mtls_identities(id, tenant_id, name, certificate_fingerprint_sha256, cert_subject, not_before, not_after, state) + VALUES($1,$2,'migration mTLS duplicate',$3,'CN=migration',now(),now() + interval '1 hour','active')`, + "mtls_it_migration_b_"+suffix, tenantID, fingerprint) + expectPostgresSQLFailure(t, err, "duplicate producer mTLS fingerprint") +} + +func TestPostgresAuditFailureRollsBackAPIKeyRevocation(t *testing.T) { + ctx, store, actor := openPostgresIntegrationStore(t) + defer store.Close() + + rawToken := "whkey_audit_failure_" + time.Now().UTC().Format("150405.000000000") + key, err := store.CreateAPIKey(ctx, app.APIKeyCreateInput{ + Key: domain.APIKey{ + TenantID: actor.TenantID, + UserID: actor.ID, + Name: "audit failure key", + Prefix: "whkey_af", + Last4: "0001", + Hash: app.HashToken(rawToken), + Scopes: []string{"events:read"}, + State: domain.StateActive, + }, + Role: authz.RoleOperator, + ActorID: actor.ID, + }) + if err != nil { + t.Fatal(err) + } + poisonNextPostgresAuditSequence(t, ctx, store, actor.TenantID) + + if _, err := store.RevokeAPIKey(ctx, actor.TenantID, key.ID, actor.ID, "audit failure injection"); err == nil { + t.Fatal("expected audit-chain failure to abort API key revocation") + } + var state string + if err := store.pool.QueryRow(ctx, `SELECT state FROM api_keys WHERE tenant_id=$1 AND id=$2`, actor.TenantID, key.ID).Scan(&state); err != nil { + t.Fatal(err) + } + if state != domain.StateActive { + t.Fatalf("API key revocation must roll back when audit evidence fails, got state %q", state) + } + assertPostgresNoAuditEvent(t, ctx, store, actor.TenantID, "api_key.revoked", "api_key", key.ID) +} + +func TestPostgresAuditFailureRollsBackReplayStateChange(t 
*testing.T) { + ctx, store, actor := openPostgresIntegrationStore(t) + defer store.Close() + + job, err := store.CreateReplay(ctx, actor.TenantID, actor.ID, app.ReplayRequest{Reason: "audit failure replay", ConfigMode: app.ReplayConfigCurrent}) + if err != nil { + t.Fatal(err) + } + poisonNextPostgresAuditSequence(t, ctx, store, actor.TenantID) + + if _, err := store.PauseReplayJob(ctx, actor.TenantID, job.ID, actor.ID, "audit failure injection"); err == nil { + t.Fatal("expected audit-chain failure to abort replay pause") + } + var state string + if err := store.pool.QueryRow(ctx, `SELECT state FROM replay_jobs WHERE tenant_id=$1 AND id=$2`, actor.TenantID, job.ID).Scan(&state); err != nil { + t.Fatal(err) + } + if state != "scheduled" { + t.Fatalf("replay state change must roll back when audit evidence fails, got state %q", state) + } + assertPostgresNoAuditEvent(t, ctx, store, actor.TenantID, "replay.paused", "replay_job", job.ID) +} + +func assertPostgresNotFound(t *testing.T, err error) { + t.Helper() + if !errors.Is(err, app.ErrNotFound) { + t.Fatalf("expected wrong-tenant lookup to return not found, got %v", err) + } +} + +func expectPostgresSQLFailure(t *testing.T, err error, operation string) { + t.Helper() + if err == nil { + t.Fatalf("expected SQL constraint failure for %s", operation) + } +} + +func assertPostgresConfigVersion(t *testing.T, ctx context.Context, store *Store, tenantID, resourceType, resourceID string) { + t.Helper() + var count int + if err := store.pool.QueryRow(ctx, ` + SELECT count(*) + FROM config_versions + WHERE tenant_id=$1 AND resource_type=$2 AND resource_id=$3`, + tenantID, resourceType, resourceID, + ).Scan(&count); err != nil { + t.Fatal(err) + } + if count == 0 { + t.Fatalf("expected config version for %s/%s", resourceType, resourceID) + } +} + +func assertPostgresAuditEvent(t *testing.T, ctx context.Context, store *Store, tenantID, action, resource, resourceID string) { + t.Helper() + var count int + if err := 
store.pool.QueryRow(ctx, ` + SELECT count(*) + FROM audit_events + WHERE tenant_id=$1 AND action=$2 AND resource=$3 AND resource_id=$4`, + tenantID, action, resource, resourceID, + ).Scan(&count); err != nil { + t.Fatal(err) + } + if count == 0 { + t.Fatalf("expected audit event %s for %s/%s", action, resource, resourceID) + } +} + +func assertPostgresNoAuditEvent(t *testing.T, ctx context.Context, store *Store, tenantID, action, resource, resourceID string) { + t.Helper() + var count int + if err := store.pool.QueryRow(ctx, ` + SELECT count(*) + FROM audit_events + WHERE tenant_id=$1 AND action=$2 AND resource=$3 AND resource_id=$4`, + tenantID, action, resource, resourceID, + ).Scan(&count); err != nil { + t.Fatal(err) + } + if count != 0 { + t.Fatalf("expected no audit event %s for %s/%s, got %d", action, resource, resourceID, count) + } +} + +func containsPostgresAuthSession(sessions []domain.AuthSession, id, state string) bool { + for _, session := range sessions { + if session.ID == id && session.State == state { + return true + } + } + return false +} + +func containsPostgresSource(sources []domain.Source, id, state string) bool { + for _, source := range sources { + if source.ID == id && source.State == state { + return true + } + } + return false +} + +func containsPostgresEndpoint(endpoints []domain.Endpoint, id, state string) bool { + for _, endpoint := range endpoints { + if endpoint.ID == id && endpoint.State == state { + return true + } + } + return false +} + +func containsPostgresSubscription(subscriptions []domain.Subscription, id, state string) bool { + for _, subscription := range subscriptions { + if subscription.ID == id && subscription.State == state { + return true + } + } + return false +} + +func containsPostgresRoute(routes []domain.Route, id, state string) bool { + for _, route := range routes { + if route.ID == id && route.State == state { + return true + } + } + return false +} + +func containsPostgresRetryPolicy(policies 
[]domain.RetryPolicy, id, state string) bool { + for _, policy := range policies { + if policy.ID == id && policy.State == state { + return true + } + } + return false +} + +func containsPostgresAlertRule(rules []domain.AlertRule, id, state string) bool { + for _, rule := range rules { + if rule.ID == id && rule.State == state { + return true + } + } + return false +} + +func containsPostgresNotificationChannel(channels []domain.NotificationChannel, id, state string) bool { + for _, channel := range channels { + if channel.ID == id && channel.State == state { + return true + } + } + return false +} + +func containsPostgresSIEMSink(sinks []domain.SIEMSink, id, state string) bool { + for _, sink := range sinks { + if sink.ID == id && sink.State == state { + return true + } + } + return false +} + +func containsPostgresEventType(types []domain.EventType, name, state string) bool { + for _, eventType := range types { + if eventType.Name == name && eventType.State == state { + return true + } + } + return false +} + +func containsPostgresEventSchema(schemas []domain.EventSchema, id, state string) bool { + for _, schema := range schemas { + if schema.ID == id && schema.State == state { + return true + } + } + return false +} + +func containsPostgresTransformation(transformations []domain.Transformation, id, state string) bool { + for _, transformation := range transformations { + if transformation.ID == id && transformation.State == state { + return true + } + } + return false +} + +func containsPostgresTransformationVersion(versions []domain.TransformationVersion, id, state string) bool { + for _, version := range versions { + if version.ID == id && version.State == state { + return true + } + } + return false +} + +func containsPostgresIdentityProvider(idps []domain.IdentityProvider, id, state string) bool { + for _, idp := range idps { + if idp.ID == id && idp.State == state { + return true + } + } + return false +} + +func containsPostgresSCIMToken(tokens 
[]domain.SCIMToken, id, state string) bool { + for _, token := range tokens { + if token.ID == id && token.State == state { + return true + } + } + return false +} + +func containsPostgresSCIMUser(users []app.SCIMUser, id string, active bool) bool { + for _, user := range users { + if user.ID == id && user.Active == active { + return true + } + } + return false +} + +func containsPostgresSCIMGroup(groups []app.SCIMGroup, id string, active bool) bool { + for _, group := range groups { + if group.ID == id && group.Active == active { + return true + } + } + return false +} + +func containsPostgresRoleBinding(bindings []domain.RoleBinding, id, state string) bool { + for _, binding := range bindings { + if binding.ID == id && binding.State == state { + return true + } + } + return false +} + +func containsPostgresAccessPolicyRule(rules []domain.AccessPolicyRule, id, state string) bool { + for _, rule := range rules { + if rule.ID == id && rule.State == state { + return true + } + } + return false +} + +func poisonNextPostgresAuditSequence(t *testing.T, ctx context.Context, store *Store, tenantID string) { + t.Helper() + var maxSequence int64 + if err := store.pool.QueryRow(ctx, `SELECT COALESCE(max(sequence), 0) FROM audit_chain_entries WHERE tenant_id=$1`, tenantID).Scan(&maxSequence); err != nil { + t.Fatal(err) + } + nextSequence := maxSequence + 1 + if _, err := store.pool.Exec(ctx, ` + INSERT INTO audit_chain_heads(tenant_id, sequence, chain_hash) + VALUES($1,$2,'sha256:poison-head') + ON CONFLICT (tenant_id) DO UPDATE SET sequence=EXCLUDED.sequence, chain_hash=EXCLUDED.chain_hash`, + tenantID, nextSequence-1, + ); err != nil { + t.Fatal(err) + } + suffix := strings.NewReplacer(".", "_", ":", "_").Replace(time.Now().UTC().Format("150405.000000000")) + if _, err := store.pool.Exec(ctx, ` + INSERT INTO audit_chain_entries(id, tenant_id, sequence, audit_event_id, event_hash, previous_chain_hash, chain_hash, + canonicalization_version, source, state, created_at) + 
VALUES($1,$2,$3,$4,'sha256:poison-event','sha256:poison-head','sha256:poison-chain','audit-chain-v1','live','active',now())`, + "ace_it_poison_"+suffix, tenantID, nextSequence, "aud_it_poison_"+suffix, + ); err != nil { + t.Fatal(err) + } +} + func openPostgresIntegrationStore(t *testing.T) (context.Context, *Store, authz.Actor) { t.Helper() databaseURL := os.Getenv("WEBHOOKERY_TEST_DATABASE_URL") if databaseURL == "" { - t.Skip("WEBHOOKERY_TEST_DATABASE_URL is required") + t.Skip("WEBHOOKERY_TEST_DATABASE_URL is required to prove live Postgres tenant predicates, transactions, locks, outbox, replay, export, and migration behavior") } ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) t.Cleanup(cancel) @@ -339,6 +1879,22 @@ func openPostgresIntegrationStore(t *testing.T) (context.Context, *Store, authz. } suffix := time.Now().UTC().Format("150405.000000000") actor := authz.Actor{ID: "usr_it_" + suffix, TenantID: "ten_it_" + suffix, Role: authz.RoleOwner, Scopes: []string{"*"}} + if _, err := store.CreateAPIKey(ctx, app.APIKeyCreateInput{ + Key: domain.APIKey{ + TenantID: actor.TenantID, + UserID: actor.ID, + Name: "integration owner", + Prefix: "it-owner", + Last4: "test", + Hash: app.HashToken("integration-owner-" + suffix), + Scopes: []string{"*"}, + State: domain.StateActive, + }, + Role: authz.RoleOwner, + ActorID: actor.ID, + }); err != nil { + t.Fatal(err) + } return ctx, store, actor } diff --git a/internal/adapters/postgres/store_split_static_test.go b/internal/adapters/postgres/store_split_static_test.go new file mode 100644 index 0000000..8e28195 --- /dev/null +++ b/internal/adapters/postgres/store_split_static_test.go @@ -0,0 +1,20 @@ +package postgres + +import ( + "os" + "strings" + "testing" +) + +func TestPostgresStoreIsSplitByResourceFamily(t *testing.T) { + if _, err := os.Stat("store_ingest.go"); err != nil { + t.Fatal("expected inbound capture methods to live in store_ingest.go") + } + body, err := os.ReadFile("store.go") + 
if err != nil { + t.Fatal(err) + } + if strings.Contains(string(body), "func (s *Store) CaptureInbound") { + t.Fatal("CaptureInbound should live in the ingest resource-family file") + } +} diff --git a/internal/adapters/postgres/subscription_crud_static_test.go b/internal/adapters/postgres/subscription_crud_static_test.go deleted file mode 100644 index eef9e31..0000000 --- a/internal/adapters/postgres/subscription_crud_static_test.go +++ /dev/null @@ -1,27 +0,0 @@ -package postgres - -import ( - "os" - "strings" - "testing" -) - -func TestSubscriptionCRUDStoreQueriesAreTenantScopedVersionedAndAudited(t *testing.T) { - body, err := os.ReadFile("store.go") - if err != nil { - t.Fatal(err) - } - text := string(body) - for _, want := range []string{ - "FROM subscriptions WHERE tenant_id=$1 AND id=$2", - "FOR UPDATE", - "domain.ConfigResourceSubscription", - "subscription.updated", - "subscription.disabled", - "SELECT state FROM endpoints WHERE tenant_id=$1 AND id=$2", - } { - if !strings.Contains(text, want) { - t.Fatalf("subscription CRUD store missing tenant-scoped/config/audit evidence %q", want) - } - } -} diff --git a/internal/adapters/postgres/tenant_isolation_static_test.go b/internal/adapters/postgres/tenant_isolation_static_test.go new file mode 100644 index 0000000..8a20ff2 --- /dev/null +++ b/internal/adapters/postgres/tenant_isolation_static_test.go @@ -0,0 +1,45 @@ +package postgres + +import ( + "os" + "strings" + "testing" +) + +func TestTenantIsolationEvidenceAuthorityPredicates(t *testing.T) { + storeBody, err := os.ReadFile("store.go") + if err != nil { + t.Fatal(err) + } + incidentBody, err := os.ReadFile("incidents.go") + if err != nil { + t.Fatal(err) + } + storeText := string(storeBody) + incidentText := string(incidentBody) + + storeRequirements := map[string]string{ + "events export": "query := `SELECT id FROM events WHERE tenant_id=$1`", + "deliveries": "WHERE d.tenant_id=$1", + "replay jobs": "replayJobSelectSQL+` WHERE tenant_id=$1", + "raw 
payload export": "WHERE tenant_id=$1 AND event_id=$2", + "audit chain": "WHERE c.tenant_id=$1 AND c.sequence BETWEEN $2 AND $3", + "evidence export": "FROM evidence_exports\n\t\tWHERE tenant_id=$1 AND id=$2", + "provider connection": "FROM provider_connections\n\t\tWHERE tenant_id=$1", + } + for name, want := range storeRequirements { + if !strings.Contains(storeText, want) { + t.Fatalf("%s missing tenant predicate evidence %q", name, want) + } + } + + incidentRequirements := map[string]string{ + "incident exists": "SELECT EXISTS (SELECT 1 FROM incidents WHERE tenant_id=$1 AND id=$2)", + "event exists": "SELECT EXISTS (SELECT 1 FROM events WHERE tenant_id=$1 AND id=$2)", + } + for name, want := range incidentRequirements { + if !strings.Contains(incidentText, want) { + t.Fatalf("%s missing same-tenant incident evidence predicate %q", name, want) + } + } +} diff --git a/internal/adapters/signalhttp/client.go b/internal/adapters/signalhttp/client.go index d65b65e..7b04eac 100644 --- a/internal/adapters/signalhttp/client.go +++ b/internal/adapters/signalhttp/client.go @@ -3,6 +3,7 @@ package signalhttp import ( "bytes" "context" + "errors" "fmt" "io" "net/http" @@ -25,9 +26,16 @@ type Result struct { FailureClass string } -func HTTPClient(timeout time.Duration) *http.Client { +var errUnsafeCustomTransport = errors.New("custom HTTP transport cannot enforce pinned egress") + +func HTTPClient(timeout time.Duration, resolvers ...ssrf.Resolver) *http.Client { + var resolver ssrf.Resolver + if len(resolvers) > 0 { + resolver = resolvers[0] + } return &http.Client{ - Timeout: timeout, + Timeout: timeout, + Transport: ssrf.NewPinnedTransport(nil, resolver, ssrf.DefaultPolicy()), CheckRedirect: func(req *http.Request, via []*http.Request) error { return http.ErrUseLastResponse }, @@ -61,19 +69,14 @@ func (c Client) Deliver(ctx context.Context, rawURL string, body []byte, secret if err != nil { return Result{FailureClass: "policy_blocked"}, err } - httpClient := c.HTTP - if 
httpClient == nil { - httpClient = HTTPClient(10 * time.Second) - } else if httpClient.CheckRedirect == nil { - copy := *httpClient - copy.CheckRedirect = func(req *http.Request, via []*http.Request) error { - return http.ErrUseLastResponse - } - httpClient = &copy + httpClient, err := c.httpClient() + if err != nil { + return Result{FailureClass: "client_configuration_error"}, err } resp, err := httpClient.Do(req) if err != nil { - return Result{FailureClass: "network_error"}, err + failureClass, safeErr := safeDoError(err) + return Result{FailureClass: failureClass}, safeErr } defer func() { _ = resp.Body.Close() }() bodyBytes, err := readTruncated(resp.Body, 16<<10) @@ -88,6 +91,35 @@ func (c Client) Deliver(ctx context.Context, rawURL string, body []byte, secret }, nil } +func (c Client) httpClient() (*http.Client, error) { + if c.HTTP == nil { + return HTTPClient(10*time.Second, c.SSRF.Resolver), nil + } + copy := *c.HTTP + if copy.CheckRedirect == nil { + copy.CheckRedirect = func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + } + } + switch transport := copy.Transport.(type) { + case nil: + copy.Transport = ssrf.NewPinnedTransport(nil, c.SSRF.Resolver, ssrf.DefaultPolicy()) + case *http.Transport: + copy.Transport = ssrf.NewPinnedTransport(transport, c.SSRF.Resolver, ssrf.DefaultPolicy()) + default: + return nil, errUnsafeCustomTransport + } + return &copy, nil +} + +func safeDoError(err error) (string, error) { + var policyErr ssrf.PolicyError + if errors.As(err, &policyErr) { + return "policy_blocked", policyErr + } + return "network_error", errors.New("signal network error") +} + func readTruncated(body io.Reader, max int64) ([]byte, error) { return io.ReadAll(io.LimitReader(body, max)) } diff --git a/internal/adapters/signalhttp/client_test.go b/internal/adapters/signalhttp/client_test.go index 989c86f..9893166 100644 --- a/internal/adapters/signalhttp/client_test.go +++ b/internal/adapters/signalhttp/client_test.go @@ -2,7 
+2,10 @@ package signalhttp import ( "context" + "errors" + "net/http" "net/netip" + "strings" "testing" "time" @@ -37,3 +40,102 @@ func TestBuildRequestBlocksSSRFUnsafeURLs(t *testing.T) { t.Fatal("expected unsafe signal URL to be blocked") } } + +func TestHTTPClientUsesPinnedEgressTransport(t *testing.T) { + client := HTTPClient(2*time.Second, ssrf.StaticResolver{ + "signals.example": {netip.MustParseAddr("10.0.0.10")}, + }) + transport, ok := client.Transport.(*http.Transport) + if !ok { + t.Fatalf("expected pinned HTTP transport, got %T", client.Transport) + } + _, err := transport.DialContext(context.Background(), "tcp", "signals.example:443") + var policyErr ssrf.PolicyError + if !errors.As(err, &policyErr) { + t.Fatalf("expected dial-time SSRF policy error, got %v", err) + } +} + +func TestSafeDoErrorDoesNotLeakCustomerURLTokens(t *testing.T) { + failureClass, err := safeDoError(errors.New(`Post "https://signals.example/hook?token=secret-token": dial tcp 203.0.113.10:443: connect: refused`)) + if failureClass != "network_error" { + t.Fatalf("expected network_error, got %q", failureClass) + } + if err == nil || strings.Contains(err.Error(), "secret-token") || strings.Contains(err.Error(), "signals.example") { + t.Fatalf("network error leaked customer URL detail: %v", err) + } +} + +func TestSignalClassifyHTTPStatuses(t *testing.T) { + tests := []struct { + status int + want string + }{ + {status: http.StatusOK, want: "success"}, + {status: http.StatusNoContent, want: "success"}, + {status: http.StatusTemporaryRedirect, want: "redirect_blocked"}, + {status: http.StatusTooManyRequests, want: "temporary_http"}, + {status: http.StatusBadGateway, want: "temporary_http"}, + {status: http.StatusBadRequest, want: "permanent_http"}, + } + for _, tt := range tests { + t.Run(http.StatusText(tt.status), func(t *testing.T) { + if got := classify(tt.status); got != tt.want { + t.Fatalf("classify(%d)=%q want %q", tt.status, got, tt.want) + } + }) + } +} + +func 
TestDeliverRejectsUnsafeCustomHTTPTransport(t *testing.T) { + client := Client{ + HTTP: &http.Client{Transport: roundTripFunc(func(*http.Request) (*http.Response, error) { + t.Fatal("unsafe custom transport must not be used") + return nil, nil + })}, + SSRF: ssrf.Validator{Resolver: ssrf.StaticResolver{ + "signals.example": {netip.MustParseAddr("93.184.216.34")}, + }}, + } + result, err := client.Deliver(context.Background(), "https://signals.example/hook", []byte("{}"), []byte("secret")) + if err == nil { + t.Fatal("expected unsafe custom transport rejection") + } + if result.FailureClass != "client_configuration_error" { + t.Fatalf("expected client_configuration_error, got %+v", result) + } +} + +func TestReadTruncatedSignalResponse(t *testing.T) { + body, err := readTruncated(strings.NewReader(strings.Repeat("x", 20)), 8) + if err != nil { + t.Fatal(err) + } + if string(body) != "xxxxxxxx" { + t.Fatalf("unexpected truncated body %q", string(body)) + } +} + +func TestSafeDoErrorPreservesPolicyErrors(t *testing.T) { + failureClass, err := safeDoError(ssrf.PolicyError{Reasons: []string{"blocked_ip_range"}}) + if failureClass != "policy_blocked" { + t.Fatalf("expected policy_blocked, got %q", failureClass) + } + var policyErr ssrf.PolicyError + if !errors.As(err, &policyErr) { + t.Fatalf("expected policy error, got %v", err) + } +} + +func TestHTTPClientDisablesRedirects(t *testing.T) { + client := HTTPClient(2 * time.Second) + if err := client.CheckRedirect(nil, nil); !errors.Is(err, http.ErrUseLastResponse) { + t.Fatalf("expected redirects disabled, got %v", err) + } +} + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req) +} diff --git a/internal/app/adapter_registry.go b/internal/app/adapter_registry.go index 68ff728..6bc5ab5 100644 --- a/internal/app/adapter_registry.go +++ b/internal/app/adapter_registry.go @@ -44,7 +44,7 @@ type 
AdapterVersionTransitionRequest struct { } func (s *ControlService) CreateProviderAdapter(ctx context.Context, actor authz.Actor, req CreateProviderAdapterRequest) (domain.ProviderAdapter, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "provider_adapter", "", "") { return domain.ProviderAdapter{}, ErrForbidden } if err := validateProviderAdapterRequest(&req); err != nil { @@ -54,14 +54,14 @@ func (s *ControlService) CreateProviderAdapter(ctx context.Context, actor authz. } func (s *ControlService) ListProviderAdapters(ctx context.Context, actor authz.Actor, limit int) ([]domain.ProviderAdapter, error) { - if !authz.Can(actor, "sources:read", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:read", "provider_adapter", "", "") { return nil, ErrForbidden } return s.store.ListProviderAdapters(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetProviderAdapter(ctx context.Context, actor authz.Actor, adapterID string) (domain.ProviderAdapter, error) { - if !authz.Can(actor, "sources:read", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:read", "provider_adapter", adapterID, "") { return domain.ProviderAdapter{}, ErrForbidden } if strings.TrimSpace(adapterID) == "" { @@ -71,7 +71,7 @@ func (s *ControlService) GetProviderAdapter(ctx context.Context, actor authz.Act } func (s *ControlService) CreateAdapterVersion(ctx context.Context, actor authz.Actor, adapterID string, req CreateAdapterVersionRequest) (domain.AdapterVersion, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "provider_adapter", adapterID, "") { return domain.AdapterVersion{}, ErrForbidden } if strings.TrimSpace(adapterID) == "" { @@ -84,7 +84,7 @@ func (s *ControlService) CreateAdapterVersion(ctx context.Context, actor authz.A } func (s *ControlService) ListAdapterVersions(ctx context.Context, actor authz.Actor, adapterID string, 
limit int) ([]domain.AdapterVersion, error) { - if !authz.Can(actor, "sources:read", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:read", "provider_adapter", adapterID, "") { return nil, ErrForbidden } if strings.TrimSpace(adapterID) == "" { @@ -94,7 +94,7 @@ func (s *ControlService) ListAdapterVersions(ctx context.Context, actor authz.Ac } func (s *ControlService) CreateAdapterTestVector(ctx context.Context, actor authz.Actor, adapterID, versionID string, req CreateAdapterTestVectorRequest) (domain.AdapterTestVector, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "adapter_version", versionID, "") { return domain.AdapterTestVector{}, ErrForbidden } if strings.TrimSpace(adapterID) == "" || strings.TrimSpace(versionID) == "" { @@ -107,7 +107,7 @@ func (s *ControlService) CreateAdapterTestVector(ctx context.Context, actor auth } func (s *ControlService) TransitionAdapterVersion(ctx context.Context, actor authz.Actor, adapterID, versionID string, req AdapterVersionTransitionRequest) (domain.AdapterVersion, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "adapter_version", versionID, "") { return domain.AdapterVersion{}, ErrForbidden } if strings.TrimSpace(adapterID) == "" || strings.TrimSpace(versionID) == "" || strings.TrimSpace(req.Reason) == "" { diff --git a/internal/app/authorization.go b/internal/app/authorization.go new file mode 100644 index 0000000..89734d6 --- /dev/null +++ b/internal/app/authorization.go @@ -0,0 +1,83 @@ +package app + +import ( + "context" + "strings" + + "webhookery/internal/authz" +) + +type AuthorizationService struct { + store EnterpriseIdentityStore +} + +type AuthorizationRequest struct { + Actor authz.Actor + TenantID string + Action string + ResourceFamily string + ResourceID string + Environment string +} + +func NewAuthorizationService(store ControlStore) AuthorizationService { + 
enterpriseStore, _ := store.(EnterpriseIdentityStore) + return AuthorizationService{store: enterpriseStore} +} + +func (s AuthorizationService) Authorize(ctx context.Context, req AuthorizationRequest) authz.Decision { + resource := authz.Resource{ + TenantID: strings.TrimSpace(req.TenantID), + Family: strings.TrimSpace(req.ResourceFamily), + ID: strings.TrimSpace(req.ResourceID), + Environment: strings.TrimSpace(req.Environment), + } + action := strings.TrimSpace(req.Action) + decision := authz.Decision{ + Allowed: false, + Action: action, + Resource: resource, + Reason: "authorization context is incomplete", + RequiredScopes: []string{action}, + } + if req.Actor.ID == "" || req.Actor.TenantID == "" || resource.TenantID == "" || action == "" || resource.Family == "" { + return decision + } + if req.Actor.TenantID != resource.TenantID { + decision.Reason = "actor tenant does not match resource tenant" + return decision + } + if s.store != nil { + explained, err := s.store.ExplainAuthorization(ctx, resource.TenantID, req.Actor.ID, AuthzExplainRequest{ + Action: action, + ResourceFamily: resource.Family, + ResourceID: resource.ID, + Environment: resource.Environment, + }) + if err == nil { + explained.RequiredScopes = []string{action} + explained = redactAuthorizationDecision(explained) + if !explained.Allowed { + return explained + } + if !actorScopesAllow(req.Actor, action) { + explained.Allowed = false + explained.Reason = "actor scope does not allow action" + } + return explained + } + } + if !authz.Can(req.Actor, action, resource.TenantID) { + decision.Reason = "baseline role or scope does not allow action" + return decision + } + decision.Allowed = true + decision.Reason = "allowed by baseline role" + decision.MatchedRole = string(req.Actor.Role) + return decision +} + +func redactAuthorizationDecision(decision authz.Decision) authz.Decision { + decision.Resource.Attributes = nil + return decision +} diff --git a/internal/app/authorization_test.go 
b/internal/app/authorization_test.go new file mode 100644 index 0000000..5d5d6ec --- /dev/null +++ b/internal/app/authorization_test.go @@ -0,0 +1,199 @@ +package app + +import ( + "context" + "errors" + "testing" + + "webhookery/internal/authz" +) + +func TestAuthorizationServiceAllowsBaselineRoleAndScope(t *testing.T) { + service := AuthorizationService{} + decision := service.Authorize(context.Background(), AuthorizationRequest{ + Actor: authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleDeveloper, Scopes: []string{"events:read"}}, + TenantID: "ten_1", + Action: "events:read", + ResourceFamily: "event", + ResourceID: "evt_1", + }) + if !decision.Allowed || decision.MatchedRole != string(authz.RoleDeveloper) { + t.Fatalf("expected baseline allow, got %+v", decision) + } +} + +func TestAuthorizationServiceDeniesIncompleteAndWrongTenantContext(t *testing.T) { + service := AuthorizationService{} + base := AuthorizationRequest{ + Actor: authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleOwner, Scopes: []string{"*"}}, + TenantID: "ten_1", + Action: "events:read", + ResourceFamily: "event", + ResourceID: "evt_1", + } + cases := map[string]AuthorizationRequest{ + "missing actor": {TenantID: "ten_1", Action: "events:read", ResourceFamily: "event"}, + "missing tenant": {Actor: base.Actor, Action: "events:read", ResourceFamily: "event"}, + "missing action": {Actor: base.Actor, TenantID: "ten_1", ResourceFamily: "event"}, + "missing family": {Actor: base.Actor, TenantID: "ten_1", Action: "events:read"}, + "wrong tenant": {Actor: base.Actor, TenantID: "ten_2", Action: "events:read", ResourceFamily: "event"}, + "scope disallows": {Actor: authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleOwner, Scopes: []string{"events:read"}}, TenantID: "ten_1", Action: "events:raw", ResourceFamily: "event"}, + } + for name, req := range cases { + t.Run(name, func(t *testing.T) { + if decision := service.Authorize(context.Background(), req); decision.Allowed { + 
t.Fatalf("expected deny, got %+v", decision) + } + }) + } +} + +func TestAuthorizationServiceDeniesWrongTenantForSensitiveResourceFamilies(t *testing.T) { + service := AuthorizationService{} + cases := []struct { + name string + action string + family string + id string + }{ + {"api key", "api_keys:write", "api_key", "key_1"}, + {"source", "sources:write", "source", "src_1"}, + {"provider connection", "sources:write", "provider_connection", "pco_1"}, + {"endpoint", "endpoints:write", "endpoint", "end_1"}, + {"subscription", "subscriptions:write", "subscription", "sub_1"}, + {"route", "routes:write", "route", "rou_1"}, + {"retry policy", "routes:write", "retry_policy", "rtp_1"}, + {"event type", "schemas:write", "event_type", "invoice.paid"}, + {"event schema", "schemas:write", "event_schema", "invoice.paid:2026-05-01"}, + {"event raw", "events:raw", "event", "evt_1"}, + {"delivery", "deliveries:retry", "delivery", "del_1"}, + {"replay", "replay:write", "replay", "rpl_1"}, + {"audit event", "audit:read", "audit_event", "aud_1"}, + {"audit export", "audit:read", "audit_export", "exp_1"}, + {"audit anchor", "security:write", "audit_chain_anchor", "anc_1"}, + {"retention policy", "security:write", "retention_policy", "ret_1"}, + {"reconciliation", "replay:write", "reconciliation_job", "rec_1"}, + {"transformation", "routes:write", "transformation", "trn_1"}, + {"notification channel", "ops:write", "notification_channel", "nch_1"}, + {"notification delivery", "ops:write", "notification_delivery", "ndl_1"}, + {"siem sink", "security:write", "siem_sink", "snk_1"}, + {"siem delivery", "security:write", "siem_delivery", "sdl_1"}, + {"producer client", "security:write", "producer_client", "pcl_1"}, + {"producer mtls", "security:write", "producer_mtls_identity", "pmi_1"}, + {"identity provider", "security:write", "identity_provider", "idp_1"}, + {"auth session", "security:write", "auth_session", "ses_1"}, + {"scim token", "security:write", "scim_token", "scm_1"}, + {"role 
binding", "security:write", "role_binding", "rbd_1"}, + {"access policy", "security:write", "access_policy", "pol_1"}, + {"provider adapter", "security:write", "provider_adapter", "pad_1"}, + {"adapter version", "security:write", "adapter_version", "adv_1"}, + {"dead letter", "deliveries:retry", "dead_letter", "dlq_1"}, + {"quarantine", "security:write", "quarantine", "qua_1"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + decision := service.Authorize(context.Background(), AuthorizationRequest{ + Actor: authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleOwner, Scopes: []string{"*"}}, + TenantID: "ten_b", + Action: tc.action, + ResourceFamily: tc.family, + ResourceID: tc.id, + Environment: "production", + }) + if decision.Allowed || decision.Reason != "actor tenant does not match resource tenant" { + t.Fatalf("expected wrong-tenant deny, got %+v", decision) + } + }) + } +} + +func TestAuthorizationServiceUsesEnterpriseExplainWithResourceContext(t *testing.T) { + store := &authorizationFakeStore{decision: authz.Decision{ + Allowed: true, + Action: "endpoints:write", + Resource: authz.Resource{TenantID: "ten_1", Family: "endpoint", ID: "end_1", Environment: "production"}, + Reason: "allowed by resource role binding", + MatchedRoleBindingID: "rb_1", + }} + service := NewAuthorizationService(store) + decision := service.Authorize(context.Background(), AuthorizationRequest{ + Actor: authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleSupport, Scopes: []string{"endpoints:write"}}, + TenantID: "ten_1", + Action: "endpoints:write", + ResourceFamily: "endpoint", + ResourceID: "end_1", + Environment: "production", + }) + if !decision.Allowed || decision.MatchedRoleBindingID != "rb_1" { + t.Fatalf("expected enterprise allow, got %+v", decision) + } + if store.lastTenantID != "ten_1" || store.lastActorID != "usr_1" || store.lastReq.ResourceID != "end_1" || store.lastReq.Environment != "production" { + t.Fatalf("enterprise explain did 
not receive resource context: %+v", store) + } +} + +func TestAuthorizationServicePreservesEnterpriseDenyAndScopeLimit(t *testing.T) { + store := &authorizationFakeStore{decision: authz.Decision{ + Allowed: false, + Action: "events:raw", + Resource: authz.Resource{TenantID: "ten_1", Family: "event", ID: "evt_1"}, + Reason: "denied by access policy", + }} + service := NewAuthorizationService(store) + denied := service.Authorize(context.Background(), AuthorizationRequest{ + Actor: authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleOwner, Scopes: []string{"*"}}, + TenantID: "ten_1", + Action: "events:raw", + ResourceFamily: "event", + ResourceID: "evt_1", + }) + if denied.Allowed || denied.Reason != "denied by access policy" { + t.Fatalf("expected enterprise deny, got %+v", denied) + } + + store.decision.Allowed = true + limited := service.Authorize(context.Background(), AuthorizationRequest{ + Actor: authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleOwner, Scopes: []string{"events:read"}}, + TenantID: "ten_1", + Action: "events:raw", + ResourceFamily: "event", + ResourceID: "evt_1", + }) + if limited.Allowed || limited.Reason != "actor scope does not allow action" { + t.Fatalf("expected scope-limited deny, got %+v", limited) + } +} + +func TestAuthorizationServiceFallsBackToBaselineOnExplainError(t *testing.T) { + store := &authorizationFakeStore{err: errors.New("temporary policy store unavailable")} + service := NewAuthorizationService(store) + decision := service.Authorize(context.Background(), AuthorizationRequest{ + Actor: authz.Actor{ID: "usr_1", TenantID: "ten_1", Role: authz.RoleOwner, Scopes: []string{"*"}}, + TenantID: "ten_1", + Action: "replay:write", + ResourceFamily: "replay", + ResourceID: "rpl_1", + }) + if !decision.Allowed || decision.Reason != "allowed by baseline role" { + t.Fatalf("expected baseline fallback allow, got %+v", decision) + } +} + +type authorizationFakeStore struct { + enterpriseFakeStore + decision authz.Decision 
+ err error + lastTenantID string + lastActorID string + lastReq AuthzExplainRequest +} + +func (s *authorizationFakeStore) ExplainAuthorization(_ context.Context, tenantID, actorID string, req AuthzExplainRequest) (authz.Decision, error) { + s.lastTenantID = tenantID + s.lastActorID = actorID + s.lastReq = req + if s.err != nil { + return authz.Decision{}, s.err + } + return s.decision, nil +} diff --git a/internal/app/control.go b/internal/app/control.go index 67e4949..57f4ca6 100644 --- a/internal/app/control.go +++ b/internal/app/control.go @@ -25,157 +25,63 @@ var ( ) type ControlStore interface { - CreateAPIKey(ctx context.Context, input APIKeyCreateInput) (domain.APIKey, error) - ListAPIKeys(ctx context.Context, tenantID string, limit int) ([]domain.APIKey, error) - RevokeAPIKey(ctx context.Context, tenantID, apiKeyID, actorID, reason string) (domain.APIKey, error) - CreateSource(ctx context.Context, source domain.Source) (domain.Source, error) - ListSources(ctx context.Context, tenantID string, limit int) ([]domain.Source, error) - GetSource(ctx context.Context, tenantID, sourceID string) (domain.Source, error) - UpdateSource(ctx context.Context, tenantID, sourceID, actorID string, req UpdateSourceRequest) (domain.Source, error) - DeleteSource(ctx context.Context, tenantID, sourceID, actorID, reason string) (domain.Source, error) - CreateEndpoint(ctx context.Context, endpoint domain.Endpoint) (domain.Endpoint, error) - ListEndpoints(ctx context.Context, tenantID string, limit int) ([]domain.Endpoint, error) - GetEndpoint(ctx context.Context, tenantID, endpointID string) (domain.Endpoint, error) - UpdateEndpoint(ctx context.Context, tenantID, endpointID, actorID string, req UpdateEndpointRequest) (domain.Endpoint, error) - DeleteEndpoint(ctx context.Context, tenantID, endpointID, actorID, reason string) (domain.Endpoint, error) - TestEndpoint(ctx context.Context, tenantID, endpointID, actorID, reason string) (domain.Delivery, error) - CreateSubscription(ctx 
context.Context, subscription domain.Subscription) (domain.Subscription, error) - ListSubscriptions(ctx context.Context, tenantID string, limit int) ([]domain.Subscription, error) - GetSubscription(ctx context.Context, tenantID, subscriptionID string) (domain.Subscription, error) - UpdateSubscription(ctx context.Context, tenantID, subscriptionID, actorID string, req UpdateSubscriptionRequest) (domain.Subscription, error) - DeleteSubscription(ctx context.Context, tenantID, subscriptionID, actorID, reason string) (domain.Subscription, error) - CreateRoute(ctx context.Context, route domain.Route) (domain.Route, error) - ListRoutes(ctx context.Context, tenantID string, limit int) ([]domain.Route, error) - GetRoute(ctx context.Context, tenantID, routeID string) (domain.Route, error) - UpdateRoute(ctx context.Context, tenantID, routeID, actorID string, req UpdateRouteRequest) (domain.Route, error) - DeleteRoute(ctx context.Context, tenantID, routeID, actorID, reason string) (domain.Route, error) - ListRouteVersions(ctx context.Context, tenantID, routeID string, limit int) ([]domain.RouteVersion, error) - ActivateRoute(ctx context.Context, tenantID, routeID, actorID, reason string) (domain.Route, error) - DryRunRoute(ctx context.Context, tenantID, routeID, eventID string) (RouteDryRun, error) - CreateRetryPolicy(ctx context.Context, tenantID, actorID string, req CreateRetryPolicyRequest) (domain.RetryPolicy, error) - ListRetryPolicies(ctx context.Context, tenantID string, limit int) ([]domain.RetryPolicy, error) - GetRetryPolicy(ctx context.Context, tenantID, retryPolicyID string) (domain.RetryPolicy, error) - UpdateRetryPolicy(ctx context.Context, tenantID, retryPolicyID, actorID string, req UpdateRetryPolicyRequest) (domain.RetryPolicy, error) - DeleteRetryPolicy(ctx context.Context, tenantID, retryPolicyID, actorID, reason string) (domain.RetryPolicy, error) - CreateEventType(ctx context.Context, eventType domain.EventType) (domain.EventType, error) - 
ListEventTypes(ctx context.Context, tenantID string, limit int) ([]domain.EventType, error) - GetEventType(ctx context.Context, tenantID, eventType string) (domain.EventType, error) - UpdateEventType(ctx context.Context, tenantID, eventType, actorID string, req UpdateEventTypeRequest) (domain.EventType, error) - DeleteEventType(ctx context.Context, tenantID, eventType, actorID, reason string) (domain.EventType, error) - CreateEventSchema(ctx context.Context, schema domain.EventSchema) (domain.EventSchema, error) - ListEventSchemas(ctx context.Context, tenantID, eventType string, limit int) ([]domain.EventSchema, error) - GetEventSchema(ctx context.Context, tenantID, eventType, version string) (domain.EventSchema, error) - UpdateEventSchema(ctx context.Context, tenantID, eventType, version, actorID string, req UpdateEventSchemaRequest) (domain.EventSchema, error) - DeleteEventSchema(ctx context.Context, tenantID, eventType, version, actorID, reason string) (domain.EventSchema, error) - RotateSourceSecret(ctx context.Context, tenantID, sourceID, actorID string, req RotateSourceSecretRequest) (domain.SourceSecretVersion, error) - RotateEndpointSecret(ctx context.Context, tenantID, endpointID, actorID string, req RotateEndpointSecretRequest) (domain.EndpointSecretVersion, error) - ListEvents(ctx context.Context, tenantID string, limit int) ([]domain.Event, error) - GetEvent(ctx context.Context, tenantID, eventID string) (domain.Event, error) - GetRawPayload(ctx context.Context, tenantID, eventID, actorID string) (domain.RawPayload, error) - GetNormalizedEvent(ctx context.Context, tenantID, eventID, actorID string, includeData bool) (domain.NormalizedEnvelope, error) - ListEventTimeline(ctx context.Context, tenantID, eventID string, limit int) ([]map[string]any, error) - ListDeliveries(ctx context.Context, tenantID string, limit int) ([]domain.Delivery, error) - ListDeliveryAttempts(ctx context.Context, tenantID, deliveryID string, limit int) ([]domain.DeliveryAttempt, 
error) - GetDeliveryAttempt(ctx context.Context, tenantID, attemptID string) (domain.DeliveryAttempt, error) - RetryDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.Delivery, error) - CancelDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.Delivery, error) - ListEndpointHealth(ctx context.Context, tenantID string, limit int) ([]domain.EndpointHealth, error) - OpsMetrics(ctx context.Context, tenantID string) (domain.OpsMetrics, error) - ListWorkers(ctx context.Context, tenantID string, limit int) ([]domain.WorkerStatus, error) - GetWorker(ctx context.Context, tenantID, workerID string) (domain.WorkerStatus, error) - ListQueues(ctx context.Context, tenantID string) ([]domain.QueueStats, error) - OpsStorage(ctx context.Context, tenantID string) (domain.OpsStorageStatus, error) - ListMetricRollups(ctx context.Context, tenantID, metricName string, limit int) ([]domain.MetricRollup, error) - CreateAlertRule(ctx context.Context, tenantID, actorID string, req CreateAlertRuleRequest) (domain.AlertRule, error) - ListAlertRules(ctx context.Context, tenantID string, limit int) ([]domain.AlertRule, error) - GetAlertRule(ctx context.Context, tenantID, alertID string) (domain.AlertRule, error) - UpdateAlertRule(ctx context.Context, tenantID, alertID, actorID string, req UpdateAlertRuleRequest) (domain.AlertRule, error) - DeleteAlertRule(ctx context.Context, tenantID, alertID, actorID, reason string) (domain.AlertRule, error) - ListAlertFirings(ctx context.Context, tenantID, state string, limit int) ([]domain.AlertFiring, error) - GetAlertFiring(ctx context.Context, tenantID, firingID string) (domain.AlertFiring, error) - AcknowledgeAlertFiring(ctx context.Context, tenantID, firingID, actorID, reason string) (domain.AlertFiring, error) - CreateNotificationChannel(ctx context.Context, tenantID, actorID string, req CreateNotificationChannelRequest) (domain.NotificationChannel, error) - ListNotificationChannels(ctx 
context.Context, tenantID string, limit int) ([]domain.NotificationChannel, error) - GetNotificationChannel(ctx context.Context, tenantID, channelID string) (domain.NotificationChannel, error) - UpdateNotificationChannel(ctx context.Context, tenantID, channelID, actorID string, req UpdateNotificationChannelRequest) (domain.NotificationChannel, error) - DeleteNotificationChannel(ctx context.Context, tenantID, channelID, actorID, reason string) (domain.NotificationChannel, error) - TestNotificationChannel(ctx context.Context, tenantID, channelID, actorID, reason string) (domain.NotificationDelivery, error) - ListNotificationDeliveries(ctx context.Context, tenantID, state string, limit int) ([]domain.NotificationDelivery, error) - ListNotificationDeliveryAttempts(ctx context.Context, tenantID, deliveryID string, limit int) ([]domain.NotificationDeliveryAttempt, error) - RetryNotificationDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.NotificationDelivery, error) - CreateSIEMSink(ctx context.Context, tenantID, actorID string, req CreateSIEMSinkRequest) (domain.SIEMSink, error) - ListSIEMSinks(ctx context.Context, tenantID string, limit int) ([]domain.SIEMSink, error) - GetSIEMSink(ctx context.Context, tenantID, sinkID string) (domain.SIEMSink, error) - UpdateSIEMSink(ctx context.Context, tenantID, sinkID, actorID string, req UpdateSIEMSinkRequest) (domain.SIEMSink, error) - DeleteSIEMSink(ctx context.Context, tenantID, sinkID, actorID, reason string) (domain.SIEMSink, error) - TestSIEMSink(ctx context.Context, tenantID, sinkID, actorID, reason string) (domain.SIEMDelivery, error) - ListSIEMDeliveries(ctx context.Context, tenantID, state string, limit int) ([]domain.SIEMDelivery, error) - ListSIEMDeliveryAttempts(ctx context.Context, tenantID, deliveryID string, limit int) ([]domain.SIEMDeliveryAttempt, error) - RetrySIEMDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.SIEMDelivery, error) - 
ListAuditEvents(ctx context.Context, tenantID string, limit int) ([]domain.AuditEvent, error) - GetAuditChainHead(ctx context.Context, tenantID string) (domain.AuditChainHead, error) - VerifyAuditChain(ctx context.Context, tenantID string, req AuditChainVerifyRequest) (domain.AuditChainVerification, error) - CreateAuditChainAnchor(ctx context.Context, tenantID, actorID string, req AuditChainAnchorRequest) (domain.AuditChainAnchor, error) - ListAuditChainAnchors(ctx context.Context, tenantID string, limit int) ([]domain.AuditChainAnchor, error) - GetAuditChainAnchor(ctx context.Context, tenantID, anchorID string) (domain.AuditChainAnchor, error) - ListRetentionPolicies(ctx context.Context, tenantID string, limit int) ([]domain.RetentionPolicy, error) - CreateRetentionPolicy(ctx context.Context, tenantID, actorID string, req CreateRetentionPolicyRequest) (domain.RetentionPolicy, error) - UpdateRetentionPolicy(ctx context.Context, tenantID, policyID, actorID string, req UpdateRetentionPolicyRequest) (domain.RetentionPolicy, error) - CreateProviderConnection(ctx context.Context, tenantID, actorID string, req CreateProviderConnectionRequest) (domain.ProviderConnection, error) - ListProviderConnections(ctx context.Context, tenantID string, limit int) ([]domain.ProviderConnection, error) - GetProviderConnection(ctx context.Context, tenantID, connectionID string) (domain.ProviderConnection, error) - VerifyProviderConnection(ctx context.Context, tenantID, connectionID, actorID, reason string) (domain.ProviderConnection, error) - RevokeProviderConnection(ctx context.Context, tenantID, connectionID, actorID, reason string) (domain.ProviderConnection, error) - DryRunReconciliation(ctx context.Context, tenantID string, req ReconciliationJobRequest) (domain.ReconciliationJob, error) - CreateReconciliationJob(ctx context.Context, tenantID, actorID string, req ReconciliationJobRequest) (domain.ReconciliationJob, error) - ListReconciliationJobs(ctx context.Context, tenantID string, 
limit int) ([]domain.ReconciliationJob, error) - GetReconciliationJob(ctx context.Context, tenantID, jobID string) (domain.ReconciliationJob, error) - ListReconciliationItems(ctx context.Context, tenantID, jobID string, limit int) ([]domain.ReconciliationItem, error) - CancelReconciliationJob(ctx context.Context, tenantID, jobID, actorID, reason string) (domain.ReconciliationJob, error) - CreateProviderAdapter(ctx context.Context, tenantID, actorID string, req CreateProviderAdapterRequest) (domain.ProviderAdapter, error) - ListProviderAdapters(ctx context.Context, tenantID string, limit int) ([]domain.ProviderAdapter, error) - GetProviderAdapter(ctx context.Context, tenantID, adapterID string) (domain.ProviderAdapter, error) - CreateAdapterVersion(ctx context.Context, tenantID, adapterID, actorID string, req CreateAdapterVersionRequest) (domain.AdapterVersion, error) - ListAdapterVersions(ctx context.Context, tenantID, adapterID string, limit int) ([]domain.AdapterVersion, error) - CreateAdapterTestVector(ctx context.Context, tenantID, adapterID, versionID, actorID string, req CreateAdapterTestVectorRequest) (domain.AdapterTestVector, error) - TransitionAdapterVersion(ctx context.Context, tenantID, adapterID, versionID, actorID string, req AdapterVersionTransitionRequest) (domain.AdapterVersion, error) - CreateAuditExport(ctx context.Context, tenantID, actorID string, req CreateAuditExportRequest) (domain.EvidenceExport, error) - ListAuditExports(ctx context.Context, tenantID string, limit int) ([]domain.EvidenceExport, error) - GetAuditExport(ctx context.Context, tenantID, exportID string) (domain.EvidenceExport, error) - DownloadAuditExport(ctx context.Context, tenantID, exportID, actorID string) (EvidenceExportDownload, error) - ListDeadLetter(ctx context.Context, tenantID string, limit int) ([]map[string]any, error) - ReleaseDeadLetter(ctx context.Context, tenantID, entryID, actorID, reason string) (ReplayJob, error) - BulkReleaseDeadLetter(ctx context.Context, 
tenantID string, entryIDs []string, actorID, reason string) ([]ReplayJob, error) - ListQuarantine(ctx context.Context, tenantID string, limit int) ([]map[string]any, error) - ApproveQuarantine(ctx context.Context, tenantID, entryID, actorID, reason string, routeAfterRelease bool) (map[string]any, error) - RejectQuarantine(ctx context.Context, tenantID, entryID, actorID, reason string) (map[string]any, error) - DryRunReplay(ctx context.Context, tenantID string, req ReplayRequest) (ReplayDryRun, error) - CreateReplay(ctx context.Context, tenantID, actorID string, req ReplayRequest) (ReplayJob, error) - ListReplayJobs(ctx context.Context, tenantID string, limit int) ([]ReplayJob, error) - ApproveReplayJob(ctx context.Context, tenantID, replayJobID, actorID, reason string) (ReplayJob, error) - PauseReplayJob(ctx context.Context, tenantID, replayJobID, actorID, reason string) (ReplayJob, error) - ResumeReplayJob(ctx context.Context, tenantID, replayJobID, actorID, reason string) (ReplayJob, error) - CancelReplayJob(ctx context.Context, tenantID, replayJobID, actorID, reason string) (ReplayJob, error) - CreateTransformation(ctx context.Context, tenantID, actorID string, req CreateTransformationRequest) (domain.Transformation, error) - ListTransformations(ctx context.Context, tenantID string, limit int) ([]domain.Transformation, error) - GetTransformation(ctx context.Context, tenantID, transformationID string) (domain.Transformation, error) - CreateTransformationVersion(ctx context.Context, tenantID, transformationID, actorID string, req CreateTransformationVersionRequest) (domain.TransformationVersion, error) - ListTransformationVersions(ctx context.Context, tenantID, transformationID string, limit int) ([]domain.TransformationVersion, error) - ActivateTransformationVersion(ctx context.Context, tenantID, transformationID, versionID, actorID, reason string) (domain.TransformationVersion, error) + APIKeyStore + SourceStore + EndpointStore + SubscriptionStore + RouteStore + 
SchemaStore + EventStore + IncidentStore + DeliveryStore + OpsStore + SignalStore + AuditStore + RetentionStore + ProviderConnectionStore + ReconciliationStore + ProviderAdapterStore + EvidenceExportStore + DeadLetterStore + ReplayStore + ReplayApprovalPolicyStore + TransformationStore } type ControlService struct { - store ControlStore - ssrfValidator ssrf.Validator - runtimeConfig domain.OpsConfig + store ControlStore + ssrfValidator ssrf.Validator + runtimeConfig domain.OpsConfig + authorizer AuthorizationService + reconciliation *ReconciliationService } func NewControlService(store ControlStore, validator ssrf.Validator) *ControlService { - return &ControlService{store: store, ssrfValidator: validator} + service := &ControlService{store: store, ssrfValidator: validator, authorizer: NewAuthorizationService(store)} + if reconciliationStore, ok := any(store).(ReconciliationWorkStore); ok { + service.reconciliation = NewReconciliationService(reconciliationStore, nil) + } + return service } func NewControlServiceWithRuntimeConfig(store ControlStore, validator ssrf.Validator, runtimeConfig domain.OpsConfig) *ControlService { - return &ControlService{store: store, ssrfValidator: validator, runtimeConfig: runtimeConfig} + service := &ControlService{store: store, ssrfValidator: validator, runtimeConfig: runtimeConfig, authorizer: NewAuthorizationService(store)} + if reconciliationStore, ok := any(store).(ReconciliationWorkStore); ok { + service.reconciliation = NewReconciliationService(reconciliationStore, nil) + } + return service +} + +const EventTimelineSchemaV1 = "webhookery.event_timeline.v1" + +type EventTimelineEntry struct { + SchemaVersion string `json:"schema_version"` + Sequence int `json:"sequence"` + Kind string `json:"kind"` + RefID string `json:"ref_id"` + State string `json:"state"` + Detail string `json:"detail"` + OccurredAt time.Time `json:"occurred_at"` } type CreateSourceRequest struct { @@ -457,24 +363,64 @@ const ( ReplayConfigOriginal = 
"original" ) +const ReplayApprovalDefaultExpiry = 24 * time.Hour + +const ( + ReplayApprovalScopeTenant = "tenant" + ReplayApprovalScopeSource = "source" + ReplayApprovalScopeRoute = "route" +) + +const ( + ReplayReasonReceiverFixed = "receiver_fixed" + ReplayReasonProviderReconciliation = "provider_reconciliation" + ReplayReasonOperatorRequested = "operator_requested" + ReplayReasonSupportInvestigation = "support_investigation" + ReplayReasonCustomerDispute = "customer_dispute" + ReplayReasonTestDrill = "test_drill" + ReplayReasonIncidentRecovery = "incident_recovery" +) + +var replayReasonCodes = map[string]struct{}{ + ReplayReasonReceiverFixed: {}, + ReplayReasonProviderReconciliation: {}, + ReplayReasonOperatorRequested: {}, + ReplayReasonSupportInvestigation: {}, + ReplayReasonCustomerDispute: {}, + ReplayReasonTestDrill: {}, + ReplayReasonIncidentRecovery: {}, +} + type ReplayRequest struct { - EventID string `json:"event_id"` - DeliveryID string `json:"delivery_id"` - EndpointID string `json:"endpoint_id"` - Reason string `json:"reason"` - DryRun bool `json:"dry_run"` - ConfigMode string `json:"config_mode,omitempty"` - RateLimitPerMinute int `json:"rate_limit_per_minute,omitempty"` - RequireApproval bool `json:"require_approval,omitempty"` + EventID string `json:"event_id"` + DeliveryID string `json:"delivery_id"` + EndpointID string `json:"endpoint_id"` + ReasonCode string `json:"reason_code"` + Reason string `json:"reason"` + DryRun bool `json:"dry_run"` + ConfigMode string `json:"config_mode,omitempty"` + RateLimitPerMinute int `json:"rate_limit_per_minute,omitempty"` + RequireApproval bool `json:"require_approval,omitempty"` + ApprovalExpiresAt *time.Time `json:"approval_expires_at,omitempty"` +} + +type CreateReplayApprovalPolicyRequest struct { + ScopeType string `json:"scope_type"` + ScopeID string `json:"scope_id,omitempty"` + RequireApproval bool `json:"require_approval"` + DefaultExpirySeconds int `json:"default_expiry_seconds,omitempty"` + Reason 
string `json:"reason"` } type DeadLetterReleaseRequest struct { - Reason string `json:"reason"` + ReasonCode string `json:"reason_code"` + Reason string `json:"reason"` } type DeadLetterBulkReleaseRequest struct { - EntryIDs []string `json:"entry_ids"` - Reason string `json:"reason"` + EntryIDs []string `json:"entry_ids"` + ReasonCode string `json:"reason_code"` + Reason string `json:"reason"` } type QuarantineDecisionRequest struct { @@ -492,14 +438,19 @@ type ReplayJob struct { ID string `json:"id"` State string `json:"state"` ScopeHash string `json:"scope_hash"` + ReasonCode string `json:"reason_code"` + Reason string `json:"reason"` ConfigMode string `json:"config_mode,omitempty"` RateLimitPerMinute int `json:"rate_limit_per_minute,omitempty"` TotalItems int `json:"total_items"` ProcessedItems int `json:"processed_items"` FailedItems int `json:"failed_items"` ApprovalRequired bool `json:"approval_required"` + ApprovalExpiresAt *time.Time `json:"approval_expires_at,omitempty"` ApprovedBy string `json:"approved_by,omitempty"` ApprovedAt *time.Time `json:"approved_at,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CreatedAt *time.Time `json:"created_at,omitempty"` } type StateChangeRequest struct { @@ -641,7 +592,7 @@ type EvidenceExportDownload struct { } func (s *ControlService) CreateAPIKey(ctx context.Context, actor authz.Actor, req CreateAPIKeyRequest) (APIKeyCreated, error) { - if !authz.Can(actor, "api_keys:write", actor.TenantID) { + if !s.authorized(ctx, actor, "api_keys:write", "api_key", "", "") { return APIKeyCreated{}, ErrForbidden } if strings.TrimSpace(req.Name) == "" { @@ -689,7 +640,7 @@ func (s *ControlService) CreateAPIKey(ctx context.Context, actor authz.Actor, re } func (s *ControlService) ListAPIKeys(ctx context.Context, actor authz.Actor, limit int) ([]domain.APIKey, error) { - if !authz.Can(actor, "api_keys:read", actor.TenantID) { + if !s.authorized(ctx, actor, "api_keys:read", "api_key", "", "") { return nil, ErrForbidden } 
keys, err := s.store.ListAPIKeys(ctx, actor.TenantID, normalizeLimit(limit)) @@ -703,7 +654,7 @@ func (s *ControlService) ListAPIKeys(ctx context.Context, actor authz.Actor, lim } func (s *ControlService) RevokeAPIKey(ctx context.Context, actor authz.Actor, apiKeyID string, req RevokeAPIKeyRequest) (domain.APIKey, error) { - if !authz.Can(actor, "api_keys:write", actor.TenantID) { + if !s.authorized(ctx, actor, "api_keys:write", "api_key", apiKeyID, "") { return domain.APIKey{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -758,7 +709,7 @@ func (s *ControlService) IssueProducerToken(ctx context.Context, clientID, clien } func (s *ControlService) CreateProducerClient(ctx context.Context, actor authz.Actor, req CreateProducerClientRequest) (ProducerClientCreated, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "producer_client", "", "") { return ProducerClientCreated{}, ErrForbidden } store, ok := s.store.(producerClientStore) @@ -797,7 +748,7 @@ func (s *ControlService) CreateProducerClient(ctx context.Context, actor authz.A } func (s *ControlService) ListProducerClients(ctx context.Context, actor authz.Actor, limit int) ([]domain.ProducerClient, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "producer_client", "", "") { return nil, ErrForbidden } store, ok := s.store.(producerClientStore) @@ -808,7 +759,7 @@ func (s *ControlService) ListProducerClients(ctx context.Context, actor authz.Ac } func (s *ControlService) GetProducerClient(ctx context.Context, actor authz.Actor, clientID string) (domain.ProducerClient, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "producer_client", clientID, "") { return domain.ProducerClient{}, ErrForbidden } if strings.TrimSpace(clientID) == "" { @@ -822,7 +773,7 @@ func (s *ControlService) GetProducerClient(ctx 
context.Context, actor authz.Acto } func (s *ControlService) UpdateProducerClient(ctx context.Context, actor authz.Actor, clientID string, req UpdateProducerClientRequest) (domain.ProducerClient, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "producer_client", clientID, "") { return domain.ProducerClient{}, ErrForbidden } if strings.TrimSpace(clientID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -839,7 +790,7 @@ func (s *ControlService) UpdateProducerClient(ctx context.Context, actor authz.A } func (s *ControlService) DeleteProducerClient(ctx context.Context, actor authz.Actor, clientID string, req StateChangeRequest) (domain.ProducerClient, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "producer_client", clientID, "") { return domain.ProducerClient{}, ErrForbidden } if strings.TrimSpace(clientID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -853,7 +804,7 @@ func (s *ControlService) DeleteProducerClient(ctx context.Context, actor authz.A } func (s *ControlService) RotateProducerClientSecret(ctx context.Context, actor authz.Actor, clientID string, req RotateProducerClientSecretRequest) (ProducerClientSecretRotated, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "producer_client", clientID, "") { return ProducerClientSecretRotated{}, ErrForbidden } if strings.TrimSpace(clientID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -885,7 +836,7 @@ func (s *ControlService) RotateProducerClientSecret(ctx context.Context, actor a } func (s *ControlService) CreateProducerMTLSIdentity(ctx context.Context, actor authz.Actor, req CreateProducerMTLSIdentityRequest) (domain.ProducerMTLSIdentity, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "producer_mtls_identity", "", "") { return 
domain.ProducerMTLSIdentity{}, ErrForbidden } store, ok := s.store.(producerMTLSIdentityStore) @@ -903,7 +854,7 @@ func (s *ControlService) CreateProducerMTLSIdentity(ctx context.Context, actor a } func (s *ControlService) ListProducerMTLSIdentities(ctx context.Context, actor authz.Actor, limit int) ([]domain.ProducerMTLSIdentity, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "producer_mtls_identity", "", "") { return nil, ErrForbidden } store, ok := s.store.(producerMTLSIdentityStore) @@ -914,7 +865,7 @@ func (s *ControlService) ListProducerMTLSIdentities(ctx context.Context, actor a } func (s *ControlService) GetProducerMTLSIdentity(ctx context.Context, actor authz.Actor, identityID string) (domain.ProducerMTLSIdentity, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "producer_mtls_identity", identityID, "") { return domain.ProducerMTLSIdentity{}, ErrForbidden } if strings.TrimSpace(identityID) == "" { @@ -928,7 +879,7 @@ func (s *ControlService) GetProducerMTLSIdentity(ctx context.Context, actor auth } func (s *ControlService) UpdateProducerMTLSIdentity(ctx context.Context, actor authz.Actor, identityID string, req UpdateProducerMTLSIdentityRequest) (domain.ProducerMTLSIdentity, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "producer_mtls_identity", identityID, "") { return domain.ProducerMTLSIdentity{}, ErrForbidden } if strings.TrimSpace(identityID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -945,7 +896,7 @@ func (s *ControlService) UpdateProducerMTLSIdentity(ctx context.Context, actor a } func (s *ControlService) DeleteProducerMTLSIdentity(ctx context.Context, actor authz.Actor, identityID string, req StateChangeRequest) (domain.ProducerMTLSIdentity, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, 
"security:write", "producer_mtls_identity", identityID, "") { return domain.ProducerMTLSIdentity{}, ErrForbidden } if strings.TrimSpace(identityID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -959,7 +910,7 @@ func (s *ControlService) DeleteProducerMTLSIdentity(ctx context.Context, actor a } func (s *ControlService) VerifyProducerMTLSIdentity(ctx context.Context, actor authz.Actor, identityID string, req VerifyProducerMTLSIdentityRequest) (ProducerMTLSIdentityVerification, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "producer_mtls_identity", identityID, "") { return ProducerMTLSIdentityVerification{}, ErrForbidden } store, ok := s.store.(producerMTLSIdentityStore) @@ -979,7 +930,7 @@ func (s *ControlService) VerifyProducerMTLSIdentity(ctx context.Context, actor a } func (s *ControlService) CreateSource(ctx context.Context, actor authz.Actor, req CreateSourceRequest) (domain.Source, error) { - if !authz.Can(actor, "sources:write", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:write", "source", "", "") { return domain.Source{}, ErrForbidden } if req.Provider == "" || (req.Provider != "internal" && req.VerificationSecret == "") { @@ -1001,14 +952,14 @@ func (s *ControlService) CreateSource(ctx context.Context, actor authz.Actor, re } func (s *ControlService) ListSources(ctx context.Context, actor authz.Actor, limit int) ([]domain.Source, error) { - if !authz.Can(actor, "sources:read", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:read", "source", "", "") { return nil, ErrForbidden } return s.store.ListSources(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetSource(ctx context.Context, actor authz.Actor, sourceID string) (domain.Source, error) { - if !authz.Can(actor, "sources:read", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:read", "source", sourceID, "") { return domain.Source{}, ErrForbidden } if strings.TrimSpace(sourceID) == 
"" { @@ -1018,7 +969,7 @@ func (s *ControlService) GetSource(ctx context.Context, actor authz.Actor, sourc } func (s *ControlService) UpdateSource(ctx context.Context, actor authz.Actor, sourceID string, req UpdateSourceRequest) (domain.Source, error) { - if !authz.Can(actor, "sources:write", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:write", "source", sourceID, "") { return domain.Source{}, ErrForbidden } if strings.TrimSpace(sourceID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1045,7 +996,7 @@ func (s *ControlService) UpdateSource(ctx context.Context, actor authz.Actor, so } func (s *ControlService) DeleteSource(ctx context.Context, actor authz.Actor, sourceID string, req StateChangeRequest) (domain.Source, error) { - if !authz.Can(actor, "sources:write", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:write", "source", sourceID, "") { return domain.Source{}, ErrForbidden } if strings.TrimSpace(sourceID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1055,7 +1006,7 @@ func (s *ControlService) DeleteSource(ctx context.Context, actor authz.Actor, so } func (s *ControlService) CreateEndpoint(ctx context.Context, actor authz.Actor, req CreateEndpointRequest) (domain.Endpoint, ssrf.Result, error) { - if !authz.Can(actor, "endpoints:write", actor.TenantID) { + if !s.authorized(ctx, actor, "endpoints:write", "endpoint", "", "") { return domain.Endpoint{}, ssrf.Result{}, ErrForbidden } if req.URL == "" { @@ -1089,14 +1040,14 @@ func (s *ControlService) ValidateEndpointURL(ctx context.Context, rawURL string) } func (s *ControlService) ListEndpoints(ctx context.Context, actor authz.Actor, limit int) ([]domain.Endpoint, error) { - if !authz.Can(actor, "endpoints:read", actor.TenantID) { + if !s.authorized(ctx, actor, "endpoints:read", "endpoint", "", "") { return nil, ErrForbidden } return s.store.ListEndpoints(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetEndpoint(ctx context.Context, actor authz.Actor, 
endpointID string) (domain.Endpoint, error) { - if !authz.Can(actor, "endpoints:read", actor.TenantID) { + if !s.authorized(ctx, actor, "endpoints:read", "endpoint", endpointID, "") { return domain.Endpoint{}, ErrForbidden } if strings.TrimSpace(endpointID) == "" { @@ -1150,7 +1101,7 @@ func (s *ControlService) UpdateEndpoint(ctx context.Context, actor authz.Actor, } func (s *ControlService) DeleteEndpoint(ctx context.Context, actor authz.Actor, endpointID string, req StateChangeRequest) (domain.Endpoint, error) { - if !authz.Can(actor, "endpoints:write", actor.TenantID) { + if !s.authorized(ctx, actor, "endpoints:write", "endpoint", endpointID, "") { return domain.Endpoint{}, ErrForbidden } if strings.TrimSpace(endpointID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1160,7 +1111,7 @@ func (s *ControlService) DeleteEndpoint(ctx context.Context, actor authz.Actor, } func (s *ControlService) TestEndpoint(ctx context.Context, actor authz.Actor, endpointID string, req TestEndpointRequest) (domain.Delivery, error) { - if !authz.Can(actor, "endpoints:write", actor.TenantID) { + if !s.authorized(ctx, actor, "endpoints:write", "endpoint", endpointID, "") { return domain.Delivery{}, ErrForbidden } if strings.TrimSpace(endpointID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1170,7 +1121,7 @@ func (s *ControlService) TestEndpoint(ctx context.Context, actor authz.Actor, en } func (s *ControlService) CreateSubscription(ctx context.Context, actor authz.Actor, req CreateSubscriptionRequest) (domain.Subscription, error) { - if !authz.Can(actor, "subscriptions:write", actor.TenantID) { + if !s.authorized(ctx, actor, "subscriptions:write", "subscription", "", "") { return domain.Subscription{}, ErrForbidden } eventTypes := normalizeEventTypes(req.EventTypes) @@ -1192,14 +1143,14 @@ func (s *ControlService) CreateSubscription(ctx context.Context, actor authz.Act } func (s *ControlService) ListSubscriptions(ctx context.Context, actor authz.Actor, limit int) 
([]domain.Subscription, error) { - if !authz.Can(actor, "subscriptions:read", actor.TenantID) { + if !s.authorized(ctx, actor, "subscriptions:read", "subscription", "", "") { return nil, ErrForbidden } return s.store.ListSubscriptions(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetSubscription(ctx context.Context, actor authz.Actor, subscriptionID string) (domain.Subscription, error) { - if !authz.Can(actor, "subscriptions:read", actor.TenantID) { + if !s.authorized(ctx, actor, "subscriptions:read", "subscription", subscriptionID, "") { return domain.Subscription{}, ErrForbidden } if strings.TrimSpace(subscriptionID) == "" { @@ -1209,7 +1160,7 @@ func (s *ControlService) GetSubscription(ctx context.Context, actor authz.Actor, } func (s *ControlService) UpdateSubscription(ctx context.Context, actor authz.Actor, subscriptionID string, req UpdateSubscriptionRequest) (domain.Subscription, error) { - if !authz.Can(actor, "subscriptions:write", actor.TenantID) { + if !s.authorized(ctx, actor, "subscriptions:write", "subscription", subscriptionID, "") { return domain.Subscription{}, ErrForbidden } if strings.TrimSpace(subscriptionID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1253,7 +1204,7 @@ func (s *ControlService) UpdateSubscription(ctx context.Context, actor authz.Act } func (s *ControlService) DeleteSubscription(ctx context.Context, actor authz.Actor, subscriptionID string, req StateChangeRequest) (domain.Subscription, error) { - if !authz.Can(actor, "subscriptions:write", actor.TenantID) { + if !s.authorized(ctx, actor, "subscriptions:write", "subscription", subscriptionID, "") { return domain.Subscription{}, ErrForbidden } if strings.TrimSpace(subscriptionID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1263,7 +1214,7 @@ func (s *ControlService) DeleteSubscription(ctx context.Context, actor authz.Act } func (s *ControlService) CreateRoute(ctx context.Context, actor authz.Actor, req CreateRouteRequest) (domain.Route, error) 
{ - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "route", "", "") { return domain.Route{}, ErrForbidden } eventTypes := normalizeEventTypes(req.EventTypes) @@ -1304,14 +1255,14 @@ func (s *ControlService) CreateRoute(ctx context.Context, actor authz.Actor, req } func (s *ControlService) ListRoutes(ctx context.Context, actor authz.Actor, limit int) ([]domain.Route, error) { - if !authz.Can(actor, "routes:read", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:read", "route", "", "") { return nil, ErrForbidden } return s.store.ListRoutes(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetRoute(ctx context.Context, actor authz.Actor, routeID string) (domain.Route, error) { - if !authz.Can(actor, "routes:read", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:read", "route", routeID, "") { return domain.Route{}, ErrForbidden } if strings.TrimSpace(routeID) == "" { @@ -1321,7 +1272,7 @@ func (s *ControlService) GetRoute(ctx context.Context, actor authz.Actor, routeI } func (s *ControlService) UpdateRoute(ctx context.Context, actor authz.Actor, routeID string, req UpdateRouteRequest) (domain.Route, error) { - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "route", routeID, "") { return domain.Route{}, ErrForbidden } if strings.TrimSpace(routeID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1379,7 +1330,7 @@ func (s *ControlService) UpdateRoute(ctx context.Context, actor authz.Actor, rou } func (s *ControlService) DeleteRoute(ctx context.Context, actor authz.Actor, routeID string, req StateChangeRequest) (domain.Route, error) { - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "route", routeID, "") { return domain.Route{}, ErrForbidden } if strings.TrimSpace(routeID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1389,7 +1340,7 @@ func (s *ControlService) 
DeleteRoute(ctx context.Context, actor authz.Actor, rou } func (s *ControlService) ListRouteVersions(ctx context.Context, actor authz.Actor, routeID string, limit int) ([]domain.RouteVersion, error) { - if !authz.Can(actor, "routes:read", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:read", "route", routeID, "") { return nil, ErrForbidden } if strings.TrimSpace(routeID) == "" { @@ -1399,7 +1350,7 @@ func (s *ControlService) ListRouteVersions(ctx context.Context, actor authz.Acto } func (s *ControlService) ActivateRoute(ctx context.Context, actor authz.Actor, routeID, reason string) (domain.Route, error) { - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "route", routeID, "") { return domain.Route{}, ErrForbidden } if strings.TrimSpace(routeID) == "" || strings.TrimSpace(reason) == "" { @@ -1409,7 +1360,7 @@ func (s *ControlService) ActivateRoute(ctx context.Context, actor authz.Actor, r } func (s *ControlService) DryRunRoute(ctx context.Context, actor authz.Actor, routeID, eventID string) (RouteDryRun, error) { - if !authz.Can(actor, "routes:read", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:read", "route", routeID, "") { return RouteDryRun{}, ErrForbidden } if strings.TrimSpace(routeID) == "" || strings.TrimSpace(eventID) == "" { @@ -1419,7 +1370,7 @@ func (s *ControlService) DryRunRoute(ctx context.Context, actor authz.Actor, rou } func (s *ControlService) CreateRetryPolicy(ctx context.Context, actor authz.Actor, req CreateRetryPolicyRequest) (domain.RetryPolicy, error) { - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "retry_policy", "", "") { return domain.RetryPolicy{}, ErrForbidden } req.Name = strings.TrimSpace(req.Name) @@ -1434,14 +1385,14 @@ func (s *ControlService) CreateRetryPolicy(ctx context.Context, actor authz.Acto } func (s *ControlService) ListRetryPolicies(ctx context.Context, actor authz.Actor, limit int) 
([]domain.RetryPolicy, error) { - if !authz.Can(actor, "routes:read", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:read", "retry_policy", "", "") { return nil, ErrForbidden } return s.store.ListRetryPolicies(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetRetryPolicy(ctx context.Context, actor authz.Actor, retryPolicyID string) (domain.RetryPolicy, error) { - if !authz.Can(actor, "routes:read", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:read", "retry_policy", retryPolicyID, "") { return domain.RetryPolicy{}, ErrForbidden } if strings.TrimSpace(retryPolicyID) == "" { @@ -1451,7 +1402,7 @@ func (s *ControlService) GetRetryPolicy(ctx context.Context, actor authz.Actor, } func (s *ControlService) UpdateRetryPolicy(ctx context.Context, actor authz.Actor, retryPolicyID string, req UpdateRetryPolicyRequest) (domain.RetryPolicy, error) { - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "retry_policy", retryPolicyID, "") { return domain.RetryPolicy{}, ErrForbidden } if strings.TrimSpace(retryPolicyID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1496,7 +1447,7 @@ func (s *ControlService) UpdateRetryPolicy(ctx context.Context, actor authz.Acto } func (s *ControlService) DeleteRetryPolicy(ctx context.Context, actor authz.Actor, retryPolicyID string, req StateChangeRequest) (domain.RetryPolicy, error) { - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "retry_policy", retryPolicyID, "") { return domain.RetryPolicy{}, ErrForbidden } if strings.TrimSpace(retryPolicyID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1506,7 +1457,7 @@ func (s *ControlService) DeleteRetryPolicy(ctx context.Context, actor authz.Acto } func (s *ControlService) CreateEventType(ctx context.Context, actor authz.Actor, req CreateEventTypeRequest) (domain.EventType, error) { - if !authz.Can(actor, "schemas:write", actor.TenantID) { + if 
!s.authorized(ctx, actor, "schemas:write", "event_type", "", "") { return domain.EventType{}, ErrForbidden } req.Name = strings.TrimSpace(req.Name) @@ -1523,14 +1474,14 @@ func (s *ControlService) CreateEventType(ctx context.Context, actor authz.Actor, } func (s *ControlService) ListEventTypes(ctx context.Context, actor authz.Actor, limit int) ([]domain.EventType, error) { - if !authz.Can(actor, "schemas:read", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:read", "event_type", "", "") { return nil, ErrForbidden } return s.store.ListEventTypes(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetEventType(ctx context.Context, actor authz.Actor, eventType string) (domain.EventType, error) { - if !authz.Can(actor, "schemas:read", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:read", "event_type", eventType, "") { return domain.EventType{}, ErrForbidden } if strings.TrimSpace(eventType) == "" { @@ -1540,7 +1491,7 @@ func (s *ControlService) GetEventType(ctx context.Context, actor authz.Actor, ev } func (s *ControlService) UpdateEventType(ctx context.Context, actor authz.Actor, eventType string, req UpdateEventTypeRequest) (domain.EventType, error) { - if !authz.Can(actor, "schemas:write", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:write", "event_type", eventType, "") { return domain.EventType{}, ErrForbidden } if strings.TrimSpace(eventType) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1564,7 +1515,7 @@ func (s *ControlService) UpdateEventType(ctx context.Context, actor authz.Actor, } func (s *ControlService) DeleteEventType(ctx context.Context, actor authz.Actor, eventType string, req StateChangeRequest) (domain.EventType, error) { - if !authz.Can(actor, "schemas:write", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:write", "event_type", eventType, "") { return domain.EventType{}, ErrForbidden } if strings.TrimSpace(eventType) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1574,7 +1525,7 
@@ func (s *ControlService) DeleteEventType(ctx context.Context, actor authz.Actor, } func (s *ControlService) CreateEventSchema(ctx context.Context, actor authz.Actor, eventType string, req CreateEventSchemaRequest) (domain.EventSchema, error) { - if !authz.Can(actor, "schemas:write", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:write", "event_schema", eventType+":"+req.Version, "") { return domain.EventSchema{}, ErrForbidden } if eventType == "" || req.Version == "" || req.Schema == "" { @@ -1590,14 +1541,14 @@ func (s *ControlService) CreateEventSchema(ctx context.Context, actor authz.Acto } func (s *ControlService) ListEventSchemas(ctx context.Context, actor authz.Actor, eventType string, limit int) ([]domain.EventSchema, error) { - if !authz.Can(actor, "schemas:read", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:read", "event_schema", eventType, "") { return nil, ErrForbidden } return s.store.ListEventSchemas(ctx, actor.TenantID, eventType, normalizeLimit(limit)) } func (s *ControlService) GetEventSchema(ctx context.Context, actor authz.Actor, eventType, version string) (domain.EventSchema, error) { - if !authz.Can(actor, "schemas:read", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:read", "event_schema", eventType+":"+version, "") { return domain.EventSchema{}, ErrForbidden } if strings.TrimSpace(eventType) == "" || strings.TrimSpace(version) == "" { @@ -1607,7 +1558,7 @@ func (s *ControlService) GetEventSchema(ctx context.Context, actor authz.Actor, } func (s *ControlService) UpdateEventSchema(ctx context.Context, actor authz.Actor, eventType, version string, req UpdateEventSchemaRequest) (domain.EventSchema, error) { - if !authz.Can(actor, "schemas:write", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:write", "event_schema", eventType+":"+version, "") { return domain.EventSchema{}, ErrForbidden } if strings.TrimSpace(eventType) == "" || strings.TrimSpace(version) == "" || strings.TrimSpace(req.Reason) == "" 
{ @@ -1625,7 +1576,7 @@ func (s *ControlService) UpdateEventSchema(ctx context.Context, actor authz.Acto } func (s *ControlService) DeleteEventSchema(ctx context.Context, actor authz.Actor, eventType, version string, req StateChangeRequest) (domain.EventSchema, error) { - if !authz.Can(actor, "schemas:write", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:write", "event_schema", eventType+":"+version, "") { return domain.EventSchema{}, ErrForbidden } if strings.TrimSpace(eventType) == "" || strings.TrimSpace(version) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1635,7 +1586,7 @@ func (s *ControlService) DeleteEventSchema(ctx context.Context, actor authz.Acto } func (s *ControlService) ValidateEventSchema(ctx context.Context, actor authz.Actor, eventType, version string, req ValidateSchemaRequest) (SchemaValidationResult, error) { - if !authz.Can(actor, "schemas:read", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:read", "event_schema", eventType+":"+version, "") { return SchemaValidationResult{}, ErrForbidden } if strings.TrimSpace(eventType) == "" || strings.TrimSpace(version) == "" || strings.TrimSpace(req.Payload) == "" { @@ -1649,7 +1600,7 @@ func (s *ControlService) ValidateEventSchema(ctx context.Context, actor authz.Ac } func (s *ControlService) CheckEventSchemaCompatibility(ctx context.Context, actor authz.Actor, eventType, baseVersion string, req CheckSchemaCompatibilityRequest) (SchemaCompatibilityResult, error) { - if !authz.Can(actor, "schemas:read", actor.TenantID) { + if !s.authorized(ctx, actor, "schemas:read", "event_schema", eventType+":"+baseVersion, "") { return SchemaCompatibilityResult{}, ErrForbidden } if strings.TrimSpace(eventType) == "" || strings.TrimSpace(baseVersion) == "" || strings.TrimSpace(req.NewSchema) == "" { @@ -1688,29 +1639,53 @@ func (s *ControlService) RotateEndpointSecret(ctx context.Context, actor authz.A return s.store.RotateEndpointSecret(ctx, actor.TenantID, endpointID, actor.ID, req) } +type 
EventSearchRequest struct { + Limit int `json:"limit,omitempty"` + Provider string `json:"provider,omitempty"` + ExternalID string `json:"external_id,omitempty"` + DeliveryID string `json:"delivery_id,omitempty"` + Status string `json:"status,omitempty"` + Verification string `json:"verification,omitempty"` + ReceivedAfter time.Time `json:"received_after,omitempty"` + RouteID string `json:"route_id,omitempty"` +} + func (s *ControlService) ListEvents(ctx context.Context, actor authz.Actor, limit int) ([]domain.Event, error) { - if !authz.Can(actor, "events:read", actor.TenantID) { + return s.SearchEvents(ctx, actor, EventSearchRequest{Limit: limit}) +} + +func (s *ControlService) SearchEvents(ctx context.Context, actor authz.Actor, req EventSearchRequest) ([]domain.Event, error) { + if !s.authorized(ctx, actor, "events:read", "event", "", "") { return nil, ErrForbidden } - return s.store.ListEvents(ctx, actor.TenantID, normalizeLimit(limit)) + normalized, err := normalizeEventSearchRequest(req) + if err != nil { + return nil, err + } + return s.store.ListEvents(ctx, actor.TenantID, normalized) } func (s *ControlService) GetEvent(ctx context.Context, actor authz.Actor, eventID string) (domain.Event, error) { - if !authz.Can(actor, "events:read", actor.TenantID) { + if !s.authorized(ctx, actor, "events:read", "event", eventID, "") { return domain.Event{}, ErrForbidden } return s.store.GetEvent(ctx, actor.TenantID, eventID) } -func (s *ControlService) GetRawPayload(ctx context.Context, actor authz.Actor, eventID string) (domain.RawPayload, error) { +func (s *ControlService) GetRawPayload(ctx context.Context, actor authz.Actor, eventID, reason string) (domain.RawPayload, error) { if !s.authorized(ctx, actor, "events:raw", "event", eventID, "") { return domain.RawPayload{}, ErrForbidden } - return s.store.GetRawPayload(ctx, actor.TenantID, eventID, actor.ID) + eventID = strings.TrimSpace(eventID) + reason = strings.TrimSpace(reason) + if eventID == "" || reason == "" { 
+ return domain.RawPayload{}, fmt.Errorf("%w: event_id and reason are required", ErrInvalidInput) + } + return s.store.GetRawPayload(ctx, actor.TenantID, eventID, actor.ID, reason) } func (s *ControlService) GetNormalizedEvent(ctx context.Context, actor authz.Actor, eventID string, includeData bool) (domain.NormalizedEnvelope, error) { - if !authz.Can(actor, "events:read", actor.TenantID) { + if !s.authorized(ctx, actor, "events:read", "event", eventID, "") { return domain.NormalizedEnvelope{}, ErrForbidden } if includeData && !s.authorized(ctx, actor, "events:raw", "event", eventID, "") { @@ -1719,36 +1694,40 @@ func (s *ControlService) GetNormalizedEvent(ctx context.Context, actor authz.Act return s.store.GetNormalizedEvent(ctx, actor.TenantID, eventID, actor.ID, includeData) } -func (s *ControlService) ListEventTimeline(ctx context.Context, actor authz.Actor, eventID string, limit int) ([]map[string]any, error) { - if !authz.Can(actor, "events:read", actor.TenantID) { +func (s *ControlService) ListEventTimeline(ctx context.Context, actor authz.Actor, eventID string, limit int) ([]EventTimelineEntry, error) { + if !s.authorized(ctx, actor, "events:read", "event", eventID, "") { return nil, ErrForbidden } - return s.store.ListEventTimeline(ctx, actor.TenantID, eventID, normalizeLimit(limit)) + items, err := s.store.ListEventTimeline(ctx, actor.TenantID, eventID, normalizeLimit(limit)) + if err != nil { + return nil, err + } + return normalizeEventTimelineEntries(items), nil } func (s *ControlService) ListDeliveries(ctx context.Context, actor authz.Actor, limit int) ([]domain.Delivery, error) { - if !authz.Can(actor, "deliveries:read", actor.TenantID) { + if !s.authorized(ctx, actor, "deliveries:read", "delivery", "", "") { return nil, ErrForbidden } return s.store.ListDeliveries(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) ListDeliveryAttempts(ctx context.Context, actor authz.Actor, deliveryID string, limit int) ([]domain.DeliveryAttempt, 
error) { - if !authz.Can(actor, "deliveries:read", actor.TenantID) { + if !s.authorized(ctx, actor, "deliveries:read", "delivery", deliveryID, "") { return nil, ErrForbidden } return s.store.ListDeliveryAttempts(ctx, actor.TenantID, deliveryID, normalizeLimit(limit)) } func (s *ControlService) GetDeliveryAttempt(ctx context.Context, actor authz.Actor, attemptID string) (domain.DeliveryAttempt, error) { - if !authz.Can(actor, "deliveries:read", actor.TenantID) { + if !s.authorized(ctx, actor, "deliveries:read", "delivery_attempt", attemptID, "") { return domain.DeliveryAttempt{}, ErrForbidden } return s.store.GetDeliveryAttempt(ctx, actor.TenantID, attemptID) } func (s *ControlService) RetryDelivery(ctx context.Context, actor authz.Actor, deliveryID, reason string) (domain.Delivery, error) { - if !authz.Can(actor, "deliveries:retry", actor.TenantID) { + if !s.authorized(ctx, actor, "deliveries:retry", "delivery", deliveryID, "") { return domain.Delivery{}, ErrForbidden } if reason == "" { @@ -1758,7 +1737,7 @@ func (s *ControlService) RetryDelivery(ctx context.Context, actor authz.Actor, d } func (s *ControlService) CancelDelivery(ctx context.Context, actor authz.Actor, deliveryID string, req StateChangeRequest) (domain.Delivery, error) { - if !authz.Can(actor, "deliveries:retry", actor.TenantID) { + if !s.authorized(ctx, actor, "deliveries:retry", "delivery", deliveryID, "") { return domain.Delivery{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -1768,14 +1747,14 @@ func (s *ControlService) CancelDelivery(ctx context.Context, actor authz.Actor, } func (s *ControlService) ListEndpointHealth(ctx context.Context, actor authz.Actor, limit int) ([]domain.EndpointHealth, error) { - if !authz.Can(actor, "endpoints:read", actor.TenantID) { + if !s.authorized(ctx, actor, "endpoints:read", "endpoint_health", "", "") { return nil, ErrForbidden } return s.store.ListEndpointHealth(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) 
OpsMetrics(ctx context.Context, actor authz.Actor) (domain.OpsMetrics, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "ops", "metrics", "") { return domain.OpsMetrics{}, ErrForbidden } return s.store.OpsMetrics(ctx, actor.TenantID) @@ -1786,14 +1765,14 @@ func (s *ControlService) PublicOpsMetrics(ctx context.Context) (domain.OpsMetric } func (s *ControlService) ListWorkers(ctx context.Context, actor authz.Actor, limit int) ([]domain.WorkerStatus, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "worker", "", "") { return nil, ErrForbidden } return s.store.ListWorkers(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetWorker(ctx context.Context, actor authz.Actor, workerID string) (domain.WorkerStatus, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "worker", workerID, "") { return domain.WorkerStatus{}, ErrForbidden } if strings.TrimSpace(workerID) == "" { @@ -1803,28 +1782,28 @@ func (s *ControlService) GetWorker(ctx context.Context, actor authz.Actor, worke } func (s *ControlService) ListQueues(ctx context.Context, actor authz.Actor) ([]domain.QueueStats, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "queue", "", "") { return nil, ErrForbidden } return s.store.ListQueues(ctx, actor.TenantID) } func (s *ControlService) OpsStorage(ctx context.Context, actor authz.Actor) (domain.OpsStorageStatus, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "ops", "storage", "") { return domain.OpsStorageStatus{}, ErrForbidden } return s.store.OpsStorage(ctx, actor.TenantID) } func (s *ControlService) OpsConfig(ctx context.Context, actor authz.Actor) (domain.OpsConfig, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", 
"ops", "config", "") { return domain.OpsConfig{}, ErrForbidden } return s.runtimeConfig, nil } func (s *ControlService) ListMetricRollups(ctx context.Context, actor authz.Actor, metricName string, limit int) ([]domain.MetricRollup, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "metric", metricName, "") { return nil, ErrForbidden } metricName = strings.TrimSpace(metricName) @@ -1835,7 +1814,7 @@ func (s *ControlService) ListMetricRollups(ctx context.Context, actor authz.Acto } func (s *ControlService) CreateAlertRule(ctx context.Context, actor authz.Actor, req CreateAlertRuleRequest) (domain.AlertRule, error) { - if !authz.Can(actor, "ops:write", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:write", "alert_rule", "", "") { return domain.AlertRule{}, ErrForbidden } if err := normalizeCreateAlertRule(&req); err != nil { @@ -1845,14 +1824,14 @@ func (s *ControlService) CreateAlertRule(ctx context.Context, actor authz.Actor, } func (s *ControlService) ListAlertRules(ctx context.Context, actor authz.Actor, limit int) ([]domain.AlertRule, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "alert_rule", "", "") { return nil, ErrForbidden } return s.store.ListAlertRules(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetAlertRule(ctx context.Context, actor authz.Actor, alertID string) (domain.AlertRule, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "alert_rule", alertID, "") { return domain.AlertRule{}, ErrForbidden } if strings.TrimSpace(alertID) == "" { @@ -1862,7 +1841,7 @@ func (s *ControlService) GetAlertRule(ctx context.Context, actor authz.Actor, al } func (s *ControlService) UpdateAlertRule(ctx context.Context, actor authz.Actor, alertID string, req UpdateAlertRuleRequest) (domain.AlertRule, error) { - if !authz.Can(actor, "ops:write", actor.TenantID) { + if 
!s.authorized(ctx, actor, "ops:write", "alert_rule", alertID, "") { return domain.AlertRule{}, ErrForbidden } if strings.TrimSpace(alertID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1875,7 +1854,7 @@ func (s *ControlService) UpdateAlertRule(ctx context.Context, actor authz.Actor, } func (s *ControlService) DeleteAlertRule(ctx context.Context, actor authz.Actor, alertID string, req StateChangeRequest) (domain.AlertRule, error) { - if !authz.Can(actor, "ops:write", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:write", "alert_rule", alertID, "") { return domain.AlertRule{}, ErrForbidden } if strings.TrimSpace(alertID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1885,7 +1864,7 @@ func (s *ControlService) DeleteAlertRule(ctx context.Context, actor authz.Actor, } func (s *ControlService) ListAlertFirings(ctx context.Context, actor authz.Actor, state string, limit int) ([]domain.AlertFiring, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "alert_firing", "", "") { return nil, ErrForbidden } state = strings.TrimSpace(state) @@ -1896,7 +1875,7 @@ func (s *ControlService) ListAlertFirings(ctx context.Context, actor authz.Actor } func (s *ControlService) GetAlertFiring(ctx context.Context, actor authz.Actor, firingID string) (domain.AlertFiring, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "alert_firing", firingID, "") { return domain.AlertFiring{}, ErrForbidden } if strings.TrimSpace(firingID) == "" { @@ -1906,7 +1885,7 @@ func (s *ControlService) GetAlertFiring(ctx context.Context, actor authz.Actor, } func (s *ControlService) AcknowledgeAlertFiring(ctx context.Context, actor authz.Actor, firingID string, req StateChangeRequest) (domain.AlertFiring, error) { - if !authz.Can(actor, "ops:write", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:write", "alert_firing", firingID, "") { return domain.AlertFiring{}, ErrForbidden } if 
strings.TrimSpace(firingID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -1932,14 +1911,14 @@ func (s *ControlService) CreateNotificationChannel(ctx context.Context, actor au } func (s *ControlService) ListNotificationChannels(ctx context.Context, actor authz.Actor, limit int) ([]domain.NotificationChannel, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "notification_channel", "", "") { return nil, ErrForbidden } return s.store.ListNotificationChannels(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetNotificationChannel(ctx context.Context, actor authz.Actor, channelID string) (domain.NotificationChannel, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "notification_channel", channelID, "") { return domain.NotificationChannel{}, ErrForbidden } if strings.TrimSpace(channelID) == "" { @@ -1999,7 +1978,7 @@ func (s *ControlService) TestNotificationChannel(ctx context.Context, actor auth } func (s *ControlService) ListNotificationDeliveries(ctx context.Context, actor authz.Actor, state string, limit int) ([]domain.NotificationDelivery, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "notification_delivery", "", "") { return nil, ErrForbidden } state = strings.TrimSpace(state) @@ -2010,7 +1989,7 @@ func (s *ControlService) ListNotificationDeliveries(ctx context.Context, actor a } func (s *ControlService) ListNotificationDeliveryAttempts(ctx context.Context, actor authz.Actor, deliveryID string, limit int) ([]domain.NotificationDeliveryAttempt, error) { - if !authz.Can(actor, "ops:read", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:read", "notification_delivery", deliveryID, "") { return nil, ErrForbidden } if strings.TrimSpace(deliveryID) == "" { @@ -2020,7 +1999,7 @@ func (s *ControlService) ListNotificationDeliveryAttempts(ctx context.Context, a } func (s 
*ControlService) RetryNotificationDelivery(ctx context.Context, actor authz.Actor, deliveryID string, req StateChangeRequest) (domain.NotificationDelivery, error) { - if !authz.Can(actor, "ops:write", actor.TenantID) { + if !s.authorized(ctx, actor, "ops:write", "notification_delivery", deliveryID, "") { return domain.NotificationDelivery{}, ErrForbidden } if strings.TrimSpace(deliveryID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -2046,14 +2025,14 @@ func (s *ControlService) CreateSIEMSink(ctx context.Context, actor authz.Actor, } func (s *ControlService) ListSIEMSinks(ctx context.Context, actor authz.Actor, limit int) ([]domain.SIEMSink, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "siem_sink", "", "") { return nil, ErrForbidden } return s.store.ListSIEMSinks(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetSIEMSink(ctx context.Context, actor authz.Actor, sinkID string) (domain.SIEMSink, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "siem_sink", sinkID, "") { return domain.SIEMSink{}, ErrForbidden } if strings.TrimSpace(sinkID) == "" { @@ -2113,7 +2092,7 @@ func (s *ControlService) TestSIEMSink(ctx context.Context, actor authz.Actor, si } func (s *ControlService) ListSIEMDeliveries(ctx context.Context, actor authz.Actor, state string, limit int) ([]domain.SIEMDelivery, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "siem_delivery", "", "") { return nil, ErrForbidden } state = strings.TrimSpace(state) @@ -2124,7 +2103,7 @@ func (s *ControlService) ListSIEMDeliveries(ctx context.Context, actor authz.Act } func (s *ControlService) ListSIEMDeliveryAttempts(ctx context.Context, actor authz.Actor, deliveryID string, limit int) ([]domain.SIEMDeliveryAttempt, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, 
"audit:read", "siem_delivery", deliveryID, "") { return nil, ErrForbidden } if strings.TrimSpace(deliveryID) == "" { @@ -2134,7 +2113,7 @@ func (s *ControlService) ListSIEMDeliveryAttempts(ctx context.Context, actor aut } func (s *ControlService) RetrySIEMDelivery(ctx context.Context, actor authz.Actor, deliveryID string, req StateChangeRequest) (domain.SIEMDelivery, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "siem_delivery", deliveryID, "") { return domain.SIEMDelivery{}, ErrForbidden } if strings.TrimSpace(deliveryID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -2144,10 +2123,10 @@ func (s *ControlService) RetrySIEMDelivery(ctx context.Context, actor authz.Acto } func (s *ControlService) DryRunReplay(ctx context.Context, actor authz.Actor, req ReplayRequest) (ReplayDryRun, error) { - if !authz.Can(actor, "replay:read", actor.TenantID) { + if !s.authorized(ctx, actor, "replay:read", "replay", req.EventID, "") { return ReplayDryRun{}, ErrForbidden } - if err := normalizeReplayRequest(&req, false); err != nil { + if err := normalizeReplayRequest(&req, false, true); err != nil { return ReplayDryRun{}, err } return s.store.DryRunReplay(ctx, actor.TenantID, req) @@ -2157,23 +2136,47 @@ func (s *ControlService) CreateReplay(ctx context.Context, actor authz.Actor, re if !s.authorized(ctx, actor, "replay:write", "replay", req.EventID, "") { return ReplayJob{}, ErrForbidden } - req.Reason = strings.TrimSpace(req.Reason) - if req.Reason == "" { - return ReplayJob{}, fmt.Errorf("%w: reason is required", ErrInvalidInput) - } - if err := normalizeReplayRequest(&req, true); err != nil { + if err := normalizeReplayRequest(&req, true, true); err != nil { return ReplayJob{}, err } return s.store.CreateReplay(ctx, actor.TenantID, actor.ID, req) } func (s *ControlService) ListReplayJobs(ctx context.Context, actor authz.Actor, limit int) ([]ReplayJob, error) { - if !authz.Can(actor, "replay:read", 
actor.TenantID) { + if !s.authorized(ctx, actor, "replay:read", "replay", "", "") { return nil, ErrForbidden } return s.store.ListReplayJobs(ctx, actor.TenantID, normalizeLimit(limit)) } +func (s *ControlService) CreateReplayApprovalPolicy(ctx context.Context, actor authz.Actor, req CreateReplayApprovalPolicyRequest) (domain.ReplayApprovalPolicy, error) { + if !s.authorized(ctx, actor, "security:write", "replay_approval_policy", "", "") { + return domain.ReplayApprovalPolicy{}, ErrForbidden + } + if err := normalizeReplayApprovalPolicyRequest(&req); err != nil { + return domain.ReplayApprovalPolicy{}, err + } + return s.store.CreateReplayApprovalPolicy(ctx, actor.TenantID, actor.ID, req) +} + +func (s *ControlService) ListReplayApprovalPolicies(ctx context.Context, actor authz.Actor, limit int) ([]domain.ReplayApprovalPolicy, error) { + if !s.authorized(ctx, actor, "security:read", "replay_approval_policy", "", "") { + return nil, ErrForbidden + } + return s.store.ListReplayApprovalPolicies(ctx, actor.TenantID, normalizeLimit(limit)) +} + +func (s *ControlService) DisableReplayApprovalPolicy(ctx context.Context, actor authz.Actor, policyID string, req StateChangeRequest) (domain.ReplayApprovalPolicy, error) { + if !s.authorized(ctx, actor, "security:write", "replay_approval_policy", policyID, "") { + return domain.ReplayApprovalPolicy{}, ErrForbidden + } + policyID = strings.TrimSpace(policyID) + if policyID == "" || strings.TrimSpace(req.Reason) == "" { + return domain.ReplayApprovalPolicy{}, fmt.Errorf("%w: policy_id and reason are required", ErrInvalidInput) + } + return s.store.DisableReplayApprovalPolicy(ctx, actor.TenantID, policyID, actor.ID, req.Reason) +} + func (s *ControlService) ApproveReplayJob(ctx context.Context, actor authz.Actor, replayJobID string, req StateChangeRequest) (ReplayJob, error) { return s.changeReplayState(ctx, actor, replayJobID, req, s.store.ApproveReplayJob) } @@ -2191,7 +2194,7 @@ func (s *ControlService) CancelReplayJob(ctx 
context.Context, actor authz.Actor, } func (s *ControlService) changeReplayState(ctx context.Context, actor authz.Actor, replayJobID string, req StateChangeRequest, fn func(context.Context, string, string, string, string) (ReplayJob, error)) (ReplayJob, error) { - if !authz.Can(actor, "replay:write", actor.TenantID) { + if !s.authorized(ctx, actor, "replay:write", "replay", replayJobID, "") { return ReplayJob{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -2201,21 +2204,21 @@ func (s *ControlService) changeReplayState(ctx context.Context, actor authz.Acto } func (s *ControlService) ListAuditEvents(ctx context.Context, actor authz.Actor, limit int) ([]domain.AuditEvent, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "audit_event", "", "") { return nil, ErrForbidden } return s.store.ListAuditEvents(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetAuditChainHead(ctx context.Context, actor authz.Actor) (domain.AuditChainHead, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "audit_chain", "", "") { return domain.AuditChainHead{}, ErrForbidden } return s.store.GetAuditChainHead(ctx, actor.TenantID) } func (s *ControlService) VerifyAuditChain(ctx context.Context, actor authz.Actor, req AuditChainVerifyRequest) (domain.AuditChainVerification, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "audit_chain", "", "") { return domain.AuditChainVerification{}, ErrForbidden } if req.FromSequence < 0 || req.ToSequence < 0 { @@ -2228,7 +2231,7 @@ func (s *ControlService) VerifyAuditChain(ctx context.Context, actor authz.Actor } func (s *ControlService) CreateAuditChainAnchor(ctx context.Context, actor authz.Actor, req AuditChainAnchorRequest) (domain.AuditChainAnchor, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, 
actor, "security:write", "audit_chain_anchor", "", "") { return domain.AuditChainAnchor{}, ErrForbidden } req.Reason = strings.TrimSpace(req.Reason) @@ -2245,14 +2248,14 @@ func (s *ControlService) CreateAuditChainAnchor(ctx context.Context, actor authz } func (s *ControlService) ListAuditChainAnchors(ctx context.Context, actor authz.Actor, limit int) ([]domain.AuditChainAnchor, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "audit_chain_anchor", "", "") { return nil, ErrForbidden } return s.store.ListAuditChainAnchors(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetAuditChainAnchor(ctx context.Context, actor authz.Actor, anchorID string) (domain.AuditChainAnchor, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "audit_chain_anchor", anchorID, "") { return domain.AuditChainAnchor{}, ErrForbidden } if strings.TrimSpace(anchorID) == "" { @@ -2262,14 +2265,14 @@ func (s *ControlService) GetAuditChainAnchor(ctx context.Context, actor authz.Ac } func (s *ControlService) ListRetentionPolicies(ctx context.Context, actor authz.Actor, limit int) ([]domain.RetentionPolicy, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "retention_policy", "", "") { return nil, ErrForbidden } return s.store.ListRetentionPolicies(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) CreateRetentionPolicy(ctx context.Context, actor authz.Actor, req CreateRetentionPolicyRequest) (domain.RetentionPolicy, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "retention_policy", "", "") { return domain.RetentionPolicy{}, ErrForbidden } req.ResourceType = strings.TrimSpace(req.ResourceType) @@ -2286,7 +2289,7 @@ func (s *ControlService) CreateRetentionPolicy(ctx context.Context, actor authz. 
} func (s *ControlService) UpdateRetentionPolicy(ctx context.Context, actor authz.Actor, policyID string, req UpdateRetentionPolicyRequest) (domain.RetentionPolicy, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "retention_policy", policyID, "") { return domain.RetentionPolicy{}, ErrForbidden } if strings.TrimSpace(policyID) == "" { @@ -2314,7 +2317,7 @@ func (s *ControlService) UpdateRetentionPolicy(ctx context.Context, actor authz. } func (s *ControlService) CreateProviderConnection(ctx context.Context, actor authz.Actor, req CreateProviderConnectionRequest) (domain.ProviderConnection, error) { - if !authz.Can(actor, "sources:write", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:write", "provider_connection", "", "") { return domain.ProviderConnection{}, ErrForbidden } if err := validateProviderConnectionRequest(&req); err != nil { @@ -2324,14 +2327,14 @@ func (s *ControlService) CreateProviderConnection(ctx context.Context, actor aut } func (s *ControlService) ListProviderConnections(ctx context.Context, actor authz.Actor, limit int) ([]domain.ProviderConnection, error) { - if !authz.Can(actor, "sources:read", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:read", "provider_connection", "", "") { return nil, ErrForbidden } return s.store.ListProviderConnections(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetProviderConnection(ctx context.Context, actor authz.Actor, connectionID string) (domain.ProviderConnection, error) { - if !authz.Can(actor, "sources:read", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:read", "provider_connection", connectionID, "") { return domain.ProviderConnection{}, ErrForbidden } if strings.TrimSpace(connectionID) == "" { @@ -2341,7 +2344,7 @@ func (s *ControlService) GetProviderConnection(ctx context.Context, actor authz. 
} func (s *ControlService) VerifyProviderConnection(ctx context.Context, actor authz.Actor, connectionID string, req ProviderConnectionStateRequest) (domain.ProviderConnection, error) { - if !authz.Can(actor, "sources:write", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:write", "provider_connection", connectionID, "") { return domain.ProviderConnection{}, ErrForbidden } if strings.TrimSpace(connectionID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -2351,7 +2354,7 @@ func (s *ControlService) VerifyProviderConnection(ctx context.Context, actor aut } func (s *ControlService) RevokeProviderConnection(ctx context.Context, actor authz.Actor, connectionID string, req ProviderConnectionStateRequest) (domain.ProviderConnection, error) { - if !authz.Can(actor, "sources:write", actor.TenantID) { + if !s.authorized(ctx, actor, "sources:write", "provider_connection", connectionID, "") { return domain.ProviderConnection{}, ErrForbidden } if strings.TrimSpace(connectionID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -2361,18 +2364,26 @@ func (s *ControlService) RevokeProviderConnection(ctx context.Context, actor aut } func (s *ControlService) DryRunReconciliation(ctx context.Context, actor authz.Actor, req ReconciliationJobRequest) (domain.ReconciliationJob, error) { - if !authz.Can(actor, "replay:read", actor.TenantID) { + if !s.authorized(ctx, actor, "replay:read", "reconciliation_job", req.ConnectionID, "") { return domain.ReconciliationJob{}, ErrForbidden } if err := validateReconciliationJobRequest(&req, false); err != nil { return domain.ReconciliationJob{}, err } req.DryRun = true - return s.store.DryRunReconciliation(ctx, actor.TenantID, req) + if s.reconciliation != nil { + return s.reconciliation.DryRunReconciliation(ctx, actor.TenantID, req) + } + if dryRunner, ok := any(s.store).(interface { + DryRunReconciliation(context.Context, string, ReconciliationJobRequest) (domain.ReconciliationJob, error) + }); ok { + return 
dryRunner.DryRunReconciliation(ctx, actor.TenantID, req) + } + return domain.ReconciliationJob{}, ErrInvalidInput } func (s *ControlService) CreateReconciliationJob(ctx context.Context, actor authz.Actor, req ReconciliationJobRequest) (domain.ReconciliationJob, error) { - if !authz.Can(actor, "replay:write", actor.TenantID) { + if !s.authorized(ctx, actor, "replay:write", "reconciliation_job", req.ConnectionID, "") { return domain.ReconciliationJob{}, ErrForbidden } if err := validateReconciliationJobRequest(&req, true); err != nil { @@ -2382,14 +2393,14 @@ func (s *ControlService) CreateReconciliationJob(ctx context.Context, actor auth } func (s *ControlService) ListReconciliationJobs(ctx context.Context, actor authz.Actor, limit int) ([]domain.ReconciliationJob, error) { - if !authz.Can(actor, "replay:read", actor.TenantID) { + if !s.authorized(ctx, actor, "replay:read", "reconciliation_job", "", "") { return nil, ErrForbidden } return s.store.ListReconciliationJobs(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetReconciliationJob(ctx context.Context, actor authz.Actor, jobID string) (domain.ReconciliationJob, error) { - if !authz.Can(actor, "replay:read", actor.TenantID) { + if !s.authorized(ctx, actor, "replay:read", "reconciliation_job", jobID, "") { return domain.ReconciliationJob{}, ErrForbidden } if strings.TrimSpace(jobID) == "" { @@ -2399,7 +2410,7 @@ func (s *ControlService) GetReconciliationJob(ctx context.Context, actor authz.A } func (s *ControlService) ListReconciliationItems(ctx context.Context, actor authz.Actor, jobID string, limit int) ([]domain.ReconciliationItem, error) { - if !authz.Can(actor, "replay:read", actor.TenantID) { + if !s.authorized(ctx, actor, "replay:read", "reconciliation_job", jobID, "") { return nil, ErrForbidden } if strings.TrimSpace(jobID) == "" { @@ -2409,7 +2420,7 @@ func (s *ControlService) ListReconciliationItems(ctx context.Context, actor auth } func (s *ControlService) 
CancelReconciliationJob(ctx context.Context, actor authz.Actor, jobID string, req ProviderConnectionStateRequest) (domain.ReconciliationJob, error) { - if !authz.Can(actor, "replay:write", actor.TenantID) { + if !s.authorized(ctx, actor, "replay:write", "reconciliation_job", jobID, "") { return domain.ReconciliationJob{}, ErrForbidden } if strings.TrimSpace(jobID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -2419,7 +2430,7 @@ func (s *ControlService) CancelReconciliationJob(ctx context.Context, actor auth } func (s *ControlService) CreateAuditExport(ctx context.Context, actor authz.Actor, req CreateAuditExportRequest) (domain.EvidenceExport, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "audit_export", "", "") { return domain.EvidenceExport{}, ErrForbidden } if (req.IncludeRawPayloads || req.IncludePayloadBodies) && !s.authorized(ctx, actor, "events:raw", "audit_export", "", "") { @@ -2433,7 +2444,7 @@ func (s *ControlService) CreateAuditExport(ctx context.Context, actor authz.Acto } func (s *ControlService) CreateTransformation(ctx context.Context, actor authz.Actor, req CreateTransformationRequest) (domain.Transformation, error) { - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "transformation", "", "") { return domain.Transformation{}, ErrForbidden } req.Name = strings.TrimSpace(req.Name) @@ -2449,21 +2460,21 @@ func (s *ControlService) CreateTransformation(ctx context.Context, actor authz.A } func (s *ControlService) ListTransformations(ctx context.Context, actor authz.Actor, limit int) ([]domain.Transformation, error) { - if !authz.Can(actor, "routes:read", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:read", "transformation", "", "") { return nil, ErrForbidden } return s.store.ListTransformations(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) GetTransformation(ctx context.Context, actor authz.Actor, 
transformationID string) (domain.Transformation, error) { - if !authz.Can(actor, "routes:read", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:read", "transformation", transformationID, "") { return domain.Transformation{}, ErrForbidden } return s.store.GetTransformation(ctx, actor.TenantID, transformationID) } func (s *ControlService) CreateTransformationVersion(ctx context.Context, actor authz.Actor, transformationID string, req CreateTransformationVersionRequest) (domain.TransformationVersion, error) { - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "transformation", transformationID, "") { return domain.TransformationVersion{}, ErrForbidden } if strings.TrimSpace(transformationID) == "" || len(req.Operations) == 0 { @@ -2476,14 +2487,14 @@ func (s *ControlService) CreateTransformationVersion(ctx context.Context, actor } func (s *ControlService) ListTransformationVersions(ctx context.Context, actor authz.Actor, transformationID string, limit int) ([]domain.TransformationVersion, error) { - if !authz.Can(actor, "routes:read", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:read", "transformation", transformationID, "") { return nil, ErrForbidden } return s.store.ListTransformationVersions(ctx, actor.TenantID, transformationID, normalizeLimit(limit)) } func (s *ControlService) ActivateTransformationVersion(ctx context.Context, actor authz.Actor, transformationID, versionID string, req ActivateTransformationVersionRequest) (domain.TransformationVersion, error) { - if !authz.Can(actor, "routes:write", actor.TenantID) { + if !s.authorized(ctx, actor, "routes:write", "transformation_version", versionID, "") { return domain.TransformationVersion{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -2493,7 +2504,7 @@ func (s *ControlService) ActivateTransformationVersion(ctx context.Context, acto } func (s *ControlService) ListAuditExports(ctx context.Context, actor authz.Actor, limit 
int) ([]domain.EvidenceExport, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "audit_export", "", "") { return nil, ErrForbidden } exports, err := s.store.ListAuditExports(ctx, actor.TenantID, normalizeLimit(limit)) @@ -2513,7 +2524,7 @@ func (s *ControlService) ListAuditExports(ctx context.Context, actor authz.Actor } func (s *ControlService) GetAuditExport(ctx context.Context, actor authz.Actor, exportID string) (domain.EvidenceExport, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "audit_export", exportID, "") { return domain.EvidenceExport{}, ErrForbidden } export, err := s.store.GetAuditExport(ctx, actor.TenantID, exportID) @@ -2527,7 +2538,7 @@ func (s *ControlService) GetAuditExport(ctx context.Context, actor authz.Actor, } func (s *ControlService) DownloadAuditExport(ctx context.Context, actor authz.Actor, exportID string) (EvidenceExportDownload, error) { - if !authz.Can(actor, "audit:read", actor.TenantID) { + if !s.authorized(ctx, actor, "audit:read", "audit_export", exportID, "") { return EvidenceExportDownload{}, ErrForbidden } export, err := s.store.GetAuditExport(ctx, actor.TenantID, exportID) @@ -2545,41 +2556,51 @@ func (s *ControlService) DownloadAuditExport(ctx context.Context, actor authz.Ac } func (s *ControlService) ListDeadLetter(ctx context.Context, actor authz.Actor, limit int) ([]map[string]any, error) { - if !authz.Can(actor, "deliveries:read", actor.TenantID) { + if !s.authorized(ctx, actor, "deliveries:read", "dead_letter", "", "") { return nil, ErrForbidden } return s.store.ListDeadLetter(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) ReleaseDeadLetter(ctx context.Context, actor authz.Actor, entryID string, req DeadLetterReleaseRequest) (ReplayJob, error) { - if !authz.Can(actor, "deliveries:retry", actor.TenantID) { + if !s.authorized(ctx, actor, "deliveries:retry", "dead_letter", entryID, 
"") { return ReplayJob{}, ErrForbidden } + req.ReasonCode = strings.TrimSpace(req.ReasonCode) + req.Reason = strings.TrimSpace(req.Reason) if req.Reason == "" { return ReplayJob{}, fmt.Errorf("%w: reason is required", ErrInvalidInput) } - return s.store.ReleaseDeadLetter(ctx, actor.TenantID, entryID, actor.ID, req.Reason) + if err := validateReplayReasonCode(req.ReasonCode); err != nil { + return ReplayJob{}, err + } + return s.store.ReleaseDeadLetter(ctx, actor.TenantID, entryID, actor.ID, req.ReasonCode, req.Reason) } func (s *ControlService) BulkReleaseDeadLetter(ctx context.Context, actor authz.Actor, req DeadLetterBulkReleaseRequest) ([]ReplayJob, error) { - if !authz.Can(actor, "deliveries:retry", actor.TenantID) { + if !s.authorized(ctx, actor, "deliveries:retry", "dead_letter", "", "") { return nil, ErrForbidden } - if strings.TrimSpace(req.Reason) == "" { + req.ReasonCode = strings.TrimSpace(req.ReasonCode) + req.Reason = strings.TrimSpace(req.Reason) + if req.Reason == "" { return nil, fmt.Errorf("%w: reason is required", ErrInvalidInput) } - return s.store.BulkReleaseDeadLetter(ctx, actor.TenantID, req.EntryIDs, actor.ID, req.Reason) + if err := validateReplayReasonCode(req.ReasonCode); err != nil { + return nil, err + } + return s.store.BulkReleaseDeadLetter(ctx, actor.TenantID, req.EntryIDs, actor.ID, req.ReasonCode, req.Reason) } func (s *ControlService) ListQuarantine(ctx context.Context, actor authz.Actor, limit int) ([]map[string]any, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "quarantine", "", "") { return nil, ErrForbidden } return s.store.ListQuarantine(ctx, actor.TenantID, normalizeLimit(limit)) } func (s *ControlService) ApproveQuarantine(ctx context.Context, actor authz.Actor, entryID string, req QuarantineDecisionRequest) (map[string]any, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "quarantine", 
entryID, "") { return nil, ErrForbidden } if req.Reason == "" { @@ -2589,7 +2610,7 @@ func (s *ControlService) ApproveQuarantine(ctx context.Context, actor authz.Acto } func (s *ControlService) RejectQuarantine(ctx context.Context, actor authz.Actor, entryID string, req QuarantineDecisionRequest) (map[string]any, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "quarantine", entryID, "") { return nil, ErrForbidden } if req.Reason == "" { @@ -2605,6 +2626,64 @@ func normalizeLimit(limit int) int { return limit } +func normalizeEventSearchRequest(req EventSearchRequest) (EventSearchRequest, error) { + req.Limit = normalizeLimit(req.Limit) + req.Provider = strings.TrimSpace(req.Provider) + req.ExternalID = strings.TrimSpace(req.ExternalID) + req.DeliveryID = strings.TrimSpace(req.DeliveryID) + req.Status = strings.ToLower(strings.TrimSpace(req.Status)) + req.Verification = strings.ToLower(strings.TrimSpace(req.Verification)) + req.RouteID = strings.TrimSpace(req.RouteID) + for name, value := range map[string]string{ + "provider": req.Provider, + "external_id": req.ExternalID, + "delivery_id": req.DeliveryID, + "status": req.Status, + "verification": req.Verification, + "route_id": req.RouteID, + } { + if len(value) > 256 { + return EventSearchRequest{}, fmt.Errorf("%w: %s is too long", ErrInvalidInput, name) + } + } + switch req.Verification { + case "", "valid", "invalid": + default: + return EventSearchRequest{}, fmt.Errorf("%w: verification must be valid or invalid", ErrInvalidInput) + } + switch req.Status { + case "", "dlq", "dead_lettered": + default: + return EventSearchRequest{}, fmt.Errorf("%w: status must be dlq or dead_lettered", ErrInvalidInput) + } + if req.ReceivedAfter.IsZero() { + return req, nil + } + req.ReceivedAfter = req.ReceivedAfter.UTC() + return req, nil +} + +func normalizeEventTimelineEntries(items []EventTimelineEntry) []EventTimelineEntry { + out := 
make([]EventTimelineEntry, 0, len(items)) + for i, item := range items { + if item.SchemaVersion == "" { + item.SchemaVersion = EventTimelineSchemaV1 + } + if item.Sequence <= 0 { + item.Sequence = i + 1 + } + item.Kind = strings.TrimSpace(item.Kind) + item.RefID = strings.TrimSpace(item.RefID) + item.State = strings.TrimSpace(item.State) + item.Detail = strings.TrimSpace(item.Detail) + if !item.OccurredAt.IsZero() { + item.OccurredAt = item.OccurredAt.UTC() + } + out = append(out, item) + } + return out +} + func validMetricName(name string) bool { if len(name) > 128 { return false @@ -3007,23 +3086,91 @@ func validateRetryPolicy(req CreateRetryPolicyRequest) error { return nil } -func normalizeReplayRequest(req *ReplayRequest, requireScope bool) error { +func normalizeReplayRequest(req *ReplayRequest, requireScope, requireReason bool) error { + req.EventID = strings.TrimSpace(req.EventID) + req.DeliveryID = strings.TrimSpace(req.DeliveryID) + req.EndpointID = strings.TrimSpace(req.EndpointID) + req.ReasonCode = strings.TrimSpace(req.ReasonCode) + req.Reason = strings.TrimSpace(req.Reason) req.ConfigMode = strings.TrimSpace(req.ConfigMode) if req.ConfigMode == "" { req.ConfigMode = ReplayConfigCurrent } + if requireReason { + if req.Reason == "" { + return fmt.Errorf("%w: reason is required", ErrInvalidInput) + } + if err := validateReplayReasonCode(req.ReasonCode); err != nil { + return err + } + } if req.ConfigMode != ReplayConfigCurrent && req.ConfigMode != ReplayConfigOriginal { return fmt.Errorf("%w: config_mode must be current or original", ErrInvalidInput) } if req.RateLimitPerMinute < 0 || req.RateLimitPerMinute > 60000 { return fmt.Errorf("%w: rate_limit_per_minute must be between 0 and 60000", ErrInvalidInput) } + if req.ApprovalExpiresAt != nil { + approvalExpiresAt := req.ApprovalExpiresAt.UTC() + req.ApprovalExpiresAt = &approvalExpiresAt + } + if !req.RequireApproval && req.ApprovalExpiresAt != nil { + return fmt.Errorf("%w: approval_expires_at 
requires require_approval", ErrInvalidInput) + } + if req.RequireApproval { + if req.ApprovalExpiresAt == nil { + approvalExpiresAt := time.Now().UTC().Add(ReplayApprovalDefaultExpiry) + req.ApprovalExpiresAt = &approvalExpiresAt + } + if !req.ApprovalExpiresAt.After(time.Now().UTC()) { + return fmt.Errorf("%w: approval_expires_at must be in the future", ErrInvalidInput) + } + } if requireScope && strings.TrimSpace(req.EventID) == "" && strings.TrimSpace(req.DeliveryID) == "" { return fmt.Errorf("%w: event_id or delivery_id is required", ErrInvalidInput) } return nil } +func validateReplayReasonCode(reasonCode string) error { + if reasonCode == "" { + return fmt.Errorf("%w: reason_code is required", ErrInvalidInput) + } + if _, ok := replayReasonCodes[reasonCode]; !ok { + return fmt.Errorf("%w: reason_code must be one of receiver_fixed, provider_reconciliation, operator_requested, support_investigation, customer_dispute, test_drill, incident_recovery", ErrInvalidInput) + } + return nil +} + +func normalizeReplayApprovalPolicyRequest(req *CreateReplayApprovalPolicyRequest) error { + req.ScopeType = strings.TrimSpace(req.ScopeType) + req.ScopeID = strings.TrimSpace(req.ScopeID) + req.Reason = strings.TrimSpace(req.Reason) + if req.ScopeType == "" || req.Reason == "" { + return fmt.Errorf("%w: scope_type and reason are required", ErrInvalidInput) + } + switch req.ScopeType { + case ReplayApprovalScopeTenant: + req.ScopeID = "" + case ReplayApprovalScopeSource, ReplayApprovalScopeRoute: + if req.ScopeID == "" { + return fmt.Errorf("%w: scope_id is required for source and route replay approval policies", ErrInvalidInput) + } + default: + return fmt.Errorf("%w: scope_type must be tenant, source, or route", ErrInvalidInput) + } + if !req.RequireApproval { + req.RequireApproval = true + } + if req.DefaultExpirySeconds == 0 { + req.DefaultExpirySeconds = int(ReplayApprovalDefaultExpiry / time.Second) + } + if req.DefaultExpirySeconds < 300 || req.DefaultExpirySeconds > 
7*24*60*60 { + return fmt.Errorf("%w: default_expiry_seconds must be between 300 and 604800", ErrInvalidInput) + } + return nil +} + func validateEndpointMTLS(certPEM, keyPEM string) (bool, string, []byte, []byte, error) { certPEM = strings.TrimSpace(certPEM) keyPEM = strings.TrimSpace(keyPEM) diff --git a/internal/app/control_test.go b/internal/app/control_test.go index 29a4a21..fd7aba5 100644 --- a/internal/app/control_test.go +++ b/internal/app/control_test.go @@ -21,7 +21,7 @@ import ( ) func TestControlServiceScopesEventReadsToActorTenant(t *testing.T) { - store := &fakeControlStore{} + store := &fakeControlStore{eventSchema: domain.EventSchema{Schema: `{"type":"object"}`}} svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleDeveloper, Scopes: []string{"events:read"}} @@ -34,6 +34,112 @@ func TestControlServiceScopesEventReadsToActorTenant(t *testing.T) { } } +func TestControlServiceSearchEventsNormalizesFiltersAndScopesTenant(t *testing.T) { + store := &fakeControlStore{} + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleDeveloper, Scopes: []string{"events:read"}} + + receivedAfter := time.Date(2026, 6, 4, 12, 0, 0, 0, time.FixedZone("UTC+2", 2*60*60)) + if _, err := svc.SearchEvents(context.Background(), actor, EventSearchRequest{ + Provider: " stripe ", + ExternalID: " evt_external ", + Status: "DLQ", + Verification: "INVALID", + ReceivedAfter: receivedAfter, + Limit: 250, + }); err != nil { + t.Fatal(err) + } + if store.eventSearchTenantID != "ten_a" { + t.Fatalf("expected tenant-scoped event search, got %q", store.eventSearchTenantID) + } + if store.eventSearchReq.Provider != "stripe" || store.eventSearchReq.ExternalID != "evt_external" || store.eventSearchReq.Status != "dlq" || store.eventSearchReq.Verification != "invalid" { + t.Fatalf("filters were not 
normalized: %+v", store.eventSearchReq) + } + if store.eventSearchReq.Limit != 50 { + t.Fatalf("limit should be bounded to default, got %d", store.eventSearchReq.Limit) + } + if store.eventSearchReq.ReceivedAfter.Location() != time.UTC { + t.Fatalf("received_after should be UTC-normalized: %s", store.eventSearchReq.ReceivedAfter) + } + if _, err := svc.SearchEvents(context.Background(), actor, EventSearchRequest{Verification: "maybe"}); !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected invalid verification to be rejected, got %v", err) + } +} + +func TestControlServiceListEventTimelineReturnsVersionedEntries(t *testing.T) { + store := &fakeControlStore{} + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleDeveloper, Scopes: []string{"events:read"}} + + items, err := svc.ListEventTimeline(context.Background(), actor, "evt_1", 10) + if err != nil { + t.Fatal(err) + } + if len(items) == 0 { + t.Fatal("expected timeline entries") + } + if items[0].SchemaVersion != EventTimelineSchemaV1 || items[0].Sequence != 1 || items[0].OccurredAt.Location() != time.UTC { + t.Fatalf("timeline entry was not normalized: %+v", items[0]) + } +} + +func TestControlServiceIncidentLifecycleScopesAndRedactsReports(t *testing.T) { + store := &fakeControlStore{} + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleOperator, Scopes: []string{"incidents:read", "incidents:write", "events:read"}} + reader := authz.Actor{ID: "usr_2", TenantID: "ten_a", Role: authz.RoleSupport, Scopes: []string{"incidents:read"}} + + if _, err := svc.CreateIncident(context.Background(), reader, CreateIncidentRequest{Title: "Stripe payment failed", Reason: "support case"}); err != ErrForbidden { + t.Fatalf("expected incident create to require incidents:write, got %v", err) + } + if _, err := 
svc.CreateIncident(context.Background(), actor, CreateIncidentRequest{Title: " ", Reason: "support case"}); !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected incident title validation, got %v", err) + } + + incident, err := svc.CreateIncident(context.Background(), actor, CreateIncidentRequest{Title: "Stripe payment failed", Reason: "support case"}) + if err != nil { + t.Fatal(err) + } + if incident.TenantID != "ten_a" || incident.CreatedBy != "usr_1" || store.incidentTenantID != "ten_a" || store.incidentActorID != "usr_1" { + t.Fatalf("incident was not tenant-scoped: incident=%+v tenant=%q actor=%q", incident, store.incidentTenantID, store.incidentActorID) + } + + if _, err := svc.AddIncidentEvent(context.Background(), actor, incident.ID, AddIncidentEventRequest{EventID: "evt_1", Reason: "investigate"}); err != nil { + t.Fatal(err) + } + if store.incidentID != incident.ID || store.incidentEventID != "evt_1" || store.incidentReason != "investigate" { + t.Fatalf("incident event link was not scoped with reason: incident=%q event=%q reason=%q", store.incidentID, store.incidentEventID, store.incidentReason) + } + + snapshot, err := svc.GenerateIncidentReport(context.Background(), actor, incident.ID, IncidentReportRequest{Reason: "support handoff"}) + if err != nil { + t.Fatal(err) + } + raw, err := json.Marshal(snapshot) + if err != nil { + t.Fatal(err) + } + for _, forbidden := range []string{"raw-body-secret", "whsec_test", "sk_test_secret", "Stripe-Signature", "v1=secret", "Bearer secret-token", "private-key-secret"} { + if strings.Contains(string(raw), forbidden) { + t.Fatalf("incident report leaked sensitive value %q in %s", forbidden, raw) + } + } + if !strings.Contains(snapshot.Markdown, "Inbound capture does not prove downstream business success") { + t.Fatalf("incident report must include non-claims, got markdown:\n%s", snapshot.Markdown) + } + if !strings.Contains(snapshot.Markdown, "reason_code=incident_recovery") || !strings.Contains(snapshot.Markdown, 
"receiver restored after DLQ") { + t.Fatalf("incident report must include replay reason code and reason, got markdown:\n%s", snapshot.Markdown) + } + + if _, err := svc.GetIncidentReport(context.Background(), reader, incident.ID); err != nil { + t.Fatal(err) + } + if store.incidentTenantID != "ten_a" { + t.Fatalf("incident report read was not tenant-scoped: %q", store.incidentTenantID) + } +} + func TestControlServiceScopesEventSchemaReadsToActorTenant(t *testing.T) { store := &fakeControlStore{} svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) @@ -62,6 +168,225 @@ func TestControlServiceScopesEventTypeReadsToActorTenant(t *testing.T) { } } +func TestControlServiceOwnerHappyPathSurface(t *testing.T) { + store := &fakeControlStore{eventSchema: domain.EventSchema{Schema: `{"type":"object"}`}} + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{ + "receiver.example": {netip.MustParseAddr("93.184.216.34")}, + "signals.example": {netip.MustParseAddr("93.184.216.34")}, + "siem.example": {netip.MustParseAddr("93.184.216.34")}, + }}) + actor := authz.Actor{ID: "usr_owner", TenantID: "ten_owner", Role: authz.RoleOwner, Scopes: []string{"*"}} + active := domain.StateActive + disabled := domain.StateDisabled + tests := []struct { + name string + run func(context.Context) error + }{ + {name: "api keys", run: func(ctx context.Context) error { + if _, err := svc.ListAPIKeys(ctx, actor, 10); err != nil { + return err + } + _, err := svc.RevokeAPIKey(ctx, actor, "key_1", RevokeAPIKeyRequest{Reason: "rotate"}) + return err + }}, + {name: "sources", run: func(ctx context.Context) error { + if _, err := svc.CreateSource(ctx, actor, CreateSourceRequest{Name: "stripe", Provider: "stripe", Adapter: "stripe", VerificationSecret: "whsec_test"}); err != nil { + return err + } + if _, err := svc.ListSources(ctx, actor, 10); err != nil { + return err + } + _, err := svc.RotateSourceSecret(ctx, actor, "src_1", 
RotateSourceSecretRequest{NewSecret: "next", GracePeriodHours: 1, Reason: "rotate"}) + return err + }}, + {name: "endpoints", run: func(ctx context.Context) error { + if _, _, err := svc.CreateEndpoint(ctx, actor, CreateEndpointRequest{Name: "receiver", URL: "https://receiver.example/hook"}); err != nil { + return err + } + if _, err := svc.ListEndpoints(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.TestEndpoint(ctx, actor, "end_1", TestEndpointRequest{Reason: "smoke"}); err != nil { + return err + } + _, err := svc.RotateEndpointSecret(ctx, actor, "end_1", RotateEndpointSecretRequest{GracePeriodHours: 1, Reason: "rotate"}) + return err + }}, + {name: "subscriptions and routes", run: func(ctx context.Context) error { + if _, err := svc.CreateSubscription(ctx, actor, CreateSubscriptionRequest{EndpointID: "end_1", EventTypes: []string{"invoice.paid"}}); err != nil { + return err + } + if _, err := svc.ListSubscriptions(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.CreateRoute(ctx, actor, CreateRouteRequest{Name: "route", SourceID: "src_1", EndpointID: "end_1", EventTypes: []string{"invoice.paid"}}); err != nil { + return err + } + if _, err := svc.ListRoutes(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.ListRouteVersions(ctx, actor, "rte_1", 10); err != nil { + return err + } + if _, err := svc.ActivateRoute(ctx, actor, "rte_1", "publish"); err != nil { + return err + } + _, err := svc.DryRunRoute(ctx, actor, "rte_1", "evt_1") + return err + }}, + {name: "retry policies", run: func(ctx context.Context) error { + if _, err := svc.CreateRetryPolicy(ctx, actor, CreateRetryPolicyRequest{Name: "standard", MaxAttempts: 3, MaxDurationSeconds: 3600, InitialDelaySeconds: 1, MaxDelaySeconds: 60}); err != nil { + return err + } + if _, err := svc.ListRetryPolicies(ctx, actor, 10); err != nil { + return err + } + _, err := svc.DeleteRetryPolicy(ctx, actor, "rtp_1", StateChangeRequest{Reason: "retire"}) + return err + 
}}, + {name: "event schemas", run: func(ctx context.Context) error { + if _, err := svc.CreateEventType(ctx, actor, CreateEventTypeRequest{Name: "invoice.paid", Description: "Invoice paid"}); err != nil { + return err + } + if _, err := svc.ListEventTypes(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.CreateEventSchema(ctx, actor, "invoice.paid", CreateEventSchemaRequest{Version: "2026-05-01", Schema: `{"type":"object"}`}); err != nil { + return err + } + if _, err := svc.ListEventSchemas(ctx, actor, "invoice.paid", 10); err != nil { + return err + } + if _, err := svc.ValidateEventSchema(ctx, actor, "invoice.paid", "2026-05-01", ValidateSchemaRequest{Payload: `{"id":"evt_1"}`}); err != nil { + return err + } + if _, err := svc.CheckEventSchemaCompatibility(ctx, actor, "invoice.paid", "2026-05-01", CheckSchemaCompatibilityRequest{NewSchema: `{"type":"object"}`}); err != nil { + return err + } + _, err := svc.DeleteEventSchema(ctx, actor, "invoice.paid", "2026-05-01", StateChangeRequest{Reason: "retire"}) + return err + }}, + {name: "events and deliveries", run: func(ctx context.Context) error { + if _, err := svc.ListEvents(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.ListEventTimeline(ctx, actor, "evt_1", 10); err != nil { + return err + } + if _, err := svc.ListDeliveries(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.ListDeliveryAttempts(ctx, actor, "del_1", 10); err != nil { + return err + } + if _, err := svc.GetDeliveryAttempt(ctx, actor, "att_1"); err != nil { + return err + } + if _, err := svc.RetryDelivery(ctx, actor, "del_1", "retry"); err != nil { + return err + } + _, err := svc.CancelDelivery(ctx, actor, "del_1", StateChangeRequest{Reason: "cancel"}) + return err + }}, + {name: "ops and signals", run: func(ctx context.Context) error { + if _, err := svc.ListEndpointHealth(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.OpsMetrics(ctx, actor); err != nil { + return err + } + 
if _, err := svc.PublicOpsMetrics(ctx); err != nil { + return err + } + if _, err := svc.ListWorkers(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.GetWorker(ctx, actor, "wrk_1"); err != nil { + return err + } + if _, err := svc.ListAlertRules(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.GetAlertRule(ctx, actor, "alr_1"); err != nil { + return err + } + if _, err := svc.UpdateAlertRule(ctx, actor, "alr_1", UpdateAlertRuleRequest{Name: ptrString("latency"), State: &active, Reason: "tune"}); err != nil { + return err + } + if _, err := svc.ListAlertFirings(ctx, actor, domain.AlertFiringOpen, 10); err != nil { + return err + } + if _, err := svc.GetAlertFiring(ctx, actor, "afr_1"); err != nil { + return err + } + if _, _, err := svc.CreateNotificationChannel(ctx, actor, CreateNotificationChannelRequest{Name: "pager", ChannelType: domain.NotificationChannelWebhook, URL: "https://signals.example/hook", SigningSecret: "0123456789abcdef"}); err != nil { + return err + } + if _, err := svc.ListNotificationChannels(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.GetNotificationChannel(ctx, actor, "nch_1"); err != nil { + return err + } + if _, _, err := svc.UpdateNotificationChannel(ctx, actor, "nch_1", UpdateNotificationChannelRequest{State: &disabled, Reason: "pause"}); err != nil { + return err + } + if _, err := svc.DeleteNotificationChannel(ctx, actor, "nch_1", StateChangeRequest{Reason: "retire"}); err != nil { + return err + } + if _, _, err := svc.CreateSIEMSink(ctx, actor, CreateSIEMSinkRequest{Name: "siem", SinkType: domain.SIEMSinkWebhook, URL: "https://siem.example/ingest", SigningSecret: "0123456789abcdef"}); err != nil { + return err + } + if _, err := svc.ListSIEMSinks(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.GetSIEMSink(ctx, actor, "snk_1"); err != nil { + return err + } + _, _, err := svc.UpdateSIEMSink(ctx, actor, "snk_1", UpdateSIEMSinkRequest{State: &disabled, Reason: 
"pause"}) + return err + }}, + {name: "audit and replay", run: func(ctx context.Context) error { + if _, err := svc.ListAuditEvents(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.GetAuditChainHead(ctx, actor); err != nil { + return err + } + if _, err := svc.ListAuditChainAnchors(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.GetAuditChainAnchor(ctx, actor, "anc_1"); err != nil { + return err + } + if _, err := svc.ListRetentionPolicies(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.ListReplayJobs(ctx, actor, 10); err != nil { + return err + } + if _, err := svc.DryRunReplay(ctx, actor, ReplayRequest{EventID: "evt_1", ReasonCode: ReplayReasonOperatorRequested, Reason: "investigate"}); err != nil { + return err + } + if _, err := svc.PauseReplayJob(ctx, actor, "rpl_1", StateChangeRequest{Reason: "pause"}); err != nil { + return err + } + if _, err := svc.ResumeReplayJob(ctx, actor, "rpl_1", StateChangeRequest{Reason: "resume"}); err != nil { + return err + } + _, err := svc.CancelReplayJob(ctx, actor, "rpl_1", StateChangeRequest{Reason: "cancel"}) + return err + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.run(context.Background()); err != nil { + t.Fatal(err) + } + }) + } +} + func TestControlServiceSchemaLifecycleRequiresSchemasWrite(t *testing.T) { store := &fakeControlStore{} svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) @@ -113,6 +438,23 @@ func TestControlServiceScopesSourceReadsToActorTenant(t *testing.T) { } } +func TestControlServiceUsesCentralAuthorizationForSourceRead(t *testing.T) { + store := &policyDecisionStore{decision: authz.Decision{Allowed: false, Reason: "denied by access policy"}} + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleOwner, Scopes: []string{"*"}} + + _, err := svc.GetSource(context.Background(), 
actor, "src_123") + if !errors.Is(err, ErrForbidden) { + t.Fatalf("expected policy deny to forbid source read, got %v", err) + } + if store.sourceTenantID != "" { + t.Fatal("source store must not be called after policy deny") + } + if store.lastTenantID != "ten_a" || store.lastActorID != "usr_1" || store.lastReq.Action != "sources:read" || store.lastReq.ResourceFamily != "source" || store.lastReq.ResourceID != "src_123" { + t.Fatalf("unexpected authorization request: tenant=%q actor=%q req=%+v", store.lastTenantID, store.lastActorID, store.lastReq) + } +} + func TestControlServiceSourceMutationRequiresSourcesWrite(t *testing.T) { store := &fakeControlStore{} svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) @@ -380,12 +722,40 @@ func TestControlServiceRequiresRawPayloadScope(t *testing.T) { svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleDeveloper, Scopes: []string{"events:read"}} - _, err := svc.GetRawPayload(context.Background(), actor, "evt_123") + _, err := svc.GetRawPayload(context.Background(), actor, "evt_123", "support investigation") if err != ErrForbidden { t.Fatalf("expected forbidden raw payload access, got %v", err) } } +func TestControlServiceRequiresRawPayloadReason(t *testing.T) { + store := &fakeControlStore{} + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleOwner, Scopes: []string{"events:raw"}} + + _, err := svc.GetRawPayload(context.Background(), actor, "evt_123", " ") + if !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected invalid input for missing raw payload reason, got %v", err) + } + if store.rawPayloadTenantID != "" { + t.Fatalf("raw payload store called before reason validation: %+v", store) + } +} + +func TestControlServicePassesRawPayloadReasonToStore(t *testing.T) { + store := &fakeControlStore{} 
+ svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleOwner, Scopes: []string{"events:raw"}} + + _, err := svc.GetRawPayload(context.Background(), actor, " evt_123 ", " support case review ") + if err != nil { + t.Fatal(err) + } + if store.rawPayloadTenantID != "ten_a" || store.rawPayloadEventID != "evt_123" || store.rawPayloadActorID != "usr_1" || store.rawPayloadReason != "support case review" { + t.Fatalf("raw payload store context mismatch: %+v", store) + } +} + func TestControlServiceNormalizedEventDataRequiresRawScope(t *testing.T) { store := &fakeControlStore{} svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) @@ -630,14 +1000,36 @@ func TestControlServiceReplayValidatesConfigModeAndRate(t *testing.T) { svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleDeveloper, Scopes: []string{"replay:write", "replay:read"}} - _, err := svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", Reason: "repair", ConfigMode: "future"}) + _, err := svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", ReasonCode: ReplayReasonReceiverFixed, Reason: "repair", ConfigMode: "future"}) if !errors.Is(err, ErrInvalidInput) { t.Fatalf("expected invalid config mode, got %v", err) } - _, err = svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", Reason: "repair", ConfigMode: ReplayConfigOriginal, RateLimitPerMinute: -1}) + _, err = svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", ReasonCode: ReplayReasonReceiverFixed, Reason: "repair", ConfigMode: ReplayConfigOriginal, RateLimitPerMinute: -1}) if !errors.Is(err, ErrInvalidInput) { t.Fatalf("expected invalid rate limit, got %v", err) } + _, err = svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: 
"evt_1", Reason: "repair", ConfigMode: ReplayConfigOriginal}) + if !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected missing reason code rejection, got %v", err) + } + _, err = svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", ReasonCode: "because", Reason: "repair", ConfigMode: ReplayConfigOriginal}) + if !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected invalid reason code rejection, got %v", err) + } + approvalExpiresAt := time.Now().UTC().Add(time.Hour) + _, err = svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", ReasonCode: ReplayReasonReceiverFixed, Reason: "repair", ApprovalExpiresAt: &approvalExpiresAt}) + if !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected approval expiry without approval rejection, got %v", err) + } + expiredApproval := time.Now().UTC().Add(-time.Hour) + _, err = svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", ReasonCode: ReplayReasonReceiverFixed, Reason: "repair", RequireApproval: true, ApprovalExpiresAt: &expiredApproval}) + if !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected expired approval window rejection, got %v", err) + } + _, err = svc.DryRunReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", Reason: "repair", ConfigMode: ReplayConfigOriginal}) + if !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected dry-run missing reason code rejection, got %v", err) + } } func TestControlServiceReplayApprovalValidationAndTenantScope(t *testing.T) { @@ -645,13 +1037,19 @@ func TestControlServiceReplayApprovalValidationAndTenantScope(t *testing.T) { svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleDeveloper, Scopes: []string{"replay:write", "replay:read"}} - job, err := svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", Reason: "repair", RequireApproval: true}) + job, err := 
svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_1", ReasonCode: ReplayReasonIncidentRecovery, Reason: "repair", RequireApproval: true}) if err != nil { t.Fatalf("expected replay creation to succeed, got %v", err) } if !store.replayReq.RequireApproval || !job.ApprovalRequired { t.Fatalf("expected approval requirement to propagate, req=%+v job=%+v", store.replayReq, job) } + if store.replayReq.ApprovalExpiresAt == nil || job.ApprovalExpiresAt == nil { + t.Fatalf("expected approval expiry to default and propagate, req=%+v job=%+v", store.replayReq, job) + } + if store.replayReq.ReasonCode != ReplayReasonIncidentRecovery || job.ReasonCode != ReplayReasonIncidentRecovery || job.Reason != "repair" { + t.Fatalf("expected reason code and reason to propagate, req=%+v job=%+v", store.replayReq, job) + } _, err = svc.ApproveReplayJob(context.Background(), actor, "rpl_1", StateChangeRequest{}) if !errors.Is(err, ErrInvalidInput) { @@ -664,15 +1062,63 @@ func TestControlServiceReplayApprovalValidationAndTenantScope(t *testing.T) { t.Fatalf("expected replay write permission requirement, got %v", err) } - _, err = svc.ApproveReplayJob(context.Background(), actor, "rpl_1", StateChangeRequest{Reason: "approved"}) + approver := authz.Actor{ID: "usr_3", TenantID: "ten_a", Role: authz.RoleDeveloper, Scopes: []string{"replay:write", "replay:read"}} + _, err = svc.ApproveReplayJob(context.Background(), approver, "rpl_1", StateChangeRequest{Reason: "approved"}) if err != nil { t.Fatalf("expected approval to succeed, got %v", err) } - if store.approveReplayTenantID != "ten_a" || store.approveReplayActorID != "usr_1" || store.approveReplayReason != "approved" { + if store.approveReplayTenantID != "ten_a" || store.approveReplayActorID != "usr_3" || store.approveReplayReason != "approved" { t.Fatalf("approval was not tenant-scoped with reason: tenant=%q actor=%q reason=%q", store.approveReplayTenantID, store.approveReplayActorID, store.approveReplayReason) } } +func 
TestControlServiceReplayApprovalPoliciesValidateAndScope(t *testing.T) { + store := &fakeControlStore{} + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + admin := authz.Actor{ID: "usr_sec", TenantID: "ten_a", Role: authz.RoleSecurity, Scopes: []string{"security:write", "security:read"}} + + policy, err := svc.CreateReplayApprovalPolicy(context.Background(), admin, CreateReplayApprovalPolicyRequest{ScopeType: ReplayApprovalScopeRoute, ScopeID: "rte_1", Reason: "sensitive route"}) + if err != nil { + t.Fatalf("expected policy creation to succeed, got %v", err) + } + if policy.TenantID != "ten_a" || policy.ScopeType != ReplayApprovalScopeRoute || policy.ScopeID != "rte_1" || !policy.RequireApproval || policy.DefaultExpirySeconds != int(ReplayApprovalDefaultExpiry/time.Second) { + t.Fatalf("policy was not normalized or tenant-scoped: %+v", policy) + } + if store.replayApprovalPolicyReq.ScopeType != ReplayApprovalScopeRoute || store.replayApprovalPolicyReq.ScopeID != "rte_1" || store.replayApprovalPolicyActorID != "usr_sec" { + t.Fatalf("policy request was not propagated: %+v actor=%q", store.replayApprovalPolicyReq, store.replayApprovalPolicyActorID) + } + + _, err = svc.CreateReplayApprovalPolicy(context.Background(), admin, CreateReplayApprovalPolicyRequest{ScopeType: ReplayApprovalScopeSource, Reason: "missing source"}) + if !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected source scope_id validation, got %v", err) + } + _, err = svc.CreateReplayApprovalPolicy(context.Background(), admin, CreateReplayApprovalPolicyRequest{ScopeType: ReplayApprovalScopeTenant, DefaultExpirySeconds: 60, Reason: "too short"}) + if !errors.Is(err, ErrInvalidInput) { + t.Fatalf("expected default expiry bounds validation, got %v", err) + } + + readOnly := authz.Actor{ID: "usr_read", TenantID: "ten_a", Role: authz.RoleAuditor, Scopes: []string{"security:read"}} + _, err = svc.CreateReplayApprovalPolicy(context.Background(), readOnly, 
CreateReplayApprovalPolicyRequest{ScopeType: ReplayApprovalScopeTenant, Reason: "no write"}) + if err != ErrForbidden { + t.Fatalf("expected security write requirement, got %v", err) + } + + if _, err := svc.ListReplayApprovalPolicies(context.Background(), readOnly, 10); err != nil { + t.Fatalf("expected security read list to succeed, got %v", err) + } + if store.replayApprovalPolicyTenantID != "ten_a" { + t.Fatalf("policy list was not tenant-scoped: %q", store.replayApprovalPolicyTenantID) + } + + disabled, err := svc.DisableReplayApprovalPolicy(context.Background(), admin, "rap_1", StateChangeRequest{Reason: "route is no longer sensitive"}) + if err != nil { + t.Fatalf("expected policy disable to succeed, got %v", err) + } + if disabled.State != domain.StateDisabled || store.replayApprovalPolicyID != "rap_1" || store.replayApprovalPolicyReason != "route is no longer sensitive" { + t.Fatalf("policy disable was not propagated: policy=%+v store=%+v", disabled, store) + } +} + func TestControlServiceSecretRotationRequiresSecurityWrite(t *testing.T) { store := &fakeControlStore{} svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) @@ -721,6 +1167,166 @@ func TestControlServiceProducerClientsRequireSecurityWriteAndRedactSecrets(t *te } } +func TestControlServiceUsesCentralAuthorizationForProducerSecretRotation(t *testing.T) { + store := &policyDecisionStore{decision: authz.Decision{Allowed: false, Reason: "denied by access policy"}} + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleOwner, Scopes: []string{"*"}} + + _, err := svc.RotateProducerClientSecret(context.Background(), actor, "pcl_1", RotateProducerClientSecretRequest{Reason: "rotate compromised secret"}) + if !errors.Is(err, ErrForbidden) { + t.Fatalf("expected policy deny to forbid producer secret rotation, got %v", err) + } + if store.producerClientTenantID != "" || 
store.producerClientReason != "" { + t.Fatal("producer client store must not be called after policy deny") + } + if store.lastReq.Action != "security:write" || store.lastReq.ResourceFamily != "producer_client" || store.lastReq.ResourceID != "pcl_1" { + t.Fatalf("unexpected authorization request: %+v", store.lastReq) + } +} + +func TestControlServiceResourcePoliciesDenySensitiveOperations(t *testing.T) { + cases := []struct { + name string + run func(*ControlService, authz.Actor) error + wantAction string + wantFamily string + wantID string + wantEnv string + }{ + { + name: "raw payload read", wantAction: "events:raw", wantFamily: "event", wantID: "evt_raw", + run: func(svc *ControlService, actor authz.Actor) error { + _, err := svc.GetRawPayload(context.Background(), actor, "evt_raw", "support investigation") + return err + }, + }, + { + name: "replay creation", wantAction: "replay:write", wantFamily: "replay", wantID: "evt_replay", + run: func(svc *ControlService, actor authz.Actor) error { + _, err := svc.CreateReplay(context.Background(), actor, ReplayRequest{EventID: "evt_replay", ReasonCode: ReplayReasonSupportInvestigation, Reason: "investigate"}) + return err + }, + }, + { + name: "audit export payload inclusion", wantAction: "events:raw", wantFamily: "audit_export", + run: func(svc *ControlService, actor authz.Actor) error { + _, err := svc.CreateAuditExport(context.Background(), actor, CreateAuditExportRequest{IncludePayloadBodies: true, Reason: "export evidence"}) + return err + }, + }, + { + name: "endpoint production change", wantAction: "endpoints:write", wantFamily: "endpoint", wantID: "end_prod", wantEnv: "production", + run: func(svc *ControlService, actor authz.Actor) error { + _, _, err := svc.UpdateEndpoint(context.Background(), actor, "end_prod", UpdateEndpointRequest{Name: ptrString("prod receiver"), Reason: "rename"}) + return err + }, + }, + { + name: "notification mutation", wantAction: "ops:write", wantFamily: "notification_channel", 
wantID: "nch_1", + run: func(svc *ControlService, actor authz.Actor) error { + _, _, err := svc.UpdateNotificationChannel(context.Background(), actor, "nch_1", UpdateNotificationChannelRequest{Name: ptrString("ops"), Reason: "rename"}) + return err + }, + }, + { + name: "siem mutation", wantAction: "security:write", wantFamily: "siem_sink", wantID: "snk_1", + run: func(svc *ControlService, actor authz.Actor) error { + _, _, err := svc.UpdateSIEMSink(context.Background(), actor, "snk_1", UpdateSIEMSinkRequest{Name: ptrString("siem"), Reason: "rename"}) + return err + }, + }, + { + name: "secret rotation", wantAction: "security:write", wantFamily: "producer_client", wantID: "pcl_1", + run: func(svc *ControlService, actor authz.Actor) error { + _, err := svc.RotateProducerClientSecret(context.Background(), actor, "pcl_1", RotateProducerClientSecretRequest{Reason: "rotate"}) + return err + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + store := &policyDecisionStore{ + decide: func(tenantID, _ string, req AuthzExplainRequest) (authz.Decision, error) { + return testAuthorizationDecision(tenantID, req, req.Action == "audit:read"), nil + }, + } + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{"receiver.example": {netip.MustParseAddr("93.184.216.34")}}}) + actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleOwner, Scopes: []string{"*"}} + + err := tc.run(svc, actor) + if !errors.Is(err, ErrForbidden) { + t.Fatalf("expected forbidden from policy deny, got %v", err) + } + if store.lastReq.Action != tc.wantAction || store.lastReq.ResourceFamily != tc.wantFamily || store.lastReq.ResourceID != tc.wantID || store.lastReq.Environment != tc.wantEnv { + t.Fatalf("unexpected authorization request: got %+v want action=%q family=%q id=%q env=%q", store.lastReq, tc.wantAction, tc.wantFamily, tc.wantID, tc.wantEnv) + } + }) + } +} + +func TestControlServiceResourcePolicyAllowsBindingAndPreservesScopeLimit(t 
*testing.T) { + store := &policyDecisionStore{ + decide: func(tenantID, _ string, req AuthzExplainRequest) (authz.Decision, error) { + return testAuthorizationDecision(tenantID, req, true), nil + }, + } + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{"receiver.example": {netip.MustParseAddr("93.184.216.34")}}}) + boundActor := authz.Actor{ID: "usr_binding", TenantID: "ten_a", Role: authz.RoleSupport, Scopes: []string{"endpoints:write"}} + rawURL := "https://receiver.example/hook" + + _, result, err := svc.UpdateEndpoint(context.Background(), boundActor, "end_1", UpdateEndpointRequest{URL: &rawURL, Reason: "resource binding allows endpoint change"}) + if err != nil { + t.Fatalf("expected resource binding allow, got %v", err) + } + if !result.Allowed || store.endpointTenantID != "ten_a" || store.endpointID != "end_1" { + t.Fatalf("expected tenant-scoped endpoint update after binding allow, result=%+v tenant=%q endpoint=%q", result, store.endpointTenantID, store.endpointID) + } + + scopeLimited := authz.Actor{ID: "usr_limited", TenantID: "ten_a", Role: authz.RoleOwner, Scopes: []string{"events:read"}} + _, err = svc.GetRawPayload(context.Background(), scopeLimited, "evt_1", "support investigation") + if !errors.Is(err, ErrForbidden) { + t.Fatalf("expected actor scope to limit enterprise allow, got %v", err) + } +} + +func TestControlServiceExplainAuthorizationOmitsSensitiveAttributes(t *testing.T) { + store := &policyDecisionStore{ + decide: func(tenantID, _ string, req AuthzExplainRequest) (authz.Decision, error) { + decision := testAuthorizationDecision(tenantID, req, true) + decision.Resource.Attributes = req.Attributes + return decision, nil + }, + } + svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) + actor := authz.Actor{ID: "usr_1", TenantID: "ten_a", Role: authz.RoleSecurity, Scopes: []string{"security:read"}} + decision, err := svc.ExplainAuthorization(context.Background(), actor, 
AuthzExplainRequest{ + Action: "events:raw", + ResourceFamily: "event", + ResourceID: "evt_1", + Attributes: map[string]string{ + "payload_body": `{"token":"payload-secret-value"}`, + "session_token": "sess_secret_value", + "provider_token": "ghp_secret_value", + "webhook_secret": "whsec_secret_value", + }, + }) + if err != nil { + t.Fatal(err) + } + if len(decision.Resource.Attributes) != 0 { + t.Fatalf("expected explain response attributes to be omitted, got %+v", decision.Resource.Attributes) + } + raw, err := json.Marshal(decision) + if err != nil { + t.Fatal(err) + } + for _, secret := range []string{"payload-secret-value", "sess_secret_value", "ghp_secret_value", "whsec_secret_value"} { + if strings.Contains(string(raw), secret) { + t.Fatalf("explain output leaked sensitive value %q: %s", secret, raw) + } + } +} + func TestControlServiceProducerMTLSIdentitiesValidateCertAndScopeTenant(t *testing.T) { store := &fakeControlStore{} svc := NewControlService(store, ssrf.Validator{Resolver: ssrf.StaticResolver{}}) @@ -1288,61 +1894,124 @@ func testClientCertificatePEM(t *testing.T, commonName string) (string, string) } type fakeControlStore struct { - eventTenantID string - auditExportTenantID string - auditExport domain.EvidenceExport - auditExportDownloaded bool - auditExports []domain.EvidenceExport - apiKeyInput APIKeyCreateInput - producerClientTenantID string - producerClientActorID string - producerClientReason string - producerClientInput ProducerClientCreateInput - producerMTLSTenantID string - producerMTLSActorID string - producerMTLSIdentity domain.ProducerMTLSIdentity - eventSchema domain.EventSchema - schemaTenantID string - schemaReason string - retryPolicyTenantID string - retryPolicyID string - retryPolicyReq UpdateRetryPolicyRequest - normalizedTenantID string - normalizedMetadataOnly bool - sourceTenantID string - sourceID string - sourceReason string - endpointTenantID string - endpointID string - endpointReason string - subscriptionTenantID 
string - subscriptionID string - subscriptionReason string - subscription domain.Subscription - routeTenantID string - routeID string - routeReason string - route domain.Route - transformationTenantID string - providerConnectionTenantID string - providerConnectionReq CreateProviderConnectionRequest - adapterTenantID string - adapterActorID string - adapterVersionReq CreateAdapterVersionRequest - reconciliationTenantID string - opsTenantID string - metricName string - alertTenantID string - alertActorID string - notificationTenantID string - notificationActorID string - siemTenantID string - siemActorID string - replayReq ReplayRequest - approveReplayTenantID string - approveReplayActorID string - approveReplayReason string - endpoint domain.Endpoint + eventTenantID string + eventSearchTenantID string + eventSearchReq EventSearchRequest + incidentTenantID string + incidentActorID string + incidentID string + incidentEventID string + incidentReason string + auditExportTenantID string + auditExport domain.EvidenceExport + auditExportDownloaded bool + auditExports []domain.EvidenceExport + apiKeyInput APIKeyCreateInput + producerClientTenantID string + producerClientActorID string + producerClientReason string + producerClientInput ProducerClientCreateInput + producerMTLSTenantID string + producerMTLSActorID string + producerMTLSIdentity domain.ProducerMTLSIdentity + eventSchema domain.EventSchema + schemaTenantID string + schemaReason string + retryPolicyTenantID string + retryPolicyID string + retryPolicyReq UpdateRetryPolicyRequest + normalizedTenantID string + normalizedMetadataOnly bool + rawPayloadTenantID string + rawPayloadEventID string + rawPayloadActorID string + rawPayloadReason string + sourceTenantID string + sourceID string + sourceReason string + endpointTenantID string + endpointID string + endpointReason string + subscriptionTenantID string + subscriptionID string + subscriptionReason string + subscription domain.Subscription + routeTenantID string + 
routeID string + routeReason string + route domain.Route + transformationTenantID string + providerConnectionTenantID string + providerConnectionReq CreateProviderConnectionRequest + adapterTenantID string + adapterActorID string + adapterVersionReq CreateAdapterVersionRequest + reconciliationTenantID string + opsTenantID string + metricName string + alertTenantID string + alertActorID string + notificationTenantID string + notificationActorID string + siemTenantID string + siemActorID string + replayReq ReplayRequest + approveReplayTenantID string + approveReplayActorID string + approveReplayReason string + replayApprovalPolicyTenantID string + replayApprovalPolicyActorID string + replayApprovalPolicyID string + replayApprovalPolicyReason string + replayApprovalPolicyReq CreateReplayApprovalPolicyRequest + endpoint domain.Endpoint +} + +type policyDecisionStore struct { + enterpriseFakeStore + decision authz.Decision + err error + decide func(tenantID, actorID string, req AuthzExplainRequest) (authz.Decision, error) + lastTenantID string + lastActorID string + lastReq AuthzExplainRequest + calls []AuthzExplainRequest +} + +func (s *policyDecisionStore) ExplainAuthorization(_ context.Context, tenantID, actorID string, req AuthzExplainRequest) (authz.Decision, error) { + s.lastTenantID = tenantID + s.lastActorID = actorID + s.lastReq = req + s.calls = append(s.calls, req) + if s.err != nil { + return authz.Decision{}, s.err + } + if s.decide != nil { + return s.decide(tenantID, actorID, req) + } + return s.decision, nil +} + +func testAuthorizationDecision(tenantID string, req AuthzExplainRequest, allowed bool) authz.Decision { + decision := authz.Decision{ + Allowed: allowed, + Action: req.Action, + Resource: authz.Resource{ + TenantID: tenantID, + Family: req.ResourceFamily, + ID: req.ResourceID, + Environment: req.Environment, + }, + RequiredScopes: []string{req.Action}, + } + if allowed { + decision.Reason = "allowed by resource role binding" + 
decision.MatchedRoleBindingID = "rb_1" + return decision + } + decision.Reason = "denied by access policy" + decision.MatchedPolicyRuleID = "pol_1" + return decision } func (f *fakeControlStore) CreateAPIKey(_ context.Context, input APIKeyCreateInput) (domain.APIKey, error) { @@ -1684,23 +2353,117 @@ func (f *fakeControlStore) RotateSourceSecret(context.Context, string, string, s func (f *fakeControlStore) RotateEndpointSecret(context.Context, string, string, string, RotateEndpointSecretRequest) (domain.EndpointSecretVersion, error) { return domain.EndpointSecretVersion{}, nil } -func (f *fakeControlStore) ListEvents(context.Context, string, int) ([]domain.Event, error) { +func (f *fakeControlStore) ListEvents(_ context.Context, tenantID string, req EventSearchRequest) ([]domain.Event, error) { + f.eventSearchTenantID = tenantID + f.eventSearchReq = req return nil, nil } func (f *fakeControlStore) GetEvent(_ context.Context, tenantID, eventID string) (domain.Event, error) { f.eventTenantID = tenantID - return domain.Event{ID: eventID, TenantID: tenantID}, nil + return domain.Event{ + ID: eventID, + TenantID: tenantID, + SourceID: "src_1", + Provider: "stripe", + Type: "invoice.paid", + ProviderID: "evt_provider_1", + RawPayloadID: "raw_1", + RawPayloadHash: "sha256:raw", + Verified: true, + VerifyReason: "valid_signature", + DedupeStatus: domain.DedupeUnique, + ReceivedAt: time.Unix(100, 0).UTC(), + }, nil } -func (f *fakeControlStore) GetRawPayload(context.Context, string, string, string) (domain.RawPayload, error) { - return domain.RawPayload{}, nil +func (f *fakeControlStore) GetRawPayload(_ context.Context, tenantID, eventID, actorID, reason string) (domain.RawPayload, error) { + f.rawPayloadTenantID = tenantID + f.rawPayloadEventID = eventID + f.rawPayloadActorID = actorID + f.rawPayloadReason = reason + return domain.RawPayload{ID: "raw_1", TenantID: tenantID, EventID: eventID}, nil } func (f *fakeControlStore) GetNormalizedEvent(_ context.Context, tenantID, 
eventID, actorID string, includeData bool) (domain.NormalizedEnvelope, error) { f.normalizedTenantID = tenantID f.normalizedMetadataOnly = !includeData return domain.NormalizedEnvelope{ID: "nenv_1", TenantID: tenantID, EventID: eventID}, nil } -func (f *fakeControlStore) ListEventTimeline(context.Context, string, string, int) ([]map[string]any, error) { - return nil, nil +func (f *fakeControlStore) ListEventTimeline(context.Context, string, string, int) ([]EventTimelineEntry, error) { + return []EventTimelineEntry{ + {SchemaVersion: EventTimelineSchemaV1, Sequence: 1, Kind: "event", RefID: "evt_1", State: "unique", Detail: "valid_signature", OccurredAt: time.Unix(100, 0).UTC()}, + {SchemaVersion: EventTimelineSchemaV1, Sequence: 2, Kind: "delivery", RefID: "del_1", State: "failed", Detail: "route_version=rtv_1 subscription_version=none retry_policy=rtp_1", OccurredAt: time.Unix(101, 0).UTC()}, + {SchemaVersion: EventTimelineSchemaV1, Sequence: 3, Kind: "attempt", RefID: "att_1", State: "failed", Detail: "network_error retryable=true retry_delay_ms=1000", OccurredAt: time.Unix(102, 0).UTC()}, + {SchemaVersion: EventTimelineSchemaV1, Sequence: 4, Kind: "replay", RefID: "rpl_1", State: "completed", Detail: "reason_code=incident_recovery reason=receiver restored after DLQ config_mode=original event_id=evt_1", OccurredAt: time.Unix(102, 500000000).UTC()}, + {SchemaVersion: EventTimelineSchemaV1, Sequence: 5, Kind: "audit", RefID: "aud_1", State: "raw_payload.read", Detail: "operator reason", OccurredAt: time.Unix(103, 0).UTC()}, + }, nil +} +func (f *fakeControlStore) CreateIncident(_ context.Context, incident domain.Incident) (domain.Incident, error) { + f.incidentTenantID = incident.TenantID + f.incidentActorID = incident.CreatedBy + if incident.ID == "" { + incident.ID = "inc_1" + } + if incident.CreatedAt.IsZero() { + incident.CreatedAt = time.Unix(1, 0).UTC() + incident.UpdatedAt = incident.CreatedAt + } + return incident, nil +} +func (f *fakeControlStore) 
ListIncidents(_ context.Context, tenantID string, limit int) ([]domain.Incident, error) { + f.incidentTenantID = tenantID + return []domain.Incident{{ID: "inc_1", TenantID: tenantID, Title: "Stripe payment failed", State: domain.StateActive}}, nil +} +func (f *fakeControlStore) GetIncident(_ context.Context, tenantID, incidentID string) (domain.Incident, error) { + f.incidentTenantID = tenantID + f.incidentID = incidentID + return domain.Incident{ID: incidentID, TenantID: tenantID, Title: "Stripe payment failed", Reason: "support case", State: domain.StateActive, CreatedBy: "usr_1", CreatedAt: time.Unix(1, 0).UTC()}, nil +} +func (f *fakeControlStore) AddIncidentEvent(_ context.Context, tenantID, incidentID, eventID, actorID, reason string) (domain.IncidentEvent, error) { + f.incidentTenantID = tenantID + f.incidentID = incidentID + f.incidentEventID = eventID + f.incidentActorID = actorID + f.incidentReason = reason + return domain.IncidentEvent{ID: "ine_1", TenantID: tenantID, IncidentID: incidentID, EventID: eventID, AddedBy: actorID, Reason: reason, CreatedAt: time.Unix(2, 0).UTC()}, nil +} +func (f *fakeControlStore) RemoveIncidentEvent(_ context.Context, tenantID, incidentID, eventID, actorID, reason string) (domain.IncidentEvent, error) { + f.incidentTenantID = tenantID + f.incidentID = incidentID + f.incidentEventID = eventID + f.incidentActorID = actorID + f.incidentReason = reason + return domain.IncidentEvent{ID: "ine_1", TenantID: tenantID, IncidentID: incidentID, EventID: eventID, AddedBy: actorID, Reason: reason, CreatedAt: time.Unix(2, 0).UTC()}, nil +} +func (f *fakeControlStore) ListIncidentEvents(_ context.Context, tenantID, incidentID string) ([]domain.IncidentEvent, error) { + f.incidentTenantID = tenantID + f.incidentID = incidentID + eventID := f.incidentEventID + if eventID == "" { + eventID = "evt_1" + } + return []domain.IncidentEvent{{ID: "ine_1", TenantID: tenantID, IncidentID: incidentID, EventID: eventID, AddedBy: "usr_1", Reason: 
"investigate", CreatedAt: time.Unix(2, 0).UTC()}}, nil +} +func (f *fakeControlStore) CreateIncidentReportSnapshot(_ context.Context, tenantID, incidentID, actorID, reason string, report IncidentReport, markdown string) (domain.IncidentReportSnapshot, error) { + f.incidentTenantID = tenantID + f.incidentID = incidentID + f.incidentActorID = actorID + f.incidentReason = reason + raw, err := json.Marshal(report) + if err != nil { + return domain.IncidentReportSnapshot{}, err + } + return domain.IncidentReportSnapshot{ID: "irs_1", TenantID: tenantID, IncidentID: incidentID, SchemaVersion: report.SchemaVersion, Report: raw, Markdown: markdown, GeneratedBy: actorID, GeneratedAt: report.GeneratedAt}, nil +} +func (f *fakeControlStore) GetIncidentReportSnapshot(_ context.Context, tenantID, incidentID string) (domain.IncidentReportSnapshot, error) { + f.incidentTenantID = tenantID + f.incidentID = incidentID + return domain.IncidentReportSnapshot{ID: "irs_1", TenantID: tenantID, IncidentID: incidentID, SchemaVersion: incidentReportSchemaV1, Markdown: "incident report", GeneratedBy: "usr_1", GeneratedAt: time.Unix(3, 0).UTC()}, nil +} +func (f *fakeControlStore) CreateIncidentEvidenceExport(_ context.Context, tenantID, incidentID, actorID string, req CreateIncidentEvidenceExportRequest, report IncidentReport, markdown string) (domain.IncidentEvidenceExport, domain.EvidenceExport, error) { + f.incidentTenantID = tenantID + f.incidentID = incidentID + f.incidentActorID = actorID + f.incidentReason = req.Reason + return domain.IncidentEvidenceExport{ID: "iex_1", TenantID: tenantID, IncidentID: incidentID, ExportID: "exp_1", CreatedBy: actorID}, domain.EvidenceExport{ID: "exp_1", TenantID: tenantID, IncludeTimelines: true, CreatedBy: actorID}, nil } func (f *fakeControlStore) ListDeliveries(context.Context, string, int) ([]domain.Delivery, error) { return nil, nil @@ -1996,10 +2759,10 @@ func (f *fakeControlStore) DownloadAuditExport(_ context.Context, tenantID, expo func (f 
*fakeControlStore) ListDeadLetter(context.Context, string, int) ([]map[string]any, error) { return nil, nil } -func (f *fakeControlStore) ReleaseDeadLetter(context.Context, string, string, string, string) (ReplayJob, error) { +func (f *fakeControlStore) ReleaseDeadLetter(context.Context, string, string, string, string, string) (ReplayJob, error) { return ReplayJob{}, nil } -func (f *fakeControlStore) BulkReleaseDeadLetter(context.Context, string, []string, string, string) ([]ReplayJob, error) { +func (f *fakeControlStore) BulkReleaseDeadLetter(context.Context, string, []string, string, string, string) ([]ReplayJob, error) { return nil, nil } func (f *fakeControlStore) ListQuarantine(context.Context, string, int) ([]map[string]any, error) { @@ -2016,7 +2779,7 @@ func (f *fakeControlStore) DryRunReplay(context.Context, string, ReplayRequest) } func (f *fakeControlStore) CreateReplay(_ context.Context, tenantID, actorID string, req ReplayRequest) (ReplayJob, error) { f.replayReq = req - return ReplayJob{ID: "rpl_1", State: "pending_approval", ScopeHash: "sha256:abc", TotalItems: 1, ApprovalRequired: req.RequireApproval}, nil + return ReplayJob{ID: "rpl_1", State: "pending_approval", ScopeHash: "sha256:abc", ReasonCode: req.ReasonCode, Reason: req.Reason, TotalItems: 1, ApprovalRequired: req.RequireApproval, ApprovalExpiresAt: req.ApprovalExpiresAt}, nil } func (f *fakeControlStore) ListReplayJobs(context.Context, string, int) ([]ReplayJob, error) { return nil, nil @@ -2036,6 +2799,23 @@ func (f *fakeControlStore) ResumeReplayJob(context.Context, string, string, stri func (f *fakeControlStore) CancelReplayJob(context.Context, string, string, string, string) (ReplayJob, error) { return ReplayJob{}, nil } +func (f *fakeControlStore) CreateReplayApprovalPolicy(_ context.Context, tenantID, actorID string, req CreateReplayApprovalPolicyRequest) (domain.ReplayApprovalPolicy, error) { + f.replayApprovalPolicyTenantID = tenantID + f.replayApprovalPolicyActorID = actorID + 
f.replayApprovalPolicyReq = req + return domain.ReplayApprovalPolicy{ID: "rap_1", TenantID: tenantID, ScopeType: req.ScopeType, ScopeID: req.ScopeID, RequireApproval: req.RequireApproval, DefaultExpirySeconds: req.DefaultExpirySeconds, State: domain.StateActive, Reason: req.Reason, CreatedBy: actorID}, nil +} +func (f *fakeControlStore) ListReplayApprovalPolicies(_ context.Context, tenantID string, limit int) ([]domain.ReplayApprovalPolicy, error) { + f.replayApprovalPolicyTenantID = tenantID + return []domain.ReplayApprovalPolicy{{ID: "rap_1", TenantID: tenantID, ScopeType: ReplayApprovalScopeTenant, RequireApproval: true, DefaultExpirySeconds: int(ReplayApprovalDefaultExpiry / time.Second), State: domain.StateActive}}, nil +} +func (f *fakeControlStore) DisableReplayApprovalPolicy(_ context.Context, tenantID, policyID, actorID, reason string) (domain.ReplayApprovalPolicy, error) { + f.replayApprovalPolicyTenantID = tenantID + f.replayApprovalPolicyActorID = actorID + f.replayApprovalPolicyID = policyID + f.replayApprovalPolicyReason = reason + return domain.ReplayApprovalPolicy{ID: policyID, TenantID: tenantID, ScopeType: ReplayApprovalScopeTenant, RequireApproval: true, DefaultExpirySeconds: int(ReplayApprovalDefaultExpiry / time.Second), State: domain.StateDisabled, CreatedBy: actorID}, nil +} func (f *fakeControlStore) CreateTransformation(_ context.Context, tenantID, actorID string, req CreateTransformationRequest) (domain.Transformation, error) { f.transformationTenantID = tenantID return domain.Transformation{ID: "trn_1", TenantID: tenantID, Name: req.Name, CreatedBy: actorID}, nil diff --git a/internal/app/delivery_fanout.go b/internal/app/delivery_fanout.go new file mode 100644 index 0000000..ae9232a --- /dev/null +++ b/internal/app/delivery_fanout.go @@ -0,0 +1,360 @@ +package app + +import ( + "context" + "time" + + "webhookery/internal/domain" + "webhookery/internal/worker" +) + +const ( + OutboxKindRouteEvent = "route_event" + 
OutboxKindRouteRecoveredEvent = "route_recovered_event" + OutboxKindReplayJob = "replay_job" + OutboxKindReconciliationJob = "reconciliation_job" +) + +type DeliveryFanoutStore interface { + GetEvent(ctx context.Context, tenantID, eventID string) (domain.Event, error) + ListDeliveryFanoutTargets(ctx context.Context, tenantID, sourceID, eventType string) ([]DeliveryFanoutTarget, error) + CreateDeliverySnapshot(ctx context.Context, req DeliverySnapshotRequest) (DeliverySnapshotResult, error) + GetReplayJobWork(ctx context.Context, tenantID, replayJobID string) (ReplayJobWork, error) + StartReplayJob(ctx context.Context, tenantID, replayJobID string) (bool, error) + ListOriginalDeliveryReplaySources(ctx context.Context, tenantID, eventID string) ([]DeliveryReplaySource, error) + GetDeliveryReplaySource(ctx context.Context, tenantID, deliveryID string) (DeliveryReplaySource, error) + GetCurrentDeliveryFanoutTarget(ctx context.Context, tenantID, routeID, subscriptionID string) (DeliveryFanoutTarget, bool, error) + InsertReplayNoopItem(ctx context.Context, tenantID, replayJobID, eventID, configMode, errorText string) error + CompleteReplayJob(ctx context.Context, tenantID, replayJobID string, processedItems int) error +} + +type DeliveryFanoutService struct { + store DeliveryFanoutStore + clock Clock +} + +func NewDeliveryFanoutService(store DeliveryFanoutStore, clock Clock) *DeliveryFanoutService { + if clock == nil { + clock = SystemClock{} + } + return &DeliveryFanoutService{store: store, clock: clock} +} + +type DeliveryFanoutTarget struct { + EndpointID string + RouteID string + RouteVersionID string + SubscriptionID string + SubscriptionVersionID string + RouteRetryPolicyID string + EndpointRetryPolicyID string + TransformationVersionID string +} + +func (t DeliveryFanoutTarget) retryPolicyID() string { + return firstNonEmpty(t.RouteRetryPolicyID, t.EndpointRetryPolicyID) +} + +type DeliveryPayloadMode string + +const ( + DeliveryPayloadMaterialize 
// DeliveryFanoutOptions carries the per-call settings shared by the fan-out
// entry points (plain event routing and replay).
type DeliveryFanoutOptions struct {
	// ReplayJobID is copied onto every snapshot request created by the call;
	// it is left empty for ordinary (non-replay) routing.
	ReplayJobID string
	// ConfigMode is copied onto snapshot requests; an empty value is
	// replaced with ReplayConfigCurrent when the request is built.
	ConfigMode string
	// RateLimitPerMinute spaces the NextAttemptAt of successive snapshots;
	// values <= 0 disable spacing (see replayScheduleDelay).
	RateLimitPerMinute int
	// AllowRecovered lets an unverified event fan out, but only when its
	// VerifyReason is domain.VerificationReasonProviderAPIReconcile.
	AllowRecovered bool
}
DeliveryFanoutOptions{}) + return err + case OutboxKindRouteRecoveredEvent: + _, err := s.CreateDeliveriesForEvent(ctx, item.TenantID, item.ResourceID, DeliveryFanoutOptions{AllowRecovered: true}) + return err + case OutboxKindReplayJob: + return s.CreateReplayDeliveries(ctx, item.TenantID, item.ResourceID) + default: + return nil + } +} + +func (s *DeliveryFanoutService) CreateDeliveriesForEvent(ctx context.Context, tenantID, eventID string, opts DeliveryFanoutOptions) (int, error) { + event, err := s.store.GetEvent(ctx, tenantID, eventID) + if err != nil { + return 0, err + } + if !event.Verified && (!opts.AllowRecovered || event.VerifyReason != domain.VerificationReasonProviderAPIReconcile) { + return 0, nil + } + targets, err := s.store.ListDeliveryFanoutTargets(ctx, tenantID, event.SourceID, event.Type) + if err != nil { + return 0, err + } + created := 0 + for _, target := range targets { + if _, err := s.createDeliveryFromTarget(ctx, tenantID, eventID, target, created, opts); err != nil { + return created, err + } + created++ + } + return created, nil +} + +func (s *DeliveryFanoutService) CreateReplayDeliveries(ctx context.Context, tenantID, replayJobID string) error { + work, err := s.store.GetReplayJobWork(ctx, tenantID, replayJobID) + if err != nil { + return err + } + if work.State == "paused" || work.State == "pending_approval" { + return worker.ErrDeferred + } + if work.State != "scheduled" { + return nil + } + started, err := s.store.StartReplayJob(ctx, tenantID, replayJobID) + if err != nil { + return err + } + if !started { + return worker.ErrDeferred + } + configMode := firstNonEmpty(work.ConfigMode, work.Request.ConfigMode, ReplayConfigCurrent) + rateLimitPerMinute := work.RateLimitPerMinute + if rateLimitPerMinute == 0 { + rateLimitPerMinute = work.Request.RateLimitPerMinute + } + created := 0 + if work.Request.EventID != "" { + var count int + var createErr error + if configMode == ReplayConfigOriginal { + count, createErr = 
s.createDeliveriesFromOriginalEvent(ctx, tenantID, work.Request.EventID, DeliveryFanoutOptions{ + ReplayJobID: replayJobID, ConfigMode: configMode, RateLimitPerMinute: rateLimitPerMinute, + }) + if createErr != nil { + return createErr + } + if count == 0 { + if err := s.store.InsertReplayNoopItem(ctx, tenantID, replayJobID, work.Request.EventID, configMode, "no original deliveries found"); err != nil { + return err + } + } + } else { + count, createErr = s.CreateDeliveriesForEvent(ctx, tenantID, work.Request.EventID, DeliveryFanoutOptions{ + ReplayJobID: replayJobID, ConfigMode: configMode, RateLimitPerMinute: rateLimitPerMinute, + }) + if createErr != nil { + return createErr + } + if count == 0 { + if err := s.store.InsertReplayNoopItem(ctx, tenantID, replayJobID, work.Request.EventID, configMode, "no current route or subscription matched"); err != nil { + return err + } + } + } + created += count + } + if work.Request.DeliveryID != "" { + count, err := s.createDeliveryFromExisting(ctx, tenantID, work.Request.DeliveryID, DeliveryFanoutOptions{ + ReplayJobID: replayJobID, ConfigMode: configMode, RateLimitPerMinute: rateLimitPerMinute, + }) + if err != nil { + return err + } + created += count + } + return s.store.CompleteReplayJob(ctx, tenantID, replayJobID, created) +} + +func (s *DeliveryFanoutService) createDeliveriesFromOriginalEvent(ctx context.Context, tenantID, eventID string, opts DeliveryFanoutOptions) (int, error) { + originals, err := s.store.ListOriginalDeliveryReplaySources(ctx, tenantID, eventID) + if err != nil { + return 0, err + } + for i, original := range originals { + req := deliverySnapshotRequestFromSource(tenantID, original, opts) + req.OriginalDeliveryID = original.ID + req.DeliveryPayloadMode = DeliveryPayloadClone + req.SourceDeliveryPayloadID = original.DeliveryPayloadID + req.NextAttemptAt = s.scheduledDeliveryAt(i, opts.RateLimitPerMinute) + if _, err := s.createDeliverySnapshot(ctx, req); err != nil { + return i, err + } + } + return 
len(originals), nil +} + +func (s *DeliveryFanoutService) createDeliveryFromExisting(ctx context.Context, tenantID, deliveryID string, opts DeliveryFanoutOptions) (int, error) { + source, err := s.store.GetDeliveryReplaySource(ctx, tenantID, deliveryID) + if err != nil { + return 0, err + } + req := deliverySnapshotRequestFromSource(tenantID, source, opts) + req.OriginalDeliveryID = deliveryID + req.NextAttemptAt = s.scheduledDeliveryAt(0, opts.RateLimitPerMinute) + if opts.ConfigMode != ReplayConfigOriginal && (source.RouteID != "" || source.SubscriptionID != "") { + current, ok, err := s.store.GetCurrentDeliveryFanoutTarget(ctx, tenantID, source.RouteID, source.SubscriptionID) + if err != nil { + return 0, err + } + if !ok { + return 0, nil + } + req.EndpointID = current.EndpointID + req.RouteVersionID = current.RouteVersionID + req.SubscriptionVersionID = current.SubscriptionVersionID + req.RetryPolicyID = current.retryPolicyID() + req.TransformationVersionID = current.TransformationVersionID + req.AdapterVersionID = "" + req.NormalizedEnvelopeID = "" + req.SourceDeliveryPayloadID = "" + } + if source.DeliveryPayloadID != "" && opts.ConfigMode == ReplayConfigOriginal { + req.DeliveryPayloadMode = DeliveryPayloadClone + req.SourceDeliveryPayloadID = source.DeliveryPayloadID + } else { + req.DeliveryPayloadMode = DeliveryPayloadMaterialize + } + if _, err := s.createDeliverySnapshot(ctx, req); err != nil { + return 0, err + } + return 1, nil +} + +func (s *DeliveryFanoutService) createDeliveryFromTarget(ctx context.Context, tenantID, eventID string, target DeliveryFanoutTarget, index int, opts DeliveryFanoutOptions) (DeliverySnapshotResult, error) { + req := DeliverySnapshotRequest{ + TenantID: tenantID, + EventID: eventID, + EndpointID: target.EndpointID, + RouteID: target.RouteID, + RouteVersionID: target.RouteVersionID, + SubscriptionID: target.SubscriptionID, + SubscriptionVersionID: target.SubscriptionVersionID, + RetryPolicyID: target.retryPolicyID(), + 
ReplayJobID: opts.ReplayJobID, + TransformationVersionID: target.TransformationVersionID, + DeliveryPayloadMode: DeliveryPayloadMaterialize, + NextAttemptAt: s.scheduledDeliveryAt(index, opts.RateLimitPerMinute), + ConfigMode: firstNonEmpty(opts.ConfigMode, ReplayConfigCurrent), + } + return s.createDeliverySnapshot(ctx, req) +} + +func (s *DeliveryFanoutService) createDeliverySnapshot(ctx context.Context, req DeliverySnapshotRequest) (DeliverySnapshotResult, error) { + if req.ConfigMode == "" { + req.ConfigMode = ReplayConfigCurrent + } + if req.DeliveryPayloadMode == "" { + req.DeliveryPayloadMode = DeliveryPayloadMaterialize + } + return s.store.CreateDeliverySnapshot(ctx, req) +} + +func deliverySnapshotRequestFromSource(tenantID string, source DeliveryReplaySource, opts DeliveryFanoutOptions) DeliverySnapshotRequest { + return DeliverySnapshotRequest{ + TenantID: tenantID, + EventID: source.EventID, + EndpointID: source.EndpointID, + RouteID: source.RouteID, + RouteVersionID: source.RouteVersionID, + SubscriptionID: source.SubscriptionID, + SubscriptionVersionID: source.SubscriptionVersionID, + RetryPolicyID: source.RetryPolicyID, + ReplayJobID: opts.ReplayJobID, + AdapterVersionID: source.AdapterVersionID, + NormalizedEnvelopeID: source.NormalizedEnvelopeID, + TransformationVersionID: source.TransformationVersionID, + SourceDeliveryPayloadID: source.DeliveryPayloadID, + ConfigMode: firstNonEmpty(opts.ConfigMode, ReplayConfigCurrent), + } +} + +func (s *DeliveryFanoutService) scheduledDeliveryAt(index, rateLimitPerMinute int) time.Time { + return s.clock.Now().Add(replayScheduleDelay(index, rateLimitPerMinute)) +} + +func replayScheduleDelay(index, rateLimitPerMinute int) time.Duration { + if index <= 0 || rateLimitPerMinute <= 0 { + return 0 + } + interval := time.Minute / time.Duration(rateLimitPerMinute) + if interval <= 0 { + return 0 + } + return time.Duration(index) * interval +} diff --git a/internal/app/delivery_fanout_test.go 
b/internal/app/delivery_fanout_test.go new file mode 100644 index 0000000..6fd798d --- /dev/null +++ b/internal/app/delivery_fanout_test.go @@ -0,0 +1,319 @@ +package app + +import ( + "context" + "errors" + "testing" + "time" + + "webhookery/internal/domain" + "webhookery/internal/worker" +) + +func TestReplayScheduleDelaySpacesItemsByRateLimit(t *testing.T) { + if got := replayScheduleDelay(0, 60); got != 0 { + t.Fatalf("first replay item should be immediately eligible, got %s", got) + } + if got := replayScheduleDelay(1, 60); got != time.Second { + t.Fatalf("second item at 60/min should be delayed 1s, got %s", got) + } + if got := replayScheduleDelay(2, 30); got != 4*time.Second { + t.Fatalf("third item at 30/min should be delayed 4s, got %s", got) + } +} + +func TestReplayScheduleDelayIgnoresInvalidRateLimit(t *testing.T) { + if got := replayScheduleDelay(10, 0); got != 0 { + t.Fatalf("zero rate limit should not delay replay, got %s", got) + } + if got := replayScheduleDelay(10, -1); got != 0 { + t.Fatalf("negative rate limit should not delay replay, got %s", got) + } + if got := replayScheduleDelay(-1, 60); got != 0 { + t.Fatalf("negative item index should not delay replay, got %s", got) + } +} + +func TestDeliveryFanoutSkipsUnverifiedEventsUnlessRecovered(t *testing.T) { + now := time.Date(2026, 5, 26, 12, 0, 0, 0, time.UTC) + store := &fakeDeliveryFanoutStore{ + event: domain.Event{ID: "evt_1", TenantID: "ten_1", SourceID: "src_1", Type: "invoice.created", Verified: false, VerifyReason: "invalid_signature"}, + targets: []DeliveryFanoutTarget{{ + EndpointID: "end_1", RouteID: "rte_1", RouteVersionID: "rv_1", + }}, + } + svc := NewDeliveryFanoutService(store, fixedFanoutClock{now: now}) + + created, err := svc.CreateDeliveriesForEvent(context.Background(), "ten_1", "evt_1", DeliveryFanoutOptions{}) + if err != nil { + t.Fatal(err) + } + if created != 0 || len(store.creates) != 0 { + t.Fatalf("unverified event should not fan out without recovered allowance, 
// TestDeliveryFanoutSkipsUnsignedCloudEventsEvenWithRecoveredOption checks that
// AllowRecovered does not open fan-out for every unverified event: an event
// whose VerifyReason is VerificationReasonUnsignedCloudEvents must produce no
// delivery snapshots even though a matching target exists.
func TestDeliveryFanoutSkipsUnsignedCloudEventsEvenWithRecoveredOption(t *testing.T) {
	now := time.Date(2026, 5, 26, 12, 0, 0, 0, time.UTC)
	store := &fakeDeliveryFanoutStore{
		event: domain.Event{
			ID:           "evt_cloud",
			TenantID:     "ten_1",
			SourceID:     "src_cloud",
			Type:         "customer.created",
			Verified:     false,
			VerifyReason: domain.VerificationReasonUnsignedCloudEvents,
		},
		// A target is configured so that a zero result proves the event was
		// filtered, not that nothing matched.
		targets: []DeliveryFanoutTarget{{
			EndpointID: "end_1", RouteID: "rte_1", RouteVersionID: "rv_1",
		}},
	}
	svc := NewDeliveryFanoutService(store, fixedFanoutClock{now: now})

	created, err := svc.CreateDeliveriesForEvent(context.Background(), "ten_1", "evt_cloud", DeliveryFanoutOptions{AllowRecovered: true})
	if err != nil {
		t.Fatal(err)
	}
	if created != 0 || len(store.creates) != 0 {
		t.Fatalf("unsigned CloudEvents must remain evidence-only by default, created=%d requests=%d", created, len(store.creates))
	}
}
// TestDeliveryFanoutReplayOriginalClonesPayloadsAndCompletesJob drives a
// scheduled original-config replay job through ProcessOutbox and checks that
// the single original delivery is replayed as a payload clone linked back to
// its source, and that the job is completed with a count of one.
func TestDeliveryFanoutReplayOriginalClonesPayloadsAndCompletesJob(t *testing.T) {
	now := time.Date(2026, 5, 26, 12, 0, 0, 0, time.UTC)
	store := &fakeDeliveryFanoutStore{
		replayWork: ReplayJobWork{
			State:              "scheduled",
			ConfigMode:         ReplayConfigOriginal,
			RateLimitPerMinute: 30,
			Request:            ReplayRequest{EventID: "evt_1", ConfigMode: ReplayConfigOriginal},
		},
		originals: []DeliveryReplaySource{{
			ID: "del_original", EventID: "evt_1", EndpointID: "end_1", RouteID: "rte_1", RouteVersionID: "rv_1",
			RetryPolicyID: "rtp_1", AdapterVersionID: "adv_1", NormalizedEnvelopeID: "nenv_1", TransformationVersionID: "trv_1", DeliveryPayloadID: "dpl_1",
		}},
	}
	svc := NewDeliveryFanoutService(store, fixedFanoutClock{now: now})

	err := svc.ProcessOutbox(context.Background(), worker.OutboxItem{TenantID: "ten_1", Kind: OutboxKindReplayJob, ResourceID: "rpl_1"})
	if err != nil {
		t.Fatal(err)
	}
	if store.completedReplayItems != 1 {
		t.Fatalf("expected replay completion count 1, got %d", store.completedReplayItems)
	}
	if len(store.creates) != 1 {
		t.Fatalf("expected one cloned delivery snapshot, got %d", len(store.creates))
	}
	req := store.creates[0]
	// The replay must reference the stored payload rather than rebuild it.
	if req.DeliveryPayloadMode != DeliveryPayloadClone || req.SourceDeliveryPayloadID != "dpl_1" || req.OriginalDeliveryID != "del_original" {
		t.Fatalf("expected original replay to clone payload evidence, got %+v", req)
	}
	if req.ConfigMode != ReplayConfigOriginal || req.ReplayJobID != "rpl_1" {
		t.Fatalf("expected original replay evidence context, got %+v", req)
	}
	// NOTE(review): the fake's ListOriginalDeliveryReplaySources returns a copy
	// of store.originals, so the service cannot reach this slice; as written
	// this assertion cannot fail — confirm whether a stronger check is wanted.
	if store.originals[0].ID != "del_original" || store.originals[0].DeliveryPayloadID != "dpl_1" {
		t.Fatalf("original replay source must remain unchanged, got %+v", store.originals[0])
	}
}
{ + t.Fatal(err) + } + if store.completedReplayItems != 2 || len(store.creates) != 2 { + t.Fatalf("duplicate original deliveries must each create replay evidence, completed=%d creates=%d", store.completedReplayItems, len(store.creates)) + } + if store.creates[0].OriginalDeliveryID != "del_duplicate_a" || store.creates[0].SourceDeliveryPayloadID != "dpl_a" { + t.Fatalf("first duplicate replay linkage lost: %+v", store.creates[0]) + } + if store.creates[1].OriginalDeliveryID != "del_duplicate_b" || store.creates[1].SourceDeliveryPayloadID != "dpl_b" { + t.Fatalf("second duplicate replay linkage lost: %+v", store.creates[1]) + } +} + +func TestDeliveryFanoutReplayCurrentUsesCurrentRouteConfig(t *testing.T) { + now := time.Date(2026, 5, 26, 12, 0, 0, 0, time.UTC) + store := &fakeDeliveryFanoutStore{ + replayWork: ReplayJobWork{ + State: "scheduled", + ConfigMode: ReplayConfigCurrent, + Request: ReplayRequest{DeliveryID: "del_old", ConfigMode: ReplayConfigCurrent}, + }, + deliverySource: DeliveryReplaySource{ + ID: "del_old", EventID: "evt_1", EndpointID: "end_old", RouteID: "rte_1", RouteVersionID: "rv_old", + RetryPolicyID: "rtp_old", AdapterVersionID: "adv_old", NormalizedEnvelopeID: "nenv_old", TransformationVersionID: "trv_old", DeliveryPayloadID: "dpl_old", + }, + currentTarget: DeliveryFanoutTarget{ + EndpointID: "end_new", RouteID: "rte_1", RouteVersionID: "rv_new", RouteRetryPolicyID: "rtp_new", TransformationVersionID: "trv_new", + }, + currentOK: true, + } + svc := NewDeliveryFanoutService(store, fixedFanoutClock{now: now}) + + if err := svc.CreateReplayDeliveries(context.Background(), "ten_1", "rpl_1"); err != nil { + t.Fatal(err) + } + if len(store.creates) != 1 { + t.Fatalf("expected one replay snapshot, got %d", len(store.creates)) + } + req := store.creates[0] + if req.EndpointID != "end_new" || req.RouteVersionID != "rv_new" || req.RetryPolicyID != "rtp_new" || req.TransformationVersionID != "trv_new" { + t.Fatalf("expected current route config, got 
%+v", req) + } + if req.DeliveryPayloadMode != DeliveryPayloadMaterialize || req.SourceDeliveryPayloadID != "" { + t.Fatalf("current replay must materialize a fresh payload snapshot, got %+v", req) + } + if req.AdapterVersionID != "" || req.NormalizedEnvelopeID != "" { + t.Fatalf("current replay should not reuse old envelope linkage before payload materialization, got %+v", req) + } +} + +func TestDeliveryFanoutDefersPausedReplay(t *testing.T) { + store := &fakeDeliveryFanoutStore{replayWork: ReplayJobWork{State: "paused"}} + svc := NewDeliveryFanoutService(store, fixedFanoutClock{now: time.Now().UTC()}) + + err := svc.CreateReplayDeliveries(context.Background(), "ten_1", "rpl_1") + if !errors.Is(err, worker.ErrDeferred) { + t.Fatalf("expected paused replay to defer, got %v", err) + } + if len(store.creates) != 0 || store.completedReplayItems != 0 { + t.Fatalf("deferred replay should not create or complete work, requests=%d completed=%d", len(store.creates), store.completedReplayItems) + } +} + +type fixedFanoutClock struct { + now time.Time +} + +func (c fixedFanoutClock) Now() time.Time { + return c.now +} + +type fakeDeliveryFanoutStore struct { + event domain.Event + targets []DeliveryFanoutTarget + creates []DeliverySnapshotRequest + replayWork ReplayJobWork + originals []DeliveryReplaySource + deliverySource DeliveryReplaySource + currentTarget DeliveryFanoutTarget + currentOK bool + noopItems []string + completedReplayItems int + reconciliationJobID string +} + +func (f *fakeDeliveryFanoutStore) GetEvent(context.Context, string, string) (domain.Event, error) { + return f.event, nil +} + +func (f *fakeDeliveryFanoutStore) ListDeliveryFanoutTargets(context.Context, string, string, string) ([]DeliveryFanoutTarget, error) { + return append([]DeliveryFanoutTarget(nil), f.targets...), nil +} + +func (f *fakeDeliveryFanoutStore) CreateDeliverySnapshot(_ context.Context, req DeliverySnapshotRequest) (DeliverySnapshotResult, error) { + f.creates = append(f.creates, 
req) + return DeliverySnapshotResult{ + DeliveryID: "del_new", + DeliveryPayloadID: "dpl_new", + DeliveryPayloadSHA256: "sha256:new", + AdapterVersionID: firstNonEmpty(req.AdapterVersionID, "adv_new"), + NormalizedEnvelopeID: firstNonEmpty(req.NormalizedEnvelopeID, "nenv_new"), + TransformationVersionID: req.TransformationVersionID, + }, nil +} + +func (f *fakeDeliveryFanoutStore) GetReplayJobWork(context.Context, string, string) (ReplayJobWork, error) { + return f.replayWork, nil +} + +func (f *fakeDeliveryFanoutStore) StartReplayJob(context.Context, string, string) (bool, error) { + return true, nil +} + +func (f *fakeDeliveryFanoutStore) ListOriginalDeliveryReplaySources(context.Context, string, string) ([]DeliveryReplaySource, error) { + return append([]DeliveryReplaySource(nil), f.originals...), nil +} + +func (f *fakeDeliveryFanoutStore) GetDeliveryReplaySource(context.Context, string, string) (DeliveryReplaySource, error) { + return f.deliverySource, nil +} + +func (f *fakeDeliveryFanoutStore) GetCurrentDeliveryFanoutTarget(context.Context, string, string, string) (DeliveryFanoutTarget, bool, error) { + return f.currentTarget, f.currentOK, nil +} + +func (f *fakeDeliveryFanoutStore) InsertReplayNoopItem(_ context.Context, _, _, eventID, _, errorText string) error { + f.noopItems = append(f.noopItems, eventID+":"+errorText) + return nil +} + +func (f *fakeDeliveryFanoutStore) CompleteReplayJob(_ context.Context, _, _ string, processedItems int) error { + f.completedReplayItems = processedItems + return nil +} + +func (f *fakeDeliveryFanoutStore) RunReconciliationJob(_ context.Context, _, jobID string) error { + f.reconciliationJobID = jobID + return nil +} diff --git a/internal/app/enterprise_identity.go b/internal/app/enterprise_identity.go index 0b12110..218ddf3 100644 --- a/internal/app/enterprise_identity.go +++ b/internal/app/enterprise_identity.go @@ -238,7 +238,7 @@ func (s *ControlService) enterpriseStore() (EnterpriseIdentityStore, error) { } func (s 
*ControlService) CreateIdentityProvider(ctx context.Context, actor authz.Actor, req CreateIdentityProviderRequest) (domain.IdentityProvider, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "identity_provider", "", "") { return domain.IdentityProvider{}, ErrForbidden } if err := validateIdentityProviderRequest(req); err != nil { @@ -252,7 +252,7 @@ func (s *ControlService) CreateIdentityProvider(ctx context.Context, actor authz } func (s *ControlService) ListIdentityProviders(ctx context.Context, actor authz.Actor, limit int) ([]domain.IdentityProvider, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "identity_provider", "", "") { return nil, ErrForbidden } store, err := s.enterpriseStore() @@ -263,7 +263,7 @@ func (s *ControlService) ListIdentityProviders(ctx context.Context, actor authz. } func (s *ControlService) GetIdentityProvider(ctx context.Context, actor authz.Actor, providerID string) (domain.IdentityProvider, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "identity_provider", providerID, "") { return domain.IdentityProvider{}, ErrForbidden } store, err := s.enterpriseStore() @@ -274,7 +274,7 @@ func (s *ControlService) GetIdentityProvider(ctx context.Context, actor authz.Ac } func (s *ControlService) UpdateIdentityProvider(ctx context.Context, actor authz.Actor, providerID string, req UpdateIdentityProviderRequest) (domain.IdentityProvider, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "identity_provider", providerID, "") { return domain.IdentityProvider{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -288,7 +288,7 @@ func (s *ControlService) UpdateIdentityProvider(ctx context.Context, actor authz } func (s *ControlService) DisableIdentityProvider(ctx context.Context, 
actor authz.Actor, providerID string, req StateChangeRequest) (domain.IdentityProvider, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "identity_provider", providerID, "") { return domain.IdentityProvider{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -302,7 +302,7 @@ func (s *ControlService) DisableIdentityProvider(ctx context.Context, actor auth } func (s *ControlService) TestIdentityProvider(ctx context.Context, actor authz.Actor, providerID string, req StateChangeRequest) (domain.IdentityProvider, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "identity_provider", providerID, "") { return domain.IdentityProvider{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -485,7 +485,7 @@ func (s *ControlService) CurrentAuthSession(ctx context.Context, actor authz.Act } func (s *ControlService) ListAuthSessions(ctx context.Context, actor authz.Actor, limit int) ([]domain.AuthSession, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "auth_session", "", "") { return nil, ErrForbidden } store, err := s.enterpriseStore() @@ -496,7 +496,7 @@ func (s *ControlService) ListAuthSessions(ctx context.Context, actor authz.Actor } func (s *ControlService) RevokeAuthSessionByID(ctx context.Context, actor authz.Actor, sessionID string, req StateChangeRequest) (domain.AuthSession, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "auth_session", sessionID, "") { return domain.AuthSession{}, ErrForbidden } if strings.TrimSpace(sessionID) == "" || strings.TrimSpace(req.Reason) == "" { @@ -521,7 +521,7 @@ func (s *ControlService) AuthenticateSCIMToken(ctx context.Context, rawToken str } func (s *ControlService) CreateSCIMToken(ctx context.Context, actor authz.Actor, req CreateSCIMTokenRequest) 
(SCIMTokenCreated, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "scim_token", "", "") { return SCIMTokenCreated{}, ErrForbidden } if strings.TrimSpace(req.Name) == "" { @@ -551,7 +551,7 @@ func (s *ControlService) CreateSCIMToken(ctx context.Context, actor authz.Actor, } func (s *ControlService) ListSCIMTokens(ctx context.Context, actor authz.Actor, limit int) ([]domain.SCIMToken, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "scim_token", "", "") { return nil, ErrForbidden } store, err := s.enterpriseStore() @@ -562,7 +562,7 @@ func (s *ControlService) ListSCIMTokens(ctx context.Context, actor authz.Actor, } func (s *ControlService) RevokeSCIMToken(ctx context.Context, actor authz.Actor, tokenID string, req StateChangeRequest) (domain.SCIMToken, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "scim_token", tokenID, "") { return domain.SCIMToken{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -684,7 +684,7 @@ func (s *ControlService) SCIMDeactivateGroup(ctx context.Context, actor authz.Ac } func (s *ControlService) CreateRoleBinding(ctx context.Context, actor authz.Actor, req CreateRoleBindingRequest) (domain.RoleBinding, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "role_binding", "", "") { return domain.RoleBinding{}, ErrForbidden } if err := validateRoleBinding(req.PrincipalType, req.PrincipalID, req.ResourceFamily, req.Environment, req.Reason); err != nil { @@ -701,7 +701,7 @@ func (s *ControlService) CreateRoleBinding(ctx context.Context, actor authz.Acto } func (s *ControlService) ListRoleBindings(ctx context.Context, actor authz.Actor, limit int) ([]domain.RoleBinding, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, 
"security:read", "role_binding", "", "") { return nil, ErrForbidden } store, err := s.enterpriseStore() @@ -712,7 +712,7 @@ func (s *ControlService) ListRoleBindings(ctx context.Context, actor authz.Actor } func (s *ControlService) UpdateRoleBinding(ctx context.Context, actor authz.Actor, bindingID string, req UpdateRoleBindingRequest) (domain.RoleBinding, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "role_binding", bindingID, "") { return domain.RoleBinding{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -726,7 +726,7 @@ func (s *ControlService) UpdateRoleBinding(ctx context.Context, actor authz.Acto } func (s *ControlService) DisableRoleBinding(ctx context.Context, actor authz.Actor, bindingID string, req StateChangeRequest) (domain.RoleBinding, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "role_binding", bindingID, "") { return domain.RoleBinding{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -740,7 +740,7 @@ func (s *ControlService) DisableRoleBinding(ctx context.Context, actor authz.Act } func (s *ControlService) CreateAccessPolicyRule(ctx context.Context, actor authz.Actor, req CreateAccessPolicyRuleRequest) (domain.AccessPolicyRule, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "access_policy", "", "") { return domain.AccessPolicyRule{}, ErrForbidden } if err := validateAccessPolicy(req.Name, req.Action, req.Effect, req.ResourceFamily, req.Environment, req.Conditions, req.Reason); err != nil { @@ -757,7 +757,7 @@ func (s *ControlService) CreateAccessPolicyRule(ctx context.Context, actor authz } func (s *ControlService) ListAccessPolicyRules(ctx context.Context, actor authz.Actor, limit int) ([]domain.AccessPolicyRule, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, 
"security:read", "access_policy", "", "") { return nil, ErrForbidden } store, err := s.enterpriseStore() @@ -768,7 +768,7 @@ func (s *ControlService) ListAccessPolicyRules(ctx context.Context, actor authz. } func (s *ControlService) UpdateAccessPolicyRule(ctx context.Context, actor authz.Actor, policyID string, req UpdateAccessPolicyRuleRequest) (domain.AccessPolicyRule, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "access_policy", policyID, "") { return domain.AccessPolicyRule{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -803,7 +803,7 @@ func (s *ControlService) UpdateAccessPolicyRule(ctx context.Context, actor authz } func (s *ControlService) DisableAccessPolicyRule(ctx context.Context, actor authz.Actor, policyID string, req StateChangeRequest) (domain.AccessPolicyRule, error) { - if !authz.Can(actor, "security:write", actor.TenantID) { + if !s.authorized(ctx, actor, "security:write", "access_policy", policyID, "") { return domain.AccessPolicyRule{}, ErrForbidden } if strings.TrimSpace(req.Reason) == "" { @@ -817,7 +817,7 @@ func (s *ControlService) DisableAccessPolicyRule(ctx context.Context, actor auth } func (s *ControlService) ExplainAuthorization(ctx context.Context, actor authz.Actor, req AuthzExplainRequest) (authz.Decision, error) { - if !authz.Can(actor, "security:read", actor.TenantID) { + if !s.authorized(ctx, actor, "security:read", "authz", "", "") { return authz.Decision{}, ErrForbidden } if strings.TrimSpace(req.Action) == "" || strings.TrimSpace(req.ResourceFamily) == "" { @@ -831,25 +831,22 @@ func (s *ControlService) ExplainAuthorization(ctx context.Context, actor authz.A if strings.TrimSpace(req.ActorID) != "" { targetActorID = strings.TrimSpace(req.ActorID) } - return store.ExplainAuthorization(ctx, actor.TenantID, targetActorID, req) + decision, err := store.ExplainAuthorization(ctx, actor.TenantID, targetActorID, req) + if err != nil { + return 
authz.Decision{}, err + } + return redactAuthorizationDecision(decision), nil } func (s *ControlService) authorized(ctx context.Context, actor authz.Actor, action, resourceFamily, resourceID, environment string) bool { - if actor.TenantID == "" { - return false - } - if store, ok := s.store.(EnterpriseIdentityStore); ok { - decision, err := store.ExplainAuthorization(ctx, actor.TenantID, actor.ID, AuthzExplainRequest{ - Action: action, - ResourceFamily: resourceFamily, - ResourceID: resourceID, - Environment: environment, - }) - if err == nil { - return decision.Allowed && actorScopesAllow(actor, action) - } - } - return authz.Can(actor, action, actor.TenantID) + return s.authorizer.Authorize(ctx, AuthorizationRequest{ + Actor: actor, + TenantID: actor.TenantID, + Action: action, + ResourceFamily: resourceFamily, + ResourceID: resourceID, + Environment: environment, + }).Allowed } func actorScopesAllow(actor authz.Actor, action string) bool { diff --git a/internal/app/enterprise_identity_test.go b/internal/app/enterprise_identity_test.go index 4142cc5..6f4e4fa 100644 --- a/internal/app/enterprise_identity_test.go +++ b/internal/app/enterprise_identity_test.go @@ -252,7 +252,7 @@ func (s *enterpriseFakeStore) DisableAccessPolicyRule(context.Context, string, s return domain.AccessPolicyRule{}, nil } func (s *enterpriseFakeStore) ExplainAuthorization(context.Context, string, string, AuthzExplainRequest) (authz.Decision, error) { - return authz.Decision{}, nil + return authz.Decision{}, ErrNotFound } func newFakeOIDCIssuer(t *testing.T, clientID, nonce string) *httptest.Server { diff --git a/internal/app/incidents.go b/internal/app/incidents.go new file mode 100644 index 0000000..f20af05 --- /dev/null +++ b/internal/app/incidents.go @@ -0,0 +1,323 @@ +package app + +import ( + "context" + "fmt" + "sort" + "strings" + "time" + + "webhookery/internal/authz" + "webhookery/internal/domain" + "webhookery/internal/random" +) + +const incidentReportSchemaV1 = 
"webhookery.incident_report.v1" + +type CreateIncidentRequest struct { + Title string `json:"title"` + Reason string `json:"reason"` +} + +type AddIncidentEventRequest struct { + EventID string `json:"event_id"` + Reason string `json:"reason"` +} + +type IncidentReportRequest struct { + Reason string `json:"reason"` +} + +type CreateIncidentEvidenceExportRequest struct { + Reason string `json:"reason"` +} + +type IncidentReport struct { + SchemaVersion string `json:"schema_version"` + GeneratedAt time.Time `json:"generated_at"` + GeneratedBy string `json:"generated_by"` + Incident domain.Incident `json:"incident"` + Events []IncidentReportEvent `json:"events"` + VerificationCommand string `json:"verification_command"` + NonClaims []string `json:"non_claims"` +} + +type IncidentReportEvent struct { + IncidentEvent domain.IncidentEvent `json:"incident_event"` + Event domain.Event `json:"event"` + EventIdentity map[string]any `json:"event_identity"` + ProviderVerification map[string]any `json:"provider_verification"` + RawCaptureEvidence map[string]any `json:"raw_capture_evidence"` + Timeline []EventTimelineEntry `json:"timeline"` +} + +func (s *ControlService) CreateIncident(ctx context.Context, actor authz.Actor, req CreateIncidentRequest) (domain.Incident, error) { + if !s.authorized(ctx, actor, "incidents:write", "incident", "", "") { + return domain.Incident{}, ErrForbidden + } + title := strings.TrimSpace(req.Title) + reason := strings.TrimSpace(req.Reason) + if title == "" || reason == "" { + return domain.Incident{}, fmt.Errorf("%w: title and reason are required", ErrInvalidInput) + } + id, err := random.Token("inc", 18) + if err != nil { + return domain.Incident{}, err + } + return s.store.CreateIncident(ctx, domain.Incident{ + ID: id, + TenantID: actor.TenantID, + Title: title, + Reason: reason, + State: domain.StateActive, + CreatedBy: actor.ID, + }) +} + +func (s *ControlService) ListIncidents(ctx context.Context, actor authz.Actor, limit int) 
([]domain.Incident, error) { + if !s.authorized(ctx, actor, "incidents:read", "incident", "", "") { + return nil, ErrForbidden + } + return s.store.ListIncidents(ctx, actor.TenantID, normalizeLimit(limit)) +} + +func (s *ControlService) GetIncident(ctx context.Context, actor authz.Actor, incidentID string) (domain.Incident, error) { + if !s.authorized(ctx, actor, "incidents:read", "incident", incidentID, "") { + return domain.Incident{}, ErrForbidden + } + incidentID = strings.TrimSpace(incidentID) + if incidentID == "" { + return domain.Incident{}, fmt.Errorf("%w: incident_id is required", ErrInvalidInput) + } + return s.store.GetIncident(ctx, actor.TenantID, incidentID) +} + +func (s *ControlService) AddIncidentEvent(ctx context.Context, actor authz.Actor, incidentID string, req AddIncidentEventRequest) (domain.IncidentEvent, error) { + if !s.authorized(ctx, actor, "incidents:write", "incident", incidentID, "") { + return domain.IncidentEvent{}, ErrForbidden + } + incidentID = strings.TrimSpace(incidentID) + eventID := strings.TrimSpace(req.EventID) + reason := strings.TrimSpace(req.Reason) + if incidentID == "" || eventID == "" || reason == "" { + return domain.IncidentEvent{}, fmt.Errorf("%w: incident_id, event_id, and reason are required", ErrInvalidInput) + } + if _, err := s.store.GetIncident(ctx, actor.TenantID, incidentID); err != nil { + return domain.IncidentEvent{}, err + } + if _, err := s.store.GetEvent(ctx, actor.TenantID, eventID); err != nil { + return domain.IncidentEvent{}, err + } + return s.store.AddIncidentEvent(ctx, actor.TenantID, incidentID, eventID, actor.ID, reason) +} + +func (s *ControlService) RemoveIncidentEvent(ctx context.Context, actor authz.Actor, incidentID, eventID string, req StateChangeRequest) (domain.IncidentEvent, error) { + if !s.authorized(ctx, actor, "incidents:write", "incident", incidentID, "") { + return domain.IncidentEvent{}, ErrForbidden + } + incidentID = strings.TrimSpace(incidentID) + eventID = 
strings.TrimSpace(eventID) + reason := strings.TrimSpace(req.Reason) + if incidentID == "" || eventID == "" || reason == "" { + return domain.IncidentEvent{}, fmt.Errorf("%w: incident_id, event_id, and reason are required", ErrInvalidInput) + } + return s.store.RemoveIncidentEvent(ctx, actor.TenantID, incidentID, eventID, actor.ID, reason) +} + +func (s *ControlService) GenerateIncidentReport(ctx context.Context, actor authz.Actor, incidentID string, req IncidentReportRequest) (domain.IncidentReportSnapshot, error) { + if !s.authorized(ctx, actor, "incidents:write", "incident", incidentID, "") { + return domain.IncidentReportSnapshot{}, ErrForbidden + } + if strings.TrimSpace(req.Reason) == "" { + return domain.IncidentReportSnapshot{}, fmt.Errorf("%w: reason is required", ErrInvalidInput) + } + report, markdown, err := s.buildIncidentReport(ctx, actor, incidentID) + if err != nil { + return domain.IncidentReportSnapshot{}, err + } + return s.store.CreateIncidentReportSnapshot(ctx, actor.TenantID, strings.TrimSpace(incidentID), actor.ID, strings.TrimSpace(req.Reason), report, markdown) +} + +func (s *ControlService) GetIncidentReport(ctx context.Context, actor authz.Actor, incidentID string) (domain.IncidentReportSnapshot, error) { + if !s.authorized(ctx, actor, "incidents:read", "incident", incidentID, "") { + return domain.IncidentReportSnapshot{}, ErrForbidden + } + incidentID = strings.TrimSpace(incidentID) + if incidentID == "" { + return domain.IncidentReportSnapshot{}, fmt.Errorf("%w: incident_id is required", ErrInvalidInput) + } + return s.store.GetIncidentReportSnapshot(ctx, actor.TenantID, incidentID) +} + +func (s *ControlService) CreateIncidentEvidenceExport(ctx context.Context, actor authz.Actor, incidentID string, req CreateIncidentEvidenceExportRequest) (domain.IncidentEvidenceExport, domain.EvidenceExport, error) { + if !s.authorized(ctx, actor, "incidents:write", "incident", incidentID, "") { + return domain.IncidentEvidenceExport{}, 
domain.EvidenceExport{}, ErrForbidden + } + if !s.authorized(ctx, actor, "audit:read", "audit_export", "", "") { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, ErrForbidden + } + incidentID = strings.TrimSpace(incidentID) + req.Reason = strings.TrimSpace(req.Reason) + if incidentID == "" || req.Reason == "" { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, fmt.Errorf("%w: incident_id and reason are required", ErrInvalidInput) + } + report, markdown, err := s.buildIncidentReport(ctx, actor, incidentID) + if err != nil { + return domain.IncidentEvidenceExport{}, domain.EvidenceExport{}, err + } + return s.store.CreateIncidentEvidenceExport(ctx, actor.TenantID, incidentID, actor.ID, req, report, markdown) +} + +func (s *ControlService) buildIncidentReport(ctx context.Context, actor authz.Actor, incidentID string) (IncidentReport, string, error) { + incidentID = strings.TrimSpace(incidentID) + if incidentID == "" { + return IncidentReport{}, "", fmt.Errorf("%w: incident_id is required", ErrInvalidInput) + } + incident, err := s.store.GetIncident(ctx, actor.TenantID, incidentID) + if err != nil { + return IncidentReport{}, "", err + } + links, err := s.store.ListIncidentEvents(ctx, actor.TenantID, incidentID) + if err != nil { + return IncidentReport{}, "", err + } + sort.Slice(links, func(i, j int) bool { + if links[i].CreatedAt.Equal(links[j].CreatedAt) { + return links[i].EventID < links[j].EventID + } + return links[i].CreatedAt.Before(links[j].CreatedAt) + }) + report := IncidentReport{ + SchemaVersion: incidentReportSchemaV1, + GeneratedAt: time.Now().UTC(), + GeneratedBy: actor.ID, + Incident: incident, + VerificationCommand: "whcp audit verify-bundle --file evidence.tar.gz", + NonClaims: []string{ + "Inbound capture does not prove downstream business success.", + "Webhookery records at-least-once delivery evidence and does not claim exactly-once delivery.", + "The report proves Webhookery evidence observed locally; it does 
not prove provider-side completeness.", + "Raw payload bodies, secrets, signatures, bearer tokens, and private keys are omitted by default.", + }, + } + for _, link := range links { + event, err := s.store.GetEvent(ctx, actor.TenantID, link.EventID) + if err != nil { + return IncidentReport{}, "", err + } + timeline, err := s.store.ListEventTimeline(ctx, actor.TenantID, link.EventID, 500) + if err != nil { + return IncidentReport{}, "", err + } + report.Events = append(report.Events, IncidentReportEvent{ + IncidentEvent: link, + Event: event, + EventIdentity: map[string]any{ + "event_id": event.ID, + "provider": event.Provider, + "type": event.Type, + "provider_event_id": event.ProviderID, + "source_id": event.SourceID, + "tenant_id_hash": domain.HashSHA256([]byte(event.TenantID)), + "received_at": event.ReceivedAt, + }, + ProviderVerification: map[string]any{ + "signature_verified": event.Verified, + "verification_reason": event.VerifyReason, + "dedupe_status": event.DedupeStatus, + }, + RawCaptureEvidence: map[string]any{ + "raw_payload_id": event.RawPayloadID, + "raw_payload_hash": event.RawPayloadHash, + "raw_body": "omitted", + }, + Timeline: sanitizeTimeline(timeline), + }) + } + markdown := markdownIncidentReport(report) + return report, markdown, nil +} + +func sanitizeTimeline(entries []EventTimelineEntry) []EventTimelineEntry { + out := make([]EventTimelineEntry, 0, len(entries)) + for _, entry := range entries { + if entry.SchemaVersion == "" { + entry.SchemaVersion = EventTimelineSchemaV1 + } + if strings.Contains(strings.ToLower(entry.Detail), "body=") || + strings.Contains(strings.ToLower(entry.Detail), "secret=") || + strings.Contains(strings.ToLower(entry.Detail), "signature=") || + strings.Contains(strings.ToLower(entry.Detail), "token=") { + entry.Detail = "[redacted]" + } + out = append(out, entry) + } + return out +} + +func markdownIncidentReport(report IncidentReport) string { + var b strings.Builder + fmt.Fprintf(&b, "# Webhookery Incident 
Report\n\n") + fmt.Fprintf(&b, "Schema version: `%s`\n\n", report.SchemaVersion) + fmt.Fprintf(&b, "Generated at: `%s`\n\n", report.GeneratedAt.Format(time.RFC3339)) + fmt.Fprintf(&b, "## 1. Summary\n\n") + fmt.Fprintf(&b, "- Incident: `%s`\n", report.Incident.ID) + fmt.Fprintf(&b, "- Title: %s\n", markdownText(report.Incident.Title)) + fmt.Fprintf(&b, "- Reason: %s\n", markdownText(report.Incident.Reason)) + fmt.Fprintf(&b, "- Events attached: %d\n\n", len(report.Events)) + for _, event := range report.Events { + fmt.Fprintf(&b, "## 2. Event Identity\n\n") + fmt.Fprintf(&b, "- Event ID: `%s`\n", event.Event.ID) + fmt.Fprintf(&b, "- Provider: `%s`\n", event.Event.Provider) + fmt.Fprintf(&b, "- Type: `%s`\n", event.Event.Type) + fmt.Fprintf(&b, "- Provider event ID: `%s`\n", event.Event.ProviderID) + fmt.Fprintf(&b, "- Received at: `%s`\n\n", event.Event.ReceivedAt.Format(time.RFC3339)) + fmt.Fprintf(&b, "## 3. Provider Verification\n\n") + fmt.Fprintf(&b, "- Signature verified: `%t`\n", event.Event.Verified) + fmt.Fprintf(&b, "- Verification reason: `%s`\n", event.Event.VerifyReason) + fmt.Fprintf(&b, "- Dedupe status: `%s`\n\n", event.Event.DedupeStatus) + fmt.Fprintf(&b, "## 4. Raw Capture Evidence\n\n") + fmt.Fprintf(&b, "- Raw payload ID: `%s`\n", event.Event.RawPayloadID) + fmt.Fprintf(&b, "- Raw payload hash: `%s`\n", event.Event.RawPayloadHash) + fmt.Fprintf(&b, "- Raw payload body: omitted by default\n\n") + fmt.Fprintf(&b, "## 5. Route And Configuration Snapshot\n\n") + writeTimelineKind(&b, event.Timeline, "delivery") + fmt.Fprintf(&b, "## 6. Delivery Attempt Timeline\n\n") + writeTimelineKind(&b, event.Timeline, "attempt") + fmt.Fprintf(&b, "## 7. Retry And DLQ State\n\n") + writeTimelineKind(&b, event.Timeline, "dead_letter") + fmt.Fprintf(&b, "## 8. Replay History\n\n") + writeTimelineKind(&b, event.Timeline, "replay") + fmt.Fprintf(&b, "## 9. 
Retention And Raw-Payload Access State\n\n") + writeTimelineKind(&b, event.Timeline, "raw_payload") + fmt.Fprintf(&b, "## 10. Audit-Chain Proof References\n\n") + writeTimelineKind(&b, event.Timeline, "audit") + } + fmt.Fprintf(&b, "## 11. Known Gaps And Non-Claims\n\n") + for _, nonClaim := range report.NonClaims { + fmt.Fprintf(&b, "- %s\n", nonClaim) + } + fmt.Fprintf(&b, "- Verify exported bundles with `%s`.\n", report.VerificationCommand) + return b.String() +} + +func writeTimelineKind(b *strings.Builder, timeline []EventTimelineEntry, kind string) { + wrote := false + for _, entry := range timeline { + if entry.Kind != kind { + continue + } + wrote = true + fmt.Fprintf(b, "- `%s` `%s` `%s`: %s\n", entry.OccurredAt.Format(time.RFC3339), entry.RefID, entry.State, markdownText(entry.Detail)) + } + if !wrote { + fmt.Fprintf(b, "- No `%s` entries recorded in the event timeline.\n", kind) + } + fmt.Fprintln(b) +} + +func markdownText(value string) string { + value = strings.ReplaceAll(value, "\r", " ") + value = strings.ReplaceAll(value, "\n", " ") + return strings.TrimSpace(value) +} diff --git a/internal/app/outbox_processor.go b/internal/app/outbox_processor.go new file mode 100644 index 0000000..21eacbf --- /dev/null +++ b/internal/app/outbox_processor.go @@ -0,0 +1,33 @@ +package app + +import ( + "context" + + "webhookery/internal/worker" +) + +type OutboxProcessorService struct { + fanout *DeliveryFanoutService + reconciliation *ReconciliationService +} + +func NewOutboxProcessorService(fanout *DeliveryFanoutService, reconciliation *ReconciliationService) *OutboxProcessorService { + return &OutboxProcessorService{fanout: fanout, reconciliation: reconciliation} +} + +func (s *OutboxProcessorService) ProcessOutbox(ctx context.Context, item worker.OutboxItem) error { + switch item.Kind { + case OutboxKindRouteEvent, OutboxKindRouteRecoveredEvent, OutboxKindReplayJob: + if s.fanout == nil { + return nil + } + return s.fanout.ProcessOutbox(ctx, item) + case 
OutboxKindReconciliationJob: + if s.reconciliation == nil { + return nil + } + return s.reconciliation.RunReconciliationJob(ctx, item.TenantID, item.ResourceID) + default: + return nil + } +} diff --git a/internal/app/reconciliation.go b/internal/app/reconciliation.go new file mode 100644 index 0000000..2d8d582 --- /dev/null +++ b/internal/app/reconciliation.go @@ -0,0 +1,338 @@ +package app + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + "time" + + "webhookery/internal/domain" + "webhookery/internal/reconcile" + "webhookery/internal/worker" +) + +type ReconciliationAdapterRegistry interface { + Adapter(provider string) (reconcile.Adapter, bool) +} + +type ReconciliationWorkStore interface { + GetReconciliationConnection(ctx context.Context, tenantID, connectionID string) (domain.ProviderConnection, string, error) + GetReconciliationWork(ctx context.Context, tenantID, jobID string) (ReconciliationWork, error) + StartReconciliationJob(ctx context.Context, tenantID, jobID string) (bool, error) + RecordProviderAPIEvidence(ctx context.Context, record ProviderAPIEvidenceRecord) (string, error) + FindLocalProviderEvent(ctx context.Context, tenantID string, conn domain.ProviderConnection, providerObjectID string) (string, error) + CaptureRecoveredProviderEvent(ctx context.Context, input RecoveredProviderEventCapture) (string, error) + InsertReconciliationItem(ctx context.Context, input ReconciliationItemRecord) (string, error) + AttachProviderEvidenceToItem(ctx context.Context, tenantID, itemID, evidenceID string) error + UpdateReconciliationCursor(ctx context.Context, tenantID, jobID, cursor string) error + CompleteReconciliationJob(ctx context.Context, tenantID, jobID string) error + FailReconciliationJob(ctx context.Context, tenantID, jobID, errorText string) error +} + +type ReconciliationWork struct { + Job domain.ReconciliationJob + Connection domain.ProviderConnection + Credential string +} + +type ProviderAPIEvidenceRecord struct { + 
TenantID string + JobID string + ItemID string + ConnectionID string + Provider string + Evidence ProviderAPIEvidence +} + +type ProviderAPIEvidence struct { + Method string + URL string + StatusCode int + Body []byte + Error string +} + +type RecoveredProviderEventCapture struct { + Connection domain.ProviderConnection + ObjectID string + EventType string + RawBody []byte + RequestHeaders map[string]string + RouteRecovered bool +} + +type ReconciliationItemRecord struct { + TenantID string + JobID string + Provider string + ObjectID string + ObjectType string + Outcome string + LocalEventID string + RecoveredEventID string + EvidenceID string + RedeliveryRequested bool + Error string + Metadata json.RawMessage +} + +type ReconciliationService struct { + store ReconciliationWorkStore + registry ReconciliationAdapterRegistry +} + +func NewReconciliationService(store ReconciliationWorkStore, registry ReconciliationAdapterRegistry) *ReconciliationService { + if registry == nil { + builtIn := reconcile.BuiltInRegistry(nil) + registry = builtIn + } + return &ReconciliationService{store: store, registry: registry} +} + +func (s *ReconciliationService) DryRunReconciliation(ctx context.Context, tenantID string, req ReconciliationJobRequest) (domain.ReconciliationJob, error) { + conn, credential, err := s.store.GetReconciliationConnection(ctx, tenantID, req.ConnectionID) + if err != nil { + return domain.ReconciliationJob{}, err + } + adapter, ok := s.registry.Adapter(conn.Provider) + if !ok { + return domain.ReconciliationJob{}, ErrInvalidInput + } + now := time.Now().UTC() + job := domain.ReconciliationJob{ + ID: "dry_run", + TenantID: tenantID, + ConnectionID: conn.ID, + Provider: conn.Provider, + State: domain.ReconciliationJobStateCompleted, + DryRun: true, + CaptureMissing: req.CaptureMissing, + RouteRecovered: req.RouteRecovered, + RedeliverFailed: req.RedeliverFailed, + ScopeObjectID: req.ScopeObjectID, + WindowStart: req.WindowStart, + WindowEnd: req.WindowEnd, + 
Reason: req.Reason, + CreatedAt: now, + CompletedAt: now, + } + caps := adapter.Capabilities(conn.Config) + if !caps.CanScanEvents { + job.TotalItems = 1 + job.UnrecoverableItems = 1 + job.Error = strings.Join(caps.Limitations, "; ") + return job, nil + } + scan, err := adapter.Scan(ctx, reconcile.ScanRequest{ + Connection: reconcile.Connection{ + ID: conn.ID, Provider: conn.Provider, CredentialType: conn.CredentialType, Credential: credential, Config: conn.Config, + }, + WindowStart: req.WindowStart, WindowEnd: req.WindowEnd, ScopeObjectID: req.ScopeObjectID, + CaptureMissing: req.CaptureMissing, RedeliverFailed: req.RedeliverFailed, + }) + if err != nil { + job.State = domain.ReconciliationJobStateFailed + job.Error = providerErrorForDB(err) + return job, nil + } + for _, object := range scan.Objects { + job.TotalItems++ + localID, err := s.store.FindLocalProviderEvent(ctx, tenantID, conn, object.ID) + if err != nil { + return domain.ReconciliationJob{}, err + } + if localID != "" { + job.MatchedItems++ + } else { + job.MissingItems++ + } + if object.Failed && req.RedeliverFailed && object.Redeliverable { + job.RedeliveredItems++ + } + } + return job, nil +} + +func (s *ReconciliationService) RunReconciliationJob(ctx context.Context, tenantID, jobID string) error { + work, err := s.store.GetReconciliationWork(ctx, tenantID, jobID) + if errors.Is(err, ErrNotFound) { + return ErrNotFound + } + if err != nil { + return err + } + job := work.Job + if job.State == domain.ReconciliationJobStateCanceled || job.State == domain.ReconciliationJobStateCompleted { + return nil + } + adapter, ok := s.registry.Adapter(work.Connection.Provider) + if !ok { + return s.store.FailReconciliationJob(ctx, tenantID, jobID, providerErrorForDB(reconcile.ProviderError{Class: reconcile.ErrorUnsupported, Message: "unsupported provider"})) + } + started, err := s.store.StartReconciliationJob(ctx, tenantID, jobID) + if err != nil { + return err + } + if !started { + return worker.ErrDeferred 
+ } + caps := adapter.Capabilities(work.Connection.Config) + if !caps.CanScanEvents { + metadata, _ := json.Marshal(map[string]any{"limitations": caps.Limitations}) + if _, err := s.store.InsertReconciliationItem(ctx, ReconciliationItemRecord{ + TenantID: tenantID, JobID: jobID, Provider: work.Connection.Provider, ObjectID: work.Connection.Provider + ":unsupported", ObjectType: "capability", + Outcome: domain.ReconciliationOutcomeUnrecoverable, Error: strings.Join(caps.Limitations, "; "), Metadata: metadata, + }); err != nil { + return err + } + return s.store.CompleteReconciliationJob(ctx, tenantID, jobID) + } + scan, err := adapter.Scan(ctx, reconcile.ScanRequest{ + Connection: reconcile.Connection{ + ID: work.Connection.ID, Provider: work.Connection.Provider, CredentialType: work.Connection.CredentialType, Credential: work.Credential, Config: work.Connection.Config, + }, + WindowStart: job.WindowStart, WindowEnd: job.WindowEnd, ScopeObjectID: job.ScopeObjectID, Cursor: job.Cursor, + CaptureMissing: job.CaptureMissing, RedeliverFailed: job.RedeliverFailed, + }) + for _, ev := range scan.Evidence { + if _, recErr := s.recordProviderAPIEvidence(ctx, tenantID, jobID, "", work.Connection, ev); recErr != nil { + return recErr + } + } + if err != nil { + return s.store.FailReconciliationJob(ctx, tenantID, jobID, providerErrorForDB(err)) + } + for _, object := range scan.Objects { + if err := s.reconcileProviderObject(ctx, job, work.Connection, work.Credential, adapter, object); err != nil { + return s.store.FailReconciliationJob(ctx, tenantID, jobID, providerErrorForDB(err)) + } + } + if scan.NextCursor != "" { + if err := s.store.UpdateReconciliationCursor(ctx, tenantID, jobID, scan.NextCursor); err != nil { + return err + } + } + return s.store.CompleteReconciliationJob(ctx, tenantID, jobID) +} + +func (s *ReconciliationService) reconcileProviderObject(ctx context.Context, job domain.ReconciliationJob, conn domain.ProviderConnection, credential string, adapter 
reconcile.Adapter, object reconcile.ProviderObject) error { + tenantID := job.TenantID + localEventID, err := s.store.FindLocalProviderEvent(ctx, tenantID, conn, object.ID) + if err != nil { + return err + } + outcome := domain.ReconciliationOutcomeMatched + if localEventID == "" { + outcome = domain.ReconciliationOutcomeMissing + } + metadata, _ := json.Marshal(object.Metadata) + var evidenceID string + var recoveredEventID string + var errText string + if localEventID == "" && job.CaptureMissing { + lookupObject := object + lookupEvidence := []reconcile.Evidence(nil) + if len(lookupObject.RawBody) == 0 || !lookupObject.Recoverable { + lookedUp, evs, lookupErr := adapter.Lookup(ctx, reconcile.Connection{ID: conn.ID, Provider: conn.Provider, CredentialType: conn.CredentialType, Credential: credential, Config: conn.Config}, providerLookupID(object)) + lookupEvidence = evs + if lookupErr == nil { + lookupObject = lookedUp + } else if errors.Is(lookupErr, reconcile.ErrUnsupported) { + outcome = domain.ReconciliationOutcomeUnrecoverable + errText = "provider does not expose recoverable payload evidence for this object" + } else { + outcome = domain.ReconciliationOutcomeFailed + errText = providerErrorForDB(lookupErr) + } + } + for _, ev := range lookupEvidence { + id, recErr := s.recordProviderAPIEvidence(ctx, tenantID, job.ID, "", conn, ev) + if recErr != nil { + return recErr + } + evidenceID = id + } + if outcome == domain.ReconciliationOutcomeMissing && lookupObject.Recoverable && len(lookupObject.RawBody) > 0 { + recoveredEventID, err = s.store.CaptureRecoveredProviderEvent(ctx, RecoveredProviderEventCapture{ + Connection: conn, ObjectID: lookupObject.ID, EventType: lookupObject.EventType, + RawBody: append([]byte(nil), lookupObject.RawBody...), RequestHeaders: lookupObject.RequestHeaders, + RouteRecovered: job.RouteRecovered, + }) + if err != nil { + outcome = domain.ReconciliationOutcomeFailed + errText = err.Error() + } else { + outcome = 
domain.ReconciliationOutcomeCaptured + } + } else if outcome == domain.ReconciliationOutcomeMissing { + outcome = domain.ReconciliationOutcomeUnrecoverable + errText = "provider API did not include a recoverable payload body" + } + } + redeliveryRequested := false + if job.RedeliverFailed && object.Failed && object.Redeliverable { + evs, redeliverErr := adapter.RequestRedelivery(ctx, reconcile.Connection{ID: conn.ID, Provider: conn.Provider, CredentialType: conn.CredentialType, Credential: credential, Config: conn.Config}, providerLookupID(object)) + for _, ev := range evs { + id, recErr := s.recordProviderAPIEvidence(ctx, tenantID, job.ID, "", conn, ev) + if recErr != nil { + return recErr + } + evidenceID = id + } + if redeliverErr != nil { + outcome = domain.ReconciliationOutcomeFailed + errText = providerErrorForDB(redeliverErr) + } else { + outcome = domain.ReconciliationOutcomeRedeliveryRequested + redeliveryRequested = true + } + } + itemID, err := s.store.InsertReconciliationItem(ctx, ReconciliationItemRecord{ + TenantID: tenantID, JobID: job.ID, Provider: conn.Provider, ObjectID: object.ID, ObjectType: object.ObjectType, + Outcome: outcome, LocalEventID: localEventID, RecoveredEventID: recoveredEventID, EvidenceID: evidenceID, + RedeliveryRequested: redeliveryRequested, Error: errText, Metadata: metadata, + }) + if err != nil { + return err + } + if evidenceID != "" { + return s.store.AttachProviderEvidenceToItem(ctx, tenantID, itemID, evidenceID) + } + return nil +} + +func (s *ReconciliationService) recordProviderAPIEvidence(ctx context.Context, tenantID, jobID, itemID string, conn domain.ProviderConnection, ev reconcile.Evidence) (string, error) { + return s.store.RecordProviderAPIEvidence(ctx, ProviderAPIEvidenceRecord{ + TenantID: tenantID, JobID: jobID, ItemID: itemID, ConnectionID: conn.ID, Provider: conn.Provider, + Evidence: ProviderAPIEvidence{Method: ev.Method, URL: ev.URL, StatusCode: ev.StatusCode, Body: append([]byte(nil), ev.Body...), Error: 
ev.Error}, + }) +} + +func providerLookupID(object reconcile.ProviderObject) string { + if value, ok := object.Metadata["delivery_id"]; ok && fmt.Sprint(value) != "" { + return fmt.Sprint(value) + } + return object.ID +} + +func providerErrorForDB(err error) string { + if err == nil { + return "" + } + var providerErr reconcile.ProviderError + if errors.As(err, &providerErr) && providerErr.Class != "" { + return providerErr.Class + } + if errors.Is(err, reconcile.ErrUnsupported) { + return reconcile.ErrorUnsupported + } + msg := err.Error() + for _, marker := range []string{"sk_", "ghp_", "github_pat_", "xoxb-", "shpat_"} { + if strings.Contains(msg, marker) { + return "provider request failed" + } + } + return msg +} diff --git a/internal/app/reconciliation_test.go b/internal/app/reconciliation_test.go new file mode 100644 index 0000000..c2a1211 --- /dev/null +++ b/internal/app/reconciliation_test.go @@ -0,0 +1,252 @@ +package app + +import ( + "context" + "errors" + "fmt" + "testing" + "time" + + "webhookery/internal/domain" + "webhookery/internal/reconcile" +) + +func TestReconciliationServiceCapturesMissingRecoverableObject(t *testing.T) { + store := newFakeReconciliationStore() + store.work.Job = domain.ReconciliationJob{ + ID: "rec_1", TenantID: "ten_1", ConnectionID: "pcn_1", Provider: "stripe", State: domain.ReconciliationJobStateScheduled, + CaptureMissing: true, RouteRecovered: true, + } + store.work.Connection = domain.ProviderConnection{ID: "pcn_1", TenantID: "ten_1", Provider: "stripe", CredentialType: "api_key", Config: map[string]string{"source_id": "src_1"}} + store.work.Credential = "sk_test_secret" + adapter := &fakeReconciliationAdapter{ + capabilities: reconcile.Capabilities{Provider: "stripe", CanScanEvents: true}, + scanResult: reconcile.ScanResult{ + Objects: []reconcile.ProviderObject{{ID: "evt_missing", ObjectType: "event", Recoverable: false, Metadata: map[string]any{"request_id": "req_1"}}}, + Evidence: []reconcile.Evidence{{Method: 
"GET", URL: "https://api.stripe.com/v1/events", StatusCode: 200, Body: []byte(`{"ok":true}`)}}, + NextCursor: "cursor_2", + }, + lookupObject: reconcile.ProviderObject{ID: "evt_missing", ObjectType: "event", EventType: "invoice.created", Recoverable: true, RawBody: []byte(`{"id":"evt_missing"}`), RequestHeaders: map[string]string{"Stripe-Signature": "redacted"}}, + lookupEvidence: []reconcile.Evidence{{Method: "GET", URL: "https://api.stripe.com/v1/events/evt_missing", StatusCode: 200, Body: []byte(`{"id":"evt_missing"}`)}}, + } + service := NewReconciliationService(store, fakeReconciliationRegistry{"stripe": adapter}) + + if err := service.RunReconciliationJob(context.Background(), "ten_1", "rec_1"); err != nil { + t.Fatal(err) + } + if !store.started || !store.completed { + t.Fatalf("expected reconciliation job to start and complete, started=%v completed=%v", store.started, store.completed) + } + if store.cursor != "cursor_2" { + t.Fatalf("expected cursor update, got %q", store.cursor) + } + if len(store.evidence) != 2 { + t.Fatalf("expected scan and lookup evidence, got %d", len(store.evidence)) + } + if len(store.captures) != 1 || string(store.captures[0].RawBody) != `{"id":"evt_missing"}` || !store.captures[0].RouteRecovered { + t.Fatalf("expected recovered event capture with route flag, got %+v", store.captures) + } + if len(store.items) != 1 || store.items[0].Outcome != domain.ReconciliationOutcomeCaptured || store.items[0].RecoveredEventID != "evt_recovered" { + t.Fatalf("expected captured reconciliation item, got %+v", store.items) + } +} + +func TestReconciliationServiceRequestsRedeliveryForFailedObject(t *testing.T) { + store := newFakeReconciliationStore() + store.work.Job = domain.ReconciliationJob{ + ID: "rec_1", TenantID: "ten_1", ConnectionID: "pcn_1", Provider: "github", State: domain.ReconciliationJobStateScheduled, + RedeliverFailed: true, + } + store.work.Connection = domain.ProviderConnection{ID: "pcn_1", TenantID: "ten_1", Provider: "github", 
CredentialType: "api_key", Config: map[string]string{}} + store.localEvents["evt_provider"] = "evt_local" + adapter := &fakeReconciliationAdapter{ + capabilities: reconcile.Capabilities{Provider: "github", CanScanEvents: true}, + scanResult: reconcile.ScanResult{Objects: []reconcile.ProviderObject{{ + ID: "evt_provider", ObjectType: "delivery", Failed: true, Redeliverable: true, Metadata: map[string]any{"delivery_id": "delivery_1"}, + }}}, + redeliveryEvidence: []reconcile.Evidence{{Method: "POST", URL: "https://api.github.com/redeliver", StatusCode: 202}}, + } + service := NewReconciliationService(store, fakeReconciliationRegistry{"github": adapter}) + + if err := service.RunReconciliationJob(context.Background(), "ten_1", "rec_1"); err != nil { + t.Fatal(err) + } + if adapter.redeliveryID != "delivery_1" { + t.Fatalf("expected redelivery lookup id from delivery metadata, got %q", adapter.redeliveryID) + } + if len(store.items) != 1 || store.items[0].Outcome != domain.ReconciliationOutcomeRedeliveryRequested || !store.items[0].RedeliveryRequested { + t.Fatalf("expected redelivery requested item, got %+v", store.items) + } + if len(store.evidence) != 1 || store.items[0].EvidenceID == "" { + t.Fatalf("expected redelivery evidence linked to item, evidence=%+v item=%+v", store.evidence, store.items) + } +} + +func TestReconciliationServiceUnsupportedScanRecordsUnrecoverableItem(t *testing.T) { + store := newFakeReconciliationStore() + store.work.Job = domain.ReconciliationJob{ID: "rec_1", TenantID: "ten_1", ConnectionID: "pcn_1", Provider: "slack", State: domain.ReconciliationJobStateScheduled} + store.work.Connection = domain.ProviderConnection{ID: "pcn_1", TenantID: "ten_1", Provider: "slack", Config: map[string]string{}} + adapter := &fakeReconciliationAdapter{capabilities: reconcile.Capabilities{Provider: "slack", CanScanEvents: false, Limitations: []string{"scan unsupported"}}} + service := NewReconciliationService(store, fakeReconciliationRegistry{"slack": 
adapter}) + + if err := service.RunReconciliationJob(context.Background(), "ten_1", "rec_1"); err != nil { + t.Fatal(err) + } + if adapter.scanCalled { + t.Fatal("unsupported provider should not scan") + } + if len(store.items) != 1 || store.items[0].Outcome != domain.ReconciliationOutcomeUnrecoverable { + t.Fatalf("expected unrecoverable capability item, got %+v", store.items) + } +} + +func TestReconciliationServiceDryRunCountsProviderObjects(t *testing.T) { + store := newFakeReconciliationStore() + store.connection = domain.ProviderConnection{ID: "pcn_1", TenantID: "ten_1", Provider: "stripe", CredentialType: "api_key", Config: map[string]string{}} + store.credential = "sk_test_secret" + store.localEvents["evt_local"] = "evt_1" + adapter := &fakeReconciliationAdapter{ + capabilities: reconcile.Capabilities{Provider: "stripe", CanScanEvents: true}, + scanResult: reconcile.ScanResult{Objects: []reconcile.ProviderObject{ + {ID: "evt_local", ObjectType: "event"}, + {ID: "evt_missing", ObjectType: "event", Failed: true, Redeliverable: true}, + }}, + } + service := NewReconciliationService(store, fakeReconciliationRegistry{"stripe": adapter}) + + job, err := service.DryRunReconciliation(context.Background(), "ten_1", ReconciliationJobRequest{ConnectionID: "pcn_1", RedeliverFailed: true}) + if err != nil { + t.Fatal(err) + } + if job.TotalItems != 2 || job.MatchedItems != 1 || job.MissingItems != 1 || job.RedeliveredItems != 1 { + t.Fatalf("unexpected dry-run counts: %+v", job) + } +} + +func TestProviderErrorForDBRedactsProviderSecrets(t *testing.T) { + got := providerErrorForDB(errors.New("provider failed with sk_live_secret")) + if got != "provider request failed" { + t.Fatalf("expected redacted provider error, got %q", got) + } +} + +type fakeReconciliationRegistry map[string]reconcile.Adapter + +func (r fakeReconciliationRegistry) Adapter(provider string) (reconcile.Adapter, bool) { + adapter, ok := r[provider] + return adapter, ok +} + +type 
fakeReconciliationAdapter struct { + capabilities reconcile.Capabilities + scanResult reconcile.ScanResult + scanErr error + scanCalled bool + lookupObject reconcile.ProviderObject + lookupEvidence []reconcile.Evidence + lookupErr error + lookupID string + redeliveryEvidence []reconcile.Evidence + redeliveryErr error + redeliveryID string +} + +func (f *fakeReconciliationAdapter) Name() string { return "fake" } + +func (f *fakeReconciliationAdapter) Capabilities(map[string]string) reconcile.Capabilities { + return f.capabilities +} + +func (f *fakeReconciliationAdapter) ValidateConnection(context.Context, reconcile.Connection) error { + return nil +} + +func (f *fakeReconciliationAdapter) Scan(context.Context, reconcile.ScanRequest) (reconcile.ScanResult, error) { + f.scanCalled = true + return f.scanResult, f.scanErr +} + +func (f *fakeReconciliationAdapter) Lookup(_ context.Context, _ reconcile.Connection, objectID string) (reconcile.ProviderObject, []reconcile.Evidence, error) { + f.lookupID = objectID + return f.lookupObject, f.lookupEvidence, f.lookupErr +} + +func (f *fakeReconciliationAdapter) RequestRedelivery(_ context.Context, _ reconcile.Connection, objectID string) ([]reconcile.Evidence, error) { + f.redeliveryID = objectID + return f.redeliveryEvidence, f.redeliveryErr +} + +type fakeReconciliationStore struct { + connection domain.ProviderConnection + credential string + work ReconciliationWork + started bool + completed bool + failed string + cursor string + localEvents map[string]string + evidence []ProviderAPIEvidenceRecord + captures []RecoveredProviderEventCapture + items []ReconciliationItemRecord +} + +func newFakeReconciliationStore() *fakeReconciliationStore { + return &fakeReconciliationStore{localEvents: map[string]string{}} +} + +func (f *fakeReconciliationStore) GetReconciliationConnection(context.Context, string, string) (domain.ProviderConnection, string, error) { + return f.connection, f.credential, nil +} + +func (f 
*fakeReconciliationStore) GetReconciliationWork(context.Context, string, string) (ReconciliationWork, error) { + return f.work, nil +} + +func (f *fakeReconciliationStore) StartReconciliationJob(context.Context, string, string) (bool, error) { + f.started = true + return true, nil +} + +func (f *fakeReconciliationStore) RecordProviderAPIEvidence(_ context.Context, record ProviderAPIEvidenceRecord) (string, error) { + id := fmt.Sprintf("pae_%d", len(f.evidence)+1) + f.evidence = append(f.evidence, record) + return id, nil +} + +func (f *fakeReconciliationStore) FindLocalProviderEvent(_ context.Context, _ string, _ domain.ProviderConnection, providerObjectID string) (string, error) { + return f.localEvents[providerObjectID], nil +} + +func (f *fakeReconciliationStore) CaptureRecoveredProviderEvent(_ context.Context, input RecoveredProviderEventCapture) (string, error) { + f.captures = append(f.captures, input) + return "evt_recovered", nil +} + +func (f *fakeReconciliationStore) InsertReconciliationItem(_ context.Context, input ReconciliationItemRecord) (string, error) { + if input.EvidenceID == "" && len(f.evidence) > 0 { + input.EvidenceID = fmt.Sprintf("pae_%d", len(f.evidence)) + } + f.items = append(f.items, input) + return "rci_1", nil +} + +func (f *fakeReconciliationStore) AttachProviderEvidenceToItem(context.Context, string, string, string) error { + return nil +} + +func (f *fakeReconciliationStore) UpdateReconciliationCursor(_ context.Context, _, _, cursor string) error { + f.cursor = cursor + return nil +} + +func (f *fakeReconciliationStore) CompleteReconciliationJob(context.Context, string, string) error { + f.completed = true + return nil +} + +func (f *fakeReconciliationStore) FailReconciliationJob(_ context.Context, _, _, errorText string) error { + f.failed = errorText + return nil +} + +var _ = time.Now diff --git a/internal/app/service_test.go b/internal/app/service_test.go index 895934e..2564a03 100644 --- a/internal/app/service_test.go +++ 
b/internal/app/service_test.go @@ -225,11 +225,61 @@ func TestIngestCloudEventsStructuredMetadata(t *testing.T) { t.Fatal(err) } if !res.Accepted { - t.Fatal("structured CloudEvents request should be accepted after capture") + t.Fatal("structured unsigned CloudEvents request should be accepted after evidence capture") + } + if store.last.VerificationOK || store.last.Event.Verified { + t.Fatalf("unsigned CloudEvents must not be marked verified: %+v", store.last.Event) + } + if store.last.VerifyReason != "unsigned_cloudevents" { + t.Fatalf("expected unsigned CloudEvents verification reason, got %q", store.last.VerifyReason) } if store.last.Event.ProviderID != "evt_cloud" || store.last.Event.Type != "customer.created" { t.Fatalf("unexpected CloudEvents metadata: %+v", store.last.Event) } + if len(store.last.Normalized.Envelope) != 0 { + t.Fatal("unsigned CloudEvents must not create a trusted normalized envelope") + } +} + +func TestIngestCloudEventsBinaryModeIsEvidenceOnly(t *testing.T) { + store := &fakeStore{source: domain.Source{ + ID: "src_cloud", + TenantID: "ten_123", + Provider: "cloudevents", + Adapter: "cloudevents", + State: domain.StateActive, + VerificationSecret: []byte("unused"), + }} + svc := NewIngestService(store, fixedClock(time.Unix(1_700_000_000, 0))) + + res, err := svc.Ingest(context.Background(), IngestRequest{ + TenantID: "ten_123", + SourceID: "src_cloud", + Provider: "cloudevents", + ContentType: "application/json", + RawBody: []byte(`{"amount":42}`), + Headers: []domain.HeaderPair{ + {Name: "Ce-Id", Value: "evt_binary"}, + {Name: "Ce-Type", Value: "customer.updated"}, + {Name: "Ce-Source", Value: "tests"}, + {Name: "Ce-Specversion", Value: "1.0"}, + }, + }) + if err != nil { + t.Fatal(err) + } + if !res.Accepted || !store.captured { + t.Fatalf("unsigned binary CloudEvents should be captured and acknowledged as evidence, result=%+v captured=%v", res, store.captured) + } + if store.last.VerificationOK || store.last.Event.Verified { + 
t.Fatalf("unsigned binary CloudEvents must not be marked verified: %+v", store.last.Event) + } + if store.last.Event.ProviderID != "evt_binary" || store.last.Event.Type != "customer.updated" { + t.Fatalf("unexpected binary CloudEvents metadata: %+v", store.last.Event) + } + if len(store.last.Normalized.Envelope) != 0 { + t.Fatal("unsigned binary CloudEvents must not create a trusted normalized envelope") + } } type fakeStore struct { diff --git a/internal/app/store_ports.go b/internal/app/store_ports.go new file mode 100644 index 0000000..9c552c8 --- /dev/null +++ b/internal/app/store_ports.go @@ -0,0 +1,218 @@ +package app + +import ( + "context" + + "webhookery/internal/domain" +) + +type APIKeyStore interface { + CreateAPIKey(ctx context.Context, input APIKeyCreateInput) (domain.APIKey, error) + ListAPIKeys(ctx context.Context, tenantID string, limit int) ([]domain.APIKey, error) + RevokeAPIKey(ctx context.Context, tenantID, apiKeyID, actorID, reason string) (domain.APIKey, error) +} + +type SourceStore interface { + CreateSource(ctx context.Context, source domain.Source) (domain.Source, error) + ListSources(ctx context.Context, tenantID string, limit int) ([]domain.Source, error) + GetSource(ctx context.Context, tenantID, sourceID string) (domain.Source, error) + UpdateSource(ctx context.Context, tenantID, sourceID, actorID string, req UpdateSourceRequest) (domain.Source, error) + DeleteSource(ctx context.Context, tenantID, sourceID, actorID, reason string) (domain.Source, error) + RotateSourceSecret(ctx context.Context, tenantID, sourceID, actorID string, req RotateSourceSecretRequest) (domain.SourceSecretVersion, error) +} + +type EndpointStore interface { + CreateEndpoint(ctx context.Context, endpoint domain.Endpoint) (domain.Endpoint, error) + ListEndpoints(ctx context.Context, tenantID string, limit int) ([]domain.Endpoint, error) + GetEndpoint(ctx context.Context, tenantID, endpointID string) (domain.Endpoint, error) + UpdateEndpoint(ctx context.Context, 
tenantID, endpointID, actorID string, req UpdateEndpointRequest) (domain.Endpoint, error) + DeleteEndpoint(ctx context.Context, tenantID, endpointID, actorID, reason string) (domain.Endpoint, error) + TestEndpoint(ctx context.Context, tenantID, endpointID, actorID, reason string) (domain.Delivery, error) + RotateEndpointSecret(ctx context.Context, tenantID, endpointID, actorID string, req RotateEndpointSecretRequest) (domain.EndpointSecretVersion, error) + ListEndpointHealth(ctx context.Context, tenantID string, limit int) ([]domain.EndpointHealth, error) +} + +type SubscriptionStore interface { + CreateSubscription(ctx context.Context, subscription domain.Subscription) (domain.Subscription, error) + ListSubscriptions(ctx context.Context, tenantID string, limit int) ([]domain.Subscription, error) + GetSubscription(ctx context.Context, tenantID, subscriptionID string) (domain.Subscription, error) + UpdateSubscription(ctx context.Context, tenantID, subscriptionID, actorID string, req UpdateSubscriptionRequest) (domain.Subscription, error) + DeleteSubscription(ctx context.Context, tenantID, subscriptionID, actorID, reason string) (domain.Subscription, error) +} + +type RouteStore interface { + CreateRoute(ctx context.Context, route domain.Route) (domain.Route, error) + ListRoutes(ctx context.Context, tenantID string, limit int) ([]domain.Route, error) + GetRoute(ctx context.Context, tenantID, routeID string) (domain.Route, error) + UpdateRoute(ctx context.Context, tenantID, routeID, actorID string, req UpdateRouteRequest) (domain.Route, error) + DeleteRoute(ctx context.Context, tenantID, routeID, actorID, reason string) (domain.Route, error) + ListRouteVersions(ctx context.Context, tenantID, routeID string, limit int) ([]domain.RouteVersion, error) + ActivateRoute(ctx context.Context, tenantID, routeID, actorID, reason string) (domain.Route, error) + DryRunRoute(ctx context.Context, tenantID, routeID, eventID string) (RouteDryRun, error) + CreateRetryPolicy(ctx 
context.Context, tenantID, actorID string, req CreateRetryPolicyRequest) (domain.RetryPolicy, error) + ListRetryPolicies(ctx context.Context, tenantID string, limit int) ([]domain.RetryPolicy, error) + GetRetryPolicy(ctx context.Context, tenantID, retryPolicyID string) (domain.RetryPolicy, error) + UpdateRetryPolicy(ctx context.Context, tenantID, retryPolicyID, actorID string, req UpdateRetryPolicyRequest) (domain.RetryPolicy, error) + DeleteRetryPolicy(ctx context.Context, tenantID, retryPolicyID, actorID, reason string) (domain.RetryPolicy, error) +} + +type SchemaStore interface { + CreateEventType(ctx context.Context, eventType domain.EventType) (domain.EventType, error) + ListEventTypes(ctx context.Context, tenantID string, limit int) ([]domain.EventType, error) + GetEventType(ctx context.Context, tenantID, eventType string) (domain.EventType, error) + UpdateEventType(ctx context.Context, tenantID, eventType, actorID string, req UpdateEventTypeRequest) (domain.EventType, error) + DeleteEventType(ctx context.Context, tenantID, eventType, actorID, reason string) (domain.EventType, error) + CreateEventSchema(ctx context.Context, schema domain.EventSchema) (domain.EventSchema, error) + ListEventSchemas(ctx context.Context, tenantID, eventType string, limit int) ([]domain.EventSchema, error) + GetEventSchema(ctx context.Context, tenantID, eventType, version string) (domain.EventSchema, error) + UpdateEventSchema(ctx context.Context, tenantID, eventType, version, actorID string, req UpdateEventSchemaRequest) (domain.EventSchema, error) + DeleteEventSchema(ctx context.Context, tenantID, eventType, version, actorID, reason string) (domain.EventSchema, error) +} + +type EventStore interface { + ListEvents(ctx context.Context, tenantID string, req EventSearchRequest) ([]domain.Event, error) + GetEvent(ctx context.Context, tenantID, eventID string) (domain.Event, error) + GetRawPayload(ctx context.Context, tenantID, eventID, actorID, reason string) (domain.RawPayload, 
error) + GetNormalizedEvent(ctx context.Context, tenantID, eventID, actorID string, includeData bool) (domain.NormalizedEnvelope, error) + ListEventTimeline(ctx context.Context, tenantID, eventID string, limit int) ([]EventTimelineEntry, error) +} + +type IncidentStore interface { + CreateIncident(ctx context.Context, incident domain.Incident) (domain.Incident, error) + ListIncidents(ctx context.Context, tenantID string, limit int) ([]domain.Incident, error) + GetIncident(ctx context.Context, tenantID, incidentID string) (domain.Incident, error) + AddIncidentEvent(ctx context.Context, tenantID, incidentID, eventID, actorID, reason string) (domain.IncidentEvent, error) + RemoveIncidentEvent(ctx context.Context, tenantID, incidentID, eventID, actorID, reason string) (domain.IncidentEvent, error) + ListIncidentEvents(ctx context.Context, tenantID, incidentID string) ([]domain.IncidentEvent, error) + CreateIncidentReportSnapshot(ctx context.Context, tenantID, incidentID, actorID, reason string, report IncidentReport, markdown string) (domain.IncidentReportSnapshot, error) + GetIncidentReportSnapshot(ctx context.Context, tenantID, incidentID string) (domain.IncidentReportSnapshot, error) + CreateIncidentEvidenceExport(ctx context.Context, tenantID, incidentID, actorID string, req CreateIncidentEvidenceExportRequest, report IncidentReport, markdown string) (domain.IncidentEvidenceExport, domain.EvidenceExport, error) +} + +type DeliveryStore interface { + ListDeliveries(ctx context.Context, tenantID string, limit int) ([]domain.Delivery, error) + ListDeliveryAttempts(ctx context.Context, tenantID, deliveryID string, limit int) ([]domain.DeliveryAttempt, error) + GetDeliveryAttempt(ctx context.Context, tenantID, attemptID string) (domain.DeliveryAttempt, error) + RetryDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.Delivery, error) + CancelDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.Delivery, 
error) +} + +type OpsStore interface { + OpsMetrics(ctx context.Context, tenantID string) (domain.OpsMetrics, error) + ListWorkers(ctx context.Context, tenantID string, limit int) ([]domain.WorkerStatus, error) + GetWorker(ctx context.Context, tenantID, workerID string) (domain.WorkerStatus, error) + ListQueues(ctx context.Context, tenantID string) ([]domain.QueueStats, error) + OpsStorage(ctx context.Context, tenantID string) (domain.OpsStorageStatus, error) + ListMetricRollups(ctx context.Context, tenantID, metricName string, limit int) ([]domain.MetricRollup, error) +} + +type SignalStore interface { + CreateAlertRule(ctx context.Context, tenantID, actorID string, req CreateAlertRuleRequest) (domain.AlertRule, error) + ListAlertRules(ctx context.Context, tenantID string, limit int) ([]domain.AlertRule, error) + GetAlertRule(ctx context.Context, tenantID, alertID string) (domain.AlertRule, error) + UpdateAlertRule(ctx context.Context, tenantID, alertID, actorID string, req UpdateAlertRuleRequest) (domain.AlertRule, error) + DeleteAlertRule(ctx context.Context, tenantID, alertID, actorID, reason string) (domain.AlertRule, error) + ListAlertFirings(ctx context.Context, tenantID, state string, limit int) ([]domain.AlertFiring, error) + GetAlertFiring(ctx context.Context, tenantID, firingID string) (domain.AlertFiring, error) + AcknowledgeAlertFiring(ctx context.Context, tenantID, firingID, actorID, reason string) (domain.AlertFiring, error) + CreateNotificationChannel(ctx context.Context, tenantID, actorID string, req CreateNotificationChannelRequest) (domain.NotificationChannel, error) + ListNotificationChannels(ctx context.Context, tenantID string, limit int) ([]domain.NotificationChannel, error) + GetNotificationChannel(ctx context.Context, tenantID, channelID string) (domain.NotificationChannel, error) + UpdateNotificationChannel(ctx context.Context, tenantID, channelID, actorID string, req UpdateNotificationChannelRequest) (domain.NotificationChannel, error) + 
DeleteNotificationChannel(ctx context.Context, tenantID, channelID, actorID, reason string) (domain.NotificationChannel, error) + TestNotificationChannel(ctx context.Context, tenantID, channelID, actorID, reason string) (domain.NotificationDelivery, error) + ListNotificationDeliveries(ctx context.Context, tenantID, state string, limit int) ([]domain.NotificationDelivery, error) + ListNotificationDeliveryAttempts(ctx context.Context, tenantID, deliveryID string, limit int) ([]domain.NotificationDeliveryAttempt, error) + RetryNotificationDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.NotificationDelivery, error) + CreateSIEMSink(ctx context.Context, tenantID, actorID string, req CreateSIEMSinkRequest) (domain.SIEMSink, error) + ListSIEMSinks(ctx context.Context, tenantID string, limit int) ([]domain.SIEMSink, error) + GetSIEMSink(ctx context.Context, tenantID, sinkID string) (domain.SIEMSink, error) + UpdateSIEMSink(ctx context.Context, tenantID, sinkID, actorID string, req UpdateSIEMSinkRequest) (domain.SIEMSink, error) + DeleteSIEMSink(ctx context.Context, tenantID, sinkID, actorID, reason string) (domain.SIEMSink, error) + TestSIEMSink(ctx context.Context, tenantID, sinkID, actorID, reason string) (domain.SIEMDelivery, error) + ListSIEMDeliveries(ctx context.Context, tenantID, state string, limit int) ([]domain.SIEMDelivery, error) + ListSIEMDeliveryAttempts(ctx context.Context, tenantID, deliveryID string, limit int) ([]domain.SIEMDeliveryAttempt, error) + RetrySIEMDelivery(ctx context.Context, tenantID, deliveryID, actorID, reason string) (domain.SIEMDelivery, error) +} + +type AuditStore interface { + ListAuditEvents(ctx context.Context, tenantID string, limit int) ([]domain.AuditEvent, error) + GetAuditChainHead(ctx context.Context, tenantID string) (domain.AuditChainHead, error) + VerifyAuditChain(ctx context.Context, tenantID string, req AuditChainVerifyRequest) (domain.AuditChainVerification, error) + 
CreateAuditChainAnchor(ctx context.Context, tenantID, actorID string, req AuditChainAnchorRequest) (domain.AuditChainAnchor, error) + ListAuditChainAnchors(ctx context.Context, tenantID string, limit int) ([]domain.AuditChainAnchor, error) + GetAuditChainAnchor(ctx context.Context, tenantID, anchorID string) (domain.AuditChainAnchor, error) +} + +type RetentionStore interface { + ListRetentionPolicies(ctx context.Context, tenantID string, limit int) ([]domain.RetentionPolicy, error) + CreateRetentionPolicy(ctx context.Context, tenantID, actorID string, req CreateRetentionPolicyRequest) (domain.RetentionPolicy, error) + UpdateRetentionPolicy(ctx context.Context, tenantID, policyID, actorID string, req UpdateRetentionPolicyRequest) (domain.RetentionPolicy, error) +} + +type ProviderConnectionStore interface { + CreateProviderConnection(ctx context.Context, tenantID, actorID string, req CreateProviderConnectionRequest) (domain.ProviderConnection, error) + ListProviderConnections(ctx context.Context, tenantID string, limit int) ([]domain.ProviderConnection, error) + GetProviderConnection(ctx context.Context, tenantID, connectionID string) (domain.ProviderConnection, error) + VerifyProviderConnection(ctx context.Context, tenantID, connectionID, actorID, reason string) (domain.ProviderConnection, error) + RevokeProviderConnection(ctx context.Context, tenantID, connectionID, actorID, reason string) (domain.ProviderConnection, error) +} + +type ReconciliationStore interface { + CreateReconciliationJob(ctx context.Context, tenantID, actorID string, req ReconciliationJobRequest) (domain.ReconciliationJob, error) + ListReconciliationJobs(ctx context.Context, tenantID string, limit int) ([]domain.ReconciliationJob, error) + GetReconciliationJob(ctx context.Context, tenantID, jobID string) (domain.ReconciliationJob, error) + ListReconciliationItems(ctx context.Context, tenantID, jobID string, limit int) ([]domain.ReconciliationItem, error) + CancelReconciliationJob(ctx 
context.Context, tenantID, jobID, actorID, reason string) (domain.ReconciliationJob, error) +} + +type ProviderAdapterStore interface { + CreateProviderAdapter(ctx context.Context, tenantID, actorID string, req CreateProviderAdapterRequest) (domain.ProviderAdapter, error) + ListProviderAdapters(ctx context.Context, tenantID string, limit int) ([]domain.ProviderAdapter, error) + GetProviderAdapter(ctx context.Context, tenantID, adapterID string) (domain.ProviderAdapter, error) + CreateAdapterVersion(ctx context.Context, tenantID, adapterID, actorID string, req CreateAdapterVersionRequest) (domain.AdapterVersion, error) + ListAdapterVersions(ctx context.Context, tenantID, adapterID string, limit int) ([]domain.AdapterVersion, error) + CreateAdapterTestVector(ctx context.Context, tenantID, adapterID, versionID, actorID string, req CreateAdapterTestVectorRequest) (domain.AdapterTestVector, error) + TransitionAdapterVersion(ctx context.Context, tenantID, adapterID, versionID, actorID string, req AdapterVersionTransitionRequest) (domain.AdapterVersion, error) +} + +type EvidenceExportStore interface { + CreateAuditExport(ctx context.Context, tenantID, actorID string, req CreateAuditExportRequest) (domain.EvidenceExport, error) + ListAuditExports(ctx context.Context, tenantID string, limit int) ([]domain.EvidenceExport, error) + GetAuditExport(ctx context.Context, tenantID, exportID string) (domain.EvidenceExport, error) + DownloadAuditExport(ctx context.Context, tenantID, exportID, actorID string) (EvidenceExportDownload, error) +} + +type DeadLetterStore interface { + ListDeadLetter(ctx context.Context, tenantID string, limit int) ([]map[string]any, error) + ReleaseDeadLetter(ctx context.Context, tenantID, entryID, actorID, reasonCode, reason string) (ReplayJob, error) + BulkReleaseDeadLetter(ctx context.Context, tenantID string, entryIDs []string, actorID, reasonCode, reason string) ([]ReplayJob, error) + ListQuarantine(ctx context.Context, tenantID string, limit int) 
([]map[string]any, error) + ApproveQuarantine(ctx context.Context, tenantID, entryID, actorID, reason string, routeAfterRelease bool) (map[string]any, error) + RejectQuarantine(ctx context.Context, tenantID, entryID, actorID, reason string) (map[string]any, error) +} + +type ReplayStore interface { + DryRunReplay(ctx context.Context, tenantID string, req ReplayRequest) (ReplayDryRun, error) + CreateReplay(ctx context.Context, tenantID, actorID string, req ReplayRequest) (ReplayJob, error) + ListReplayJobs(ctx context.Context, tenantID string, limit int) ([]ReplayJob, error) + ApproveReplayJob(ctx context.Context, tenantID, replayJobID, actorID, reason string) (ReplayJob, error) + PauseReplayJob(ctx context.Context, tenantID, replayJobID, actorID, reason string) (ReplayJob, error) + ResumeReplayJob(ctx context.Context, tenantID, replayJobID, actorID, reason string) (ReplayJob, error) + CancelReplayJob(ctx context.Context, tenantID, replayJobID, actorID, reason string) (ReplayJob, error) +} + +type ReplayApprovalPolicyStore interface { + CreateReplayApprovalPolicy(ctx context.Context, tenantID, actorID string, req CreateReplayApprovalPolicyRequest) (domain.ReplayApprovalPolicy, error) + ListReplayApprovalPolicies(ctx context.Context, tenantID string, limit int) ([]domain.ReplayApprovalPolicy, error) + DisableReplayApprovalPolicy(ctx context.Context, tenantID, policyID, actorID, reason string) (domain.ReplayApprovalPolicy, error) +} + +type TransformationStore interface { + CreateTransformation(ctx context.Context, tenantID, actorID string, req CreateTransformationRequest) (domain.Transformation, error) + ListTransformations(ctx context.Context, tenantID string, limit int) ([]domain.Transformation, error) + GetTransformation(ctx context.Context, tenantID, transformationID string) (domain.Transformation, error) + CreateTransformationVersion(ctx context.Context, tenantID, transformationID, actorID string, req CreateTransformationVersionRequest) 
(domain.TransformationVersion, error) + ListTransformationVersions(ctx context.Context, tenantID, transformationID string, limit int) ([]domain.TransformationVersion, error) + ActivateTransformationVersion(ctx context.Context, tenantID, transformationID, versionID, actorID, reason string) (domain.TransformationVersion, error) +} diff --git a/internal/authz/policy.go b/internal/authz/policy.go index e185422..4cb7ac1 100644 --- a/internal/authz/policy.go +++ b/internal/authz/policy.go @@ -73,19 +73,20 @@ func roleAllows(role Role, scope string) bool { "events:read", "events:write", "deliveries:read", "deliveries:retry", "replay:read", "replay:write", + "incidents:read", "incidents:write", }) case RoleOperator: return hasScope(scope, []string{ "sources:read", "endpoints:read", "subscriptions:read", "routes:read", "schemas:read", "events:read", "events:write", "deliveries:read", "deliveries:retry", "replay:read", "replay:write", - "ops:read", "ops:write", + "incidents:read", "incidents:write", "ops:read", "ops:write", }) case RoleSecurity: - return hasScope(scope, []string{"security:read", "security:write", "audit:read", "events:read", "events:raw"}) + return hasScope(scope, []string{"security:read", "security:write", "audit:read", "events:read", "events:raw", "incidents:read", "incidents:write"}) case RoleAuditor: - return hasScope(scope, []string{"audit:read", "events:read", "events:raw", "deliveries:read", "replay:read", "security:read"}) + return hasScope(scope, []string{"audit:read", "events:read", "events:raw", "deliveries:read", "replay:read", "security:read", "incidents:read"}) case RoleSupport: - return hasScope(scope, []string{"events:read", "deliveries:read", "replay:read"}) + return hasScope(scope, []string{"events:read", "deliveries:read", "replay:read", "incidents:read"}) default: return false } diff --git a/internal/config/config.go b/internal/config/config.go index 4aa2059..9c9bad0 100644 --- a/internal/config/config.go +++ b/internal/config/config.go 
@@ -3,8 +3,10 @@ package config import ( "encoding/base64" "fmt" + "net/netip" "os" "strconv" + "strings" ) type Config struct { @@ -34,6 +36,7 @@ type Config struct { BootstrapTenantID string BootstrapAPIKeyHash string BootstrapAPIKeyPrefix string + TrustedProxyCIDRs []netip.Prefix } func Load() (Config, error) { @@ -63,6 +66,11 @@ func Load() (Config, error) { BootstrapAPIKeyHash: os.Getenv("WEBHOOKERY_BOOTSTRAP_API_KEY_HASH"), BootstrapAPIKeyPrefix: os.Getenv("WEBHOOKERY_BOOTSTRAP_API_KEY_PREFIX"), } + trustedProxyCIDRs, err := parseTrustedProxyCIDRs(os.Getenv("WEBHOOKERY_TRUSTED_PROXY_CIDRS")) + if err != nil { + return Config{}, err + } + cfg.TrustedProxyCIDRs = trustedProxyCIDRs enableUI, err := strconv.ParseBool(envDefault("WEBHOOKERY_ENABLE_UI", "false")) if err != nil { return Config{}, fmt.Errorf("WEBHOOKERY_ENABLE_UI must be boolean: %w", err) @@ -118,3 +126,24 @@ func envDefault(name, fallback string) string { } return fallback } + +func parseTrustedProxyCIDRs(raw string) ([]netip.Prefix, error) { + raw = strings.TrimSpace(raw) + if raw == "" { + return nil, nil + } + parts := strings.Split(raw, ",") + out := make([]netip.Prefix, 0, len(parts)) + for _, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + prefix, err := netip.ParsePrefix(part) + if err != nil { + return nil, fmt.Errorf("WEBHOOKERY_TRUSTED_PROXY_CIDRS contains invalid CIDR %q: %w", part, err) + } + out = append(out, prefix.Masked()) + } + return out, nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index bd62521..55817f6 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -1,6 +1,9 @@ package config -import "testing" +import ( + "net/netip" + "testing" +) func TestLoadDefaultsRawStorageToPostgres(t *testing.T) { t.Setenv("WEBHOOKERY_DATABASE_URL", "postgres://example") @@ -92,3 +95,31 @@ func TestLoadRequiresTLSFilesForProducerMTLSCA(t *testing.T) { t.Fatal("expected producer mTLS CA 
to require API TLS certificate and key files") } } + +func TestLoadParsesTrustedProxyCIDRs(t *testing.T) { + t.Setenv("WEBHOOKERY_DATABASE_URL", "postgres://example") + t.Setenv("WEBHOOKERY_TRUSTED_PROXY_CIDRS", "10.0.0.0/8,2001:db8::/32") + + cfg, err := Load() + if err != nil { + t.Fatal(err) + } + expected := []netip.Prefix{netip.MustParsePrefix("10.0.0.0/8"), netip.MustParsePrefix("2001:db8::/32")} + if len(cfg.TrustedProxyCIDRs) != len(expected) { + t.Fatalf("trusted proxy CIDRs=%v want %v", cfg.TrustedProxyCIDRs, expected) + } + for i := range expected { + if cfg.TrustedProxyCIDRs[i] != expected[i] { + t.Fatalf("trusted proxy CIDR[%d]=%v want %v", i, cfg.TrustedProxyCIDRs[i], expected[i]) + } + } +} + +func TestLoadRejectsInvalidTrustedProxyCIDR(t *testing.T) { + t.Setenv("WEBHOOKERY_DATABASE_URL", "postgres://example") + t.Setenv("WEBHOOKERY_TRUSTED_PROXY_CIDRS", "not-a-cidr") + + if _, err := Load(); err == nil { + t.Fatal("expected invalid trusted proxy CIDR error") + } +} diff --git a/internal/domain/metrics_test.go b/internal/domain/metrics_test.go index ddca658..1df94fb 100644 --- a/internal/domain/metrics_test.go +++ b/internal/domain/metrics_test.go @@ -12,3 +12,21 @@ func TestMetricDimensionsHashIsDeterministic(t *testing.T) { t.Fatal("different dimensions should produce a different hash") } } + +func TestCanonicalHeadersLowercasesNamesAndPreservesDuplicateOrder(t *testing.T) { + headers := CanonicalHeaders([]HeaderPair{ + {Name: "Stripe-Signature", Value: "v1=first"}, + {Name: "stripe-signature", Value: "v1=second"}, + {Name: "X-Webhookery-Trace", Value: "trace-1"}, + }) + + if got := headers["stripe-signature"]; len(got) != 2 || got[0] != "v1=first" || got[1] != "v1=second" { + t.Fatalf("signature headers were not canonicalized in order: %#v", got) + } + if got := headers["x-webhookery-trace"]; len(got) != 1 || got[0] != "trace-1" { + t.Fatalf("trace header was not canonicalized: %#v", got) + } + if _, ok := headers["Stripe-Signature"]; ok { + 
t.Fatalf("mixed-case key should not remain in canonical map: %#v", headers) + } +} diff --git a/internal/domain/types.go b/internal/domain/types.go index c380641..f718635 100644 --- a/internal/domain/types.go +++ b/internal/domain/types.go @@ -67,6 +67,7 @@ const ( ReconciliationOutcomeUnrecoverable = "unrecoverable" ReconciliationOutcomeFailed = "failed" VerificationReasonProviderAPIReconcile = "provider_api_reconciliation" + VerificationReasonUnsignedCloudEvents = "unsigned_cloudevents" ProviderAPIEvidenceStorageStatusStored = "stored" ProviderAPIEvidenceStorageStatusDeleted = "deleted" ProviderAPIEvidenceStorageStatusMetadata = "metadata_only" @@ -372,6 +373,20 @@ type AccessPolicyRule struct { UpdatedAt time.Time `json:"updated_at"` } +type ReplayApprovalPolicy struct { + ID string `json:"id"` + TenantID string `json:"tenant_id"` + ScopeType string `json:"scope_type"` + ScopeID string `json:"scope_id,omitempty"` + RequireApproval bool `json:"require_approval"` + DefaultExpirySeconds int `json:"default_expiry_seconds"` + State string `json:"state"` + Reason string `json:"reason,omitempty"` + CreatedBy string `json:"created_by,omitempty"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + type AuthzDecisionLog struct { ID string `json:"id"` TenantID string `json:"tenant_id"` @@ -543,6 +558,47 @@ type Event struct { TraceID string `json:"trace_id"` } +type Incident struct { + ID string `json:"id"` + TenantID string `json:"tenant_id"` + Title string `json:"title"` + Reason string `json:"reason"` + State string `json:"state"` + CreatedBy string `json:"created_by"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +type IncidentEvent struct { + ID string `json:"id"` + TenantID string `json:"tenant_id"` + IncidentID string `json:"incident_id"` + EventID string `json:"event_id"` + AddedBy string `json:"added_by"` + Reason string `json:"reason"` + CreatedAt time.Time `json:"created_at"` +} + 
+type IncidentReportSnapshot struct { + ID string `json:"id"` + TenantID string `json:"tenant_id"` + IncidentID string `json:"incident_id"` + SchemaVersion string `json:"schema_version"` + Report json.RawMessage `json:"report"` + Markdown string `json:"markdown"` + GeneratedBy string `json:"generated_by"` + GeneratedAt time.Time `json:"generated_at"` +} + +type IncidentEvidenceExport struct { + ID string `json:"id"` + TenantID string `json:"tenant_id"` + IncidentID string `json:"incident_id"` + ExportID string `json:"export_id"` + CreatedBy string `json:"created_by"` + CreatedAt time.Time `json:"created_at"` +} + type ProviderAdapter struct { ID string `json:"id"` TenantID string `json:"tenant_id,omitempty"` diff --git a/internal/e2e/perf_smoke_test.go b/internal/e2e/perf_smoke_test.go new file mode 100644 index 0000000..a63dcd2 --- /dev/null +++ b/internal/e2e/perf_smoke_test.go @@ -0,0 +1,273 @@ +package e2e + +import ( + "context" + "encoding/json" + "fmt" + "math" + "net/http" + "os" + "path/filepath" + "sort" + "strings" + "testing" + "time" + + "webhookery/internal/adapters/postgres" + "webhookery/internal/app" + "webhookery/internal/authz" + "webhookery/internal/domain" + "webhookery/internal/ssrf" + "webhookery/internal/worker" + "webhookery/pkg/verifier" +) + +type perfSmokeReport struct { + GeneratedAtUTC string `json:"generated_at_utc"` + Scenario string `json:"scenario"` + EventCount int `json:"event_count"` + IngestP50MS float64 `json:"ingest_p50_ms"` + IngestP95MS float64 `json:"ingest_p95_ms"` + IngestP99MS float64 `json:"ingest_p99_ms"` + DeliveryDrainMS float64 `json:"delivery_drain_ms"` + DeliveryThroughputPerSec float64 `json:"delivery_throughput_per_sec"` + ReplayCreateAndDrainMS float64 `json:"replay_create_and_drain_ms"` + RetryScheduledDeliveries int `json:"retry_scheduled_deliveries"` + SuccessfulDeliveries int `json:"successful_deliveries"` + ErrorCount int `json:"error_count"` + SanitizedEvidenceStatement string 
`json:"sanitized_evidence_statement"` + Notes []string `json:"notes"` +} + +func TestPerfSmoke(t *testing.T) { + ctx, store, actor := openRCStore(t) + defer store.Close() + + now := time.Date(2026, 5, 26, 12, 0, 0, 0, time.UTC) + control := app.NewControlService(store, ssrf.Validator{Resolver: rcResolver}) + source, _ := createRCRoute(t, ctx, control, actor, "stripe", "stripe", "invoice.perf_smoke") + ingest := app.NewIngestService(store, fixedClock{now: now}) + + const eventCount = 25 + ingestDurations := make([]time.Duration, 0, eventCount) + eventIDs := make([]string, 0, eventCount) + for i := 0; i < eventCount; i++ { + body := []byte(fmt.Sprintf(`{"id":"evt_perf_%s_%02d","type":"invoice.perf_smoke","data":{"object":{"id":"in_%02d"}}}`, testSuffix(t), i, i)) + start := time.Now() + result, err := ingest.Ingest(ctx, app.IngestRequest{ + TenantID: actor.TenantID, + SourceID: source.ID, + Provider: "stripe", + RawBody: body, + Headers: []domain.HeaderPair{{Name: "Stripe-Signature", Value: stripeSignature(now, body)}}, + ContentType: "application/json", + RemoteIP: "198.51.100.10", + }) + ingestDurations = append(ingestDurations, time.Since(start)) + if err != nil { + t.Fatalf("perf ingest event %d: %v", i, err) + } + if !result.Accepted || result.EventID == "" { + t.Fatalf("perf ingest event %d was not accepted durably: %+v", i, result) + } + eventIDs = append(eventIDs, result.EventID) + } + + delivery := &recordingDeliveryClient{ + t: t, + now: now.Add(time.Second), + result: workerSuccessResult(), + } + drainStart := time.Now() + for i := 0; i < 4 && len(delivery.calls) < eventCount; i++ { + runWorkerOnce(t, ctx, store, delivery, "perf-drain-"+testSuffix(t)) + } + deliveryDrain := time.Since(drainStart) + if len(delivery.calls) != eventCount { + t.Fatalf("expected %d successful deliveries, got %d", eventCount, len(delivery.calls)) + } + + replayStart := time.Now() + if _, err := control.CreateReplay(ctx, actor, app.ReplayRequest{ + EventID: eventIDs[0], + 
ReasonCode: app.ReplayReasonTestDrill, + Reason: "performance smoke replay", + ConfigMode: app.ReplayConfigCurrent, + }); err != nil { + t.Fatalf("create perf replay: %v", err) + } + for i := 0; i < 3 && len(delivery.calls) < eventCount+1; i++ { + runWorkerOnce(t, ctx, store, delivery, "perf-replay-"+testSuffix(t)) + } + replayDrain := time.Since(replayStart) + if len(delivery.calls) < eventCount+1 { + t.Fatalf("expected replay delivery to drain, got %d calls", len(delivery.calls)) + } + + retryScheduled := scheduleOneRetry(t, ctx, store, control, actor, source.ID, now) + report := perfSmokeReport{ + GeneratedAtUTC: time.Now().UTC().Format(time.RFC3339), + Scenario: "local-postgres-fake-provider-fake-receiver", + EventCount: eventCount, + IngestP50MS: millis(percentileDuration(ingestDurations, 50)), + IngestP95MS: millis(percentileDuration(ingestDurations, 95)), + IngestP99MS: millis(percentileDuration(ingestDurations, 99)), + DeliveryDrainMS: millis(deliveryDrain), + DeliveryThroughputPerSec: safeRate(float64(eventCount), deliveryDrain), + ReplayCreateAndDrainMS: millis(replayDrain), + RetryScheduledDeliveries: retryScheduled, + SuccessfulDeliveries: len(delivery.calls), + ErrorCount: 0, + SanitizedEvidenceStatement: "contains aggregate timings and counts only; no database URLs, endpoint URLs, secrets, signatures, raw payloads, tenant IDs, or customer data", + Notes: []string{ + "local fake Stripe-style signatures only", + "local fake receiver only", + "smoke values are release evidence, not universal performance guarantees", + }, + } + writePerfSmokeReport(t, report) +} + +func scheduleOneRetry(t *testing.T, ctx context.Context, store *postgres.Store, control *app.ControlService, actor authz.Actor, sourceID string, now time.Time) int { + t.Helper() + retryPolicy, err := control.CreateRetryPolicy(ctx, actor, app.CreateRetryPolicyRequest{ + Name: "perf retry", + MaxAttempts: 2, + MaxDurationSeconds: 60, + InitialDelaySeconds: 1, + MaxDelaySeconds: 1, + }) + if err 
!= nil { + t.Fatalf("create perf retry policy: %v", err) + } + endpoint, _, err := control.CreateEndpoint(ctx, actor, app.CreateEndpointRequest{ + Name: "Perf retry receiver", + URL: "https://receiver.example.com/retry", + }) + if err != nil { + t.Fatalf("create perf retry endpoint: %v", err) + } + if _, err := control.CreateRoute(ctx, actor, app.CreateRouteRequest{ + SourceID: sourceID, + Name: "Perf retry route", + Priority: 20, + EventTypes: []string{"invoice.perf_retry"}, + EndpointID: endpoint.ID, + RetryPolicyID: retryPolicy.ID, + State: domain.StateActive, + }); err != nil { + t.Fatalf("create perf retry route: %v", err) + } + ingest := app.NewIngestService(store, fixedClock{now: now}) + body := []byte(`{"id":"evt_perf_retry_` + testSuffix(t) + `","type":"invoice.perf_retry","data":{"object":{"id":"retry"}}}`) + result, err := ingest.Ingest(ctx, app.IngestRequest{ + TenantID: actor.TenantID, + SourceID: sourceID, + Provider: "stripe", + RawBody: body, + Headers: []domain.HeaderPair{{Name: "Stripe-Signature", Value: stripeSignature(now, body)}}, + ContentType: "application/json", + RemoteIP: "198.51.100.10", + }) + if err != nil { + t.Fatalf("ingest perf retry event: %v", err) + } + failingDelivery := &recordingDeliveryClient{ + t: t, + now: now.Add(2 * time.Second), + result: worker.DeliveryResult{ + StatusCode: http.StatusServiceUnavailable, + ResponseBody: []byte("temporary failure"), + FailureClass: "http_5xx", + }, + } + runWorkerOnce(t, ctx, store, failingDelivery, "perf-retry-"+testSuffix(t)) + deliveries, err := control.ListDeliveries(ctx, actor, 200) + if err != nil { + t.Fatalf("list perf retry deliveries: %v", err) + } + scheduled := 0 + for _, delivery := range deliveries { + if delivery.EventID == result.EventID && delivery.State == "scheduled" && delivery.AttemptCount == 1 { + scheduled++ + } + } + if scheduled == 0 { + t.Fatalf("expected retry delivery to be rescheduled after first failure") + } + return scheduled +} + +func 
workerSuccessResult() worker.DeliveryResult { + return worker.DeliveryResult{ + StatusCode: http.StatusAccepted, + ResponseBody: []byte("ok"), + FailureClass: "success", + } +} + +func stripeSignature(now time.Time, body []byte) string { + return verifier.TimestampedHeader("v1", now, []byte("whsec_rc"), body) +} + +func writePerfSmokeReport(t *testing.T, report perfSmokeReport) { + t.Helper() + outDir := os.Getenv("WEBHOOKERY_PERF_OUTPUT_DIR") + if strings.TrimSpace(outDir) == "" { + outDir = filepath.Join("..", "..", "tmp", "perf-smoke") + } + if err := os.MkdirAll(outDir, 0o700); err != nil { // #nosec G703 -- perf smoke writes to an operator-selected local evidence directory. + t.Fatalf("create perf smoke output dir: %v", err) + } + jsonBody, err := json.MarshalIndent(report, "", " ") + if err != nil { + t.Fatalf("marshal perf smoke report: %v", err) + } + if err := os.WriteFile(filepath.Join(outDir, "perf-smoke.json"), append(jsonBody, '\n'), 0o600); err != nil { // #nosec G703 -- perf smoke writes sanitized evidence to an operator-selected local directory. + t.Fatalf("write perf smoke json: %v", err) + } + markdown := fmt.Sprintf(`# Webhookery Performance Smoke + +- Scenario: %s +- Events: %d +- Ingest p50/p95/p99: %.3f / %.3f / %.3f ms +- Delivery drain: %.3f ms +- Delivery throughput: %.3f deliveries/sec +- Replay create and drain: %.3f ms +- Retry scheduled deliveries: %d +- Successful deliveries: %d +- Errors: %d + +%s. +`, report.Scenario, report.EventCount, report.IngestP50MS, report.IngestP95MS, report.IngestP99MS, report.DeliveryDrainMS, report.DeliveryThroughputPerSec, report.ReplayCreateAndDrainMS, report.RetryScheduledDeliveries, report.SuccessfulDeliveries, report.ErrorCount, report.SanitizedEvidenceStatement) + if err := os.WriteFile(filepath.Join(outDir, "perf-smoke.md"), []byte(markdown), 0o600); err != nil { // #nosec G703 -- perf smoke writes sanitized evidence to an operator-selected local directory. 
+ t.Fatalf("write perf smoke markdown: %v", err) + } +} + +func percentileDuration(values []time.Duration, p float64) time.Duration { + if len(values) == 0 { + return 0 + } + sorted := append([]time.Duration(nil), values...) + sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] }) + rank := int(math.Ceil((p/100)*float64(len(sorted)))) - 1 + if rank < 0 { + rank = 0 + } + if rank >= len(sorted) { + rank = len(sorted) - 1 + } + return sorted[rank] +} + +func millis(d time.Duration) float64 { + return float64(d.Microseconds()) / 1000 +} + +func safeRate(count float64, d time.Duration) float64 { + if d <= 0 { + return count + } + return count / d.Seconds() +} diff --git a/internal/e2e/rc_e2e_test.go b/internal/e2e/rc_e2e_test.go index c82ba9c..e5ebebc 100644 --- a/internal/e2e/rc_e2e_test.go +++ b/internal/e2e/rc_e2e_test.go @@ -1,9 +1,15 @@ package e2e import ( + "archive/tar" + "bytes" + "compress/gzip" "context" "encoding/base64" + "encoding/json" "errors" + "fmt" + "io" "net/http" "net/netip" "os" @@ -192,6 +198,7 @@ func TestRCE2EProviderIngestToSignedDelivery(t *testing.T) { } otherTenant := authz.Actor{ID: "usr_other", TenantID: actor.TenantID + "_other", Role: authz.RoleOwner, Scopes: []string{"*"}} + createRCActorMembership(t, ctx, store, otherTenant) if _, err := control.GetEvent(ctx, otherTenant, result.EventID); !errors.Is(err, app.ErrNotFound) { t.Fatalf("expected wrong-tenant event read to be hidden as not found, got %v", err) } @@ -330,7 +337,7 @@ func TestRCE2ERetryExhaustionDLQReleaseAndReplayModes(t *testing.T) { t.Fatalf("expected dead-lettered delivery for event %s: %+v", result.EventID, deliveries) } - released, err := control.ReleaseDeadLetter(ctx, actor, dlqEntryID, app.DeadLetterReleaseRequest{Reason: "RC release drill"}) + released, err := control.ReleaseDeadLetter(ctx, actor, dlqEntryID, app.DeadLetterReleaseRequest{ReasonCode: app.ReplayReasonTestDrill, Reason: "RC release drill"}) if err != nil { t.Fatalf("release dead 
letter: %v", err) } @@ -382,11 +389,11 @@ func TestRCE2ERetryExhaustionDLQReleaseAndReplayModes(t *testing.T) { t.Fatalf("expected initial replay source delivery, got %d", len(successClient.calls)) } - currentReplay, err := control.CreateReplay(ctx, actor, app.ReplayRequest{EventID: replayIngest.EventID, Reason: "RC current replay", ConfigMode: app.ReplayConfigCurrent}) + currentReplay, err := control.CreateReplay(ctx, actor, app.ReplayRequest{EventID: replayIngest.EventID, ReasonCode: app.ReplayReasonTestDrill, Reason: "RC current replay", ConfigMode: app.ReplayConfigCurrent}) if err != nil { t.Fatalf("create current-config replay: %v", err) } - originalReplay, err := control.CreateReplay(ctx, actor, app.ReplayRequest{EventID: replayIngest.EventID, Reason: "RC original replay", ConfigMode: app.ReplayConfigOriginal}) + originalReplay, err := control.CreateReplay(ctx, actor, app.ReplayRequest{EventID: replayIngest.EventID, ReasonCode: app.ReplayReasonTestDrill, Reason: "RC original replay", ConfigMode: app.ReplayConfigOriginal}) if err != nil { t.Fatalf("create original-config replay: %v", err) } @@ -450,7 +457,8 @@ func TestRCE2EEvidenceLifecycleRetentionExportAndPermissionGates(t *testing.T) { }, "rc-evidence-"+testSuffix(t)) reader := authz.Actor{ID: "usr_reader_" + testSuffix(t), TenantID: actor.TenantID, Role: authz.RoleSupport} - if _, err := control.GetRawPayload(ctx, reader, result.EventID); !errors.Is(err, app.ErrForbidden) { + createRCActorMembership(t, ctx, store, reader) + if _, err := control.GetRawPayload(ctx, reader, result.EventID, "permission regression"); !errors.Is(err, app.ErrForbidden) { t.Fatalf("expected raw payload read without events:raw to be forbidden, got %v", err) } if _, err := control.GetNormalizedEvent(ctx, reader, result.EventID, true); !errors.Is(err, app.ErrForbidden) { @@ -463,7 +471,7 @@ func TestRCE2EEvidenceLifecycleRetentionExportAndPermissionGates(t *testing.T) { if len(metadataOnly.Data) != 0 || metadataOnly.EnvelopeSHA256 
== "" || metadataOnly.MetadataSHA256 == "" { t.Fatalf("expected metadata-only normalized event with hashes and no data body: %+v", metadataOnly) } - raw, err := control.GetRawPayload(ctx, actor, result.EventID) + raw, err := control.GetRawPayload(ctx, actor, result.EventID, "verify raw payload evidence") if err != nil { t.Fatalf("owner raw payload read before retention: %v", err) } @@ -472,6 +480,7 @@ func TestRCE2EEvidenceLifecycleRetentionExportAndPermissionGates(t *testing.T) { } auditOnly := authz.Actor{ID: "usr_audit_" + testSuffix(t), TenantID: actor.TenantID, Role: authz.RoleAuditor, Scopes: []string{"audit:read"}} + createRCActorMembership(t, ctx, store, auditOnly) if _, err := control.CreateAuditExport(ctx, auditOnly, app.CreateAuditExportRequest{IncludePayloadBodies: true, Reason: "forbidden payload export"}); !errors.Is(err, app.ErrForbidden) { t.Fatalf("expected payload-inclusive export without events:raw to be forbidden, got %v", err) } @@ -485,7 +494,7 @@ func TestRCE2EEvidenceLifecycleRetentionExportAndPermissionGates(t *testing.T) { if err := store.ApplyRetentionPolicies(ctx, "rc-retention-"+testSuffix(t), 20); err != nil { t.Fatalf("apply retention policies: %v", err) } - if _, err := control.GetRawPayload(ctx, actor, result.EventID); !errors.Is(err, app.ErrGone) { + if _, err := control.GetRawPayload(ctx, actor, result.EventID, "verify retention tombstone"); !errors.Is(err, app.ErrGone) { t.Fatalf("expected retained raw payload body to be gone, got %v", err) } retainedMetadata, err := control.GetNormalizedEvent(ctx, reader, result.EventID, false) @@ -591,6 +600,177 @@ func TestRCE2EStorageFailureDrillsRejectInboundSuccess(t *testing.T) { } } +func TestRCE2EFailedPaymentWebhookIncidentPacketDemo(t *testing.T) { + outputDir := os.Getenv("WEBHOOKERY_DEMO_OUTPUT_DIR") + if outputDir == "" { + t.Skip("WEBHOOKERY_DEMO_OUTPUT_DIR is required to write demo artifacts") + } + ctx, store, actor := openRCStore(t) + defer store.Close() + + now := 
time.Date(2026, 5, 26, 16, 0, 0, 0, time.UTC) + control := app.NewControlService(store, ssrf.Validator{Resolver: rcResolver}) + retryPolicy, err := control.CreateRetryPolicy(ctx, actor, app.CreateRetryPolicyRequest{ + Name: "Demo single attempt", + MaxAttempts: 1, + MaxDurationSeconds: 60, + InitialDelaySeconds: 1, + MaxDelaySeconds: 1, + State: domain.StateActive, + }) + if err != nil { + t.Fatalf("create demo retry policy: %v", err) + } + source, endpoint := createRCRouteWithOptions(t, ctx, control, actor, "stripe", "stripe", "invoice.paid", rcRouteOptions{RetryPolicyID: retryPolicy.ID}) + body := []byte(`{"id":"evt_demo_payment_` + testSuffix(t) + `","type":"invoice.paid","account":"acct_demo","data":{"object":{"id":"in_demo","customer":"cus_demo"}}}`) + + ingest := app.NewIngestService(store, fixedClock{now: now}) + result, err := ingest.Ingest(ctx, app.IngestRequest{ + TenantID: actor.TenantID, + SourceID: source.ID, + Provider: "stripe", + RawBody: body, + Headers: []domain.HeaderPair{{Name: "Stripe-Signature", Value: verifier.TimestampedHeader("v1", now, []byte("whsec_rc"), body)}}, + ContentType: "application/json", + RemoteIP: "198.51.100.20", + }) + if err != nil { + t.Fatalf("ingest demo payment event: %v", err) + } + if !result.Accepted { + t.Fatalf("expected demo event to be accepted after durable capture: %+v", result) + } + + failingDelivery := &recordingDeliveryClient{ + t: t, + now: now.Add(time.Second), + result: worker.DeliveryResult{ + StatusCode: http.StatusInternalServerError, + ResponseBody: []byte("demo receiver is down"), + FailureClass: "temporary_http", + }, + } + runWorkerOnce(t, ctx, store, failingDelivery, "demo-fail-"+testSuffix(t)) + if len(failingDelivery.calls) != 1 { + t.Fatalf("expected one failed downstream delivery, got %d", len(failingDelivery.calls)) + } + deadLetters, err := control.ListDeadLetter(ctx, actor, 20) + if err != nil { + t.Fatalf("list demo dead letters: %v", err) + } + dlqEntryID := findMapID(deadLetters, 
"event_id", result.EventID) + if dlqEntryID == "" { + t.Fatalf("expected demo event %s to enter DLQ: %+v", result.EventID, deadLetters) + } + + incident, err := control.CreateIncident(ctx, actor, app.CreateIncidentRequest{ + Title: "Stripe payment webhook failed", + Reason: "local demo failure/replay investigation", + }) + if err != nil { + t.Fatalf("create demo incident: %v", err) + } + if _, err := control.AddIncidentEvent(ctx, actor, incident.ID, app.AddIncidentEventRequest{ + EventID: result.EventID, + Reason: "attach failed payment webhook evidence", + }); err != nil { + t.Fatalf("attach demo event to incident: %v", err) + } + + replayJob, err := control.ReleaseDeadLetter(ctx, actor, dlqEntryID, app.DeadLetterReleaseRequest{ReasonCode: app.ReplayReasonReceiverFixed, Reason: "receiver fixed during local evidence demo"}) + if err != nil { + t.Fatalf("release demo DLQ entry: %v", err) + } + recoveredName := "Demo receiver recovered" + if _, _, err := control.UpdateEndpoint(ctx, actor, endpoint.ID, app.UpdateEndpointRequest{Name: &recoveredName, Reason: "receiver fixed during local evidence demo"}); err != nil { + t.Fatalf("record demo endpoint recovery: %v", err) + } + resetDemoEndpointCircuit(t, ctx, actor.TenantID, endpoint.ID) + successDelivery := &recordingDeliveryClient{ + t: t, + now: now.Add(2 * time.Second), + result: worker.DeliveryResult{ + StatusCode: http.StatusAccepted, + ResponseBody: []byte("ok"), + FailureClass: "success", + }, + } + runWorkerUntilDeliveryCalls(t, ctx, store, successDelivery, "demo-replay-"+testSuffix(t), 1, 3) + if len(successDelivery.calls) == 0 { + t.Fatalf("expected at least one successful replay delivery, got %d", len(successDelivery.calls)) + } + deadLetters, err = control.ListDeadLetter(ctx, actor, 20) + if err != nil { + t.Fatalf("list released demo dead letters: %v", err) + } + if !containsMapState(deadLetters, dlqEntryID, "released") { + t.Fatalf("expected demo DLQ entry to be released: %+v", deadLetters) + } + + report, 
err := control.GenerateIncidentReport(ctx, actor, incident.ID, app.IncidentReportRequest{Reason: "generate local demo incident packet"}) + if err != nil { + t.Fatalf("generate demo incident report: %v", err) + } + assertDemoReportSections(t, report.Markdown) + _, export, err := control.CreateIncidentEvidenceExport(ctx, actor, incident.ID, app.CreateIncidentEvidenceExportRequest{Reason: "export local demo incident packet"}) + if err != nil { + t.Fatalf("create demo incident evidence export: %v", err) + } + download, err := control.DownloadAuditExport(ctx, actor, export.ID) + if err != nil { + t.Fatalf("download demo incident evidence export: %v", err) + } + verification, err := evidence.VerifyTarGzipBundle(download.Body) + if err != nil { + t.Fatalf("verify demo evidence bundle: %v", err) + } + if !verification.Valid || verification.CheckedFiles == 0 { + t.Fatalf("expected valid demo evidence bundle: %+v", verification) + } + writeDemoPacketOutput(t, outputDir, actor.TenantID, incident, result.EventID, dlqEntryID, replayJob, report, download, verification) +} + +func TestRCE2EObjectReadFailuresAreRedacted(t *testing.T) { + blob := &flakyBlobStore{} + ctx, store, actor := openRCStoreWithOptions(t, postgres.StoreOptions{ + RawStorageMode: domain.RawStorageS3, + ObjectStore: blob, + ObjectBucket: "bucket-secret", + }) + defer store.Close() + + now := time.Date(2026, 5, 26, 18, 0, 0, 0, time.UTC) + control := app.NewControlService(store, ssrf.Validator{Resolver: rcResolver}) + source, _ := createRCRoute(t, ctx, control, actor, "stripe", "stripe", "invoice.object_read") + body := []byte(`{"id":"evt_rc_object_read_` + testSuffix(t) + `","type":"invoice.object_read","data":{"object":{"id":"in_read"}}}`) + result, err := app.NewIngestService(store, fixedClock{now: now}).Ingest(ctx, app.IngestRequest{ + TenantID: actor.TenantID, + SourceID: source.ID, + Provider: "stripe", + RawBody: body, + Headers: []domain.HeaderPair{{Name: "Stripe-Signature", Value: 
verifier.TimestampedHeader("v1", now, []byte("whsec_rc"), body)}}, + ContentType: "application/json", + RemoteIP: "198.51.100.10", + }) + if err != nil { + t.Fatalf("ingest object-backed event: %v", err) + } + blob.getErr = errors.New("backend timeout for bucket-secret/raw-payloads with raw body " + string(body)) + if _, err := control.GetRawPayload(ctx, actor, result.EventID, "object read failure drill"); err == nil || leaksSensitiveValue(err.Error(), "bucket-secret", string(body), "raw-payloads") { + t.Fatalf("object raw read failure must be redacted, got %v", err) + } + + blob.getErr = nil + export, err := control.CreateAuditExport(ctx, actor, app.CreateAuditExportRequest{IncludeTimelines: true, Reason: "object read failure drill"}) + if err != nil { + t.Fatalf("create object-backed audit export: %v", err) + } + blob.getErr = errors.New("backend timeout for bucket-secret/evidence-export object key") + if _, err := control.DownloadAuditExport(ctx, actor, export.ID); err == nil || leaksSensitiveValue(err.Error(), "bucket-secret", "evidence-export") { + t.Fatalf("object export read failure must be redacted, got %v", err) + } +} + func TestRCRestoreDrill(t *testing.T) { sourceDatabaseURL := os.Getenv("WEBHOOKERY_TEST_DATABASE_URL") restoreDatabaseURL := os.Getenv("WEBHOOKERY_RESTORE_DRILL_DATABASE_URL") @@ -733,6 +913,31 @@ func openRCStoreWithOptions(t *testing.T, opts postgres.StoreOptions) (context.C return ctx, store, actor } +func createRCActorMembership(t *testing.T, ctx context.Context, store *postgres.Store, actor authz.Actor) { + t.Helper() + suffix := testSuffix(t) + "_" + strings.ReplaceAll(actor.ID, "-", "_") + scopes := actor.Scopes + if scopes == nil { + scopes = []string{} + } + if _, err := store.CreateAPIKey(ctx, app.APIKeyCreateInput{ + Key: domain.APIKey{ + TenantID: actor.TenantID, + UserID: actor.ID, + Name: "RC E2E actor " + actor.ID, + Prefix: "rc-e2e", + Last4: "test", + Hash: app.HashToken("rc-e2e-" + suffix), + Scopes: scopes, + State: 
domain.StateActive, + }, + Role: actor.Role, + ActorID: actor.ID, + }); err != nil { + t.Fatalf("create RC E2E actor membership for %s: %v", actor.ID, err) + } +} + func clearPriorRCE2EWork(t *testing.T, ctx context.Context, databaseURL string) { t.Helper() pool, err := pgxpool.New(ctx, databaseURL) @@ -801,6 +1006,35 @@ func (s *failingBlobStore) Delete(context.Context, string, string) error { return nil } +type flakyBlobStore struct { + objects map[string][]byte + getErr error +} + +func (s *flakyBlobStore) Put(_ context.Context, object blobstore.Object, body []byte) error { + if s.objects == nil { + s.objects = map[string][]byte{} + } + s.objects[object.Bucket+"/"+object.Key] = append([]byte(nil), body...) + return nil +} + +func (s *flakyBlobStore) Get(_ context.Context, bucket, key string) ([]byte, error) { + if s.getErr != nil { + return nil, s.getErr + } + body, ok := s.objects[bucket+"/"+key] + if !ok { + return nil, blobstore.ErrNotFound + } + return append([]byte(nil), body...), nil +} + +func (s *flakyBlobStore) Delete(_ context.Context, bucket, key string) error { + delete(s.objects, bucket+"/"+key) + return nil +} + func createRCRoute(t *testing.T, ctx context.Context, control *app.ControlService, actor authz.Actor, providerName, adapterName, eventType string) (domain.Source, domain.Endpoint) { t.Helper() return createRCRouteWithOptions(t, ctx, control, actor, providerName, adapterName, eventType, rcRouteOptions{}) @@ -848,9 +1082,12 @@ func createRCRouteWithOptions(t *testing.T, ctx context.Context, control *app.Co func runWorkerOnce(t *testing.T, ctx context.Context, store *postgres.Store, delivery worker.DeliveryClient, workerID string) { t.Helper() + fanout := app.NewDeliveryFanoutService(store, fixedClock{now: time.Now().UTC()}) + reconciliation := app.NewReconciliationService(store, nil) + processor := app.NewOutboxProcessorService(fanout, reconciliation) err := (worker.Worker{ Store: store, - Processor: store, + Processor: processor, 
DeliveryStore: store, DeliveryClient: delivery, WorkerID: workerID, @@ -861,7 +1098,34 @@ func runWorkerOnce(t *testing.T, ctx context.Context, store *postgres.Store, del } } -func assertTimelineKinds(t *testing.T, timeline []map[string]any, expected ...string) { +func runWorkerUntilDeliveryCalls(t *testing.T, ctx context.Context, store *postgres.Store, delivery *recordingDeliveryClient, workerID string, wantCalls, maxRuns int) { + t.Helper() + for i := 0; i < maxRuns && len(delivery.calls) < wantCalls; i++ { + runWorkerOnce(t, ctx, store, delivery, fmt.Sprintf("%s-%d", workerID, i+1)) + } +} + +func resetDemoEndpointCircuit(t *testing.T, ctx context.Context, tenantID, endpointID string) { + t.Helper() + databaseURL := os.Getenv("WEBHOOKERY_TEST_DATABASE_URL") + if databaseURL == "" { + t.Fatal("WEBHOOKERY_TEST_DATABASE_URL is required to reset demo endpoint circuit") + } + pool, err := pgxpool.New(ctx, databaseURL) + if err != nil { + t.Fatalf("open demo endpoint reset pool: %v", err) + } + defer pool.Close() + tag, err := pool.Exec(ctx, `UPDATE endpoints SET circuit_state='closed', failure_count=0, disabled_until=NULL WHERE tenant_id=$1 AND id=$2`, tenantID, endpointID) + if err != nil { + t.Fatalf("reset demo endpoint circuit: %v", err) + } + if tag.RowsAffected() != 1 { + t.Fatalf("expected to reset one demo endpoint circuit, reset %d", tag.RowsAffected()) + } +} + +func assertTimelineKinds(t *testing.T, timeline []app.EventTimelineEntry, expected ...string) { t.Helper() for _, kind := range expected { if !containsKind(timeline, kind) { @@ -870,9 +1134,9 @@ func assertTimelineKinds(t *testing.T, timeline []map[string]any, expected ...st } } -func containsKind(items []map[string]any, kind string) bool { +func containsKind(items []app.EventTimelineEntry, kind string) bool { for _, item := range items { - if item["kind"] == kind { + if item.Kind == kind { return true } } @@ -957,6 +1221,235 @@ func leaksSensitiveValue(text string, values ...string) bool { return 
false } +func assertDemoReportSections(t *testing.T, markdown string) { + t.Helper() + for _, section := range []string{ + "## 1. Summary", + "## 2. Event Identity", + "## 3. Provider Verification", + "## 4. Raw Capture Evidence", + "## 5. Route And Configuration Snapshot", + "## 6. Delivery Attempt Timeline", + "## 7. Retry And DLQ State", + "## 8. Replay History", + "## 9. Retention And Raw-Payload Access State", + "## 10. Audit-Chain Proof References", + "## 11. Known Gaps And Non-Claims", + } { + if !strings.Contains(markdown, section) { + t.Fatalf("demo incident report missing section %q", section) + } + } + for _, text := range []string{ + "Inbound capture does not prove downstream business success.", + "does not claim exactly-once delivery", + "whcp audit verify-bundle --file evidence.tar.gz", + } { + if !strings.Contains(markdown, text) { + t.Fatalf("demo incident report missing non-claim or verification text %q", text) + } + } +} + +func writeDemoPacketOutput(t *testing.T, outputDir, tenantID string, incident domain.Incident, eventID, dlqEntryID string, replayJob app.ReplayJob, report domain.IncidentReportSnapshot, download app.EvidenceExportDownload, verification evidence.BundleVerification) { + t.Helper() + if err := os.MkdirAll(outputDir, 0o700); err != nil { // #nosec G703 -- WEBHOOKERY_DEMO_OUTPUT_DIR is an explicit local test artifact directory. 
+ t.Fatalf("create demo output directory: %v", err) + } + manifestBytes := readDemoBundleFile(t, download.Body, "manifest.json") + var manifest evidence.Manifest + if err := json.Unmarshal(manifestBytes, &manifest); err != nil { + t.Fatalf("decode demo evidence manifest: %v", err) + } + writeDemoBytes(t, filepath.Join(outputDir, "incident-report.md"), []byte(report.Markdown)) + writeDemoJSON(t, filepath.Join(outputDir, "incident-report.json"), sanitizedDemoReportJSON(t, report.Report, tenantID)) + writeDemoJSON(t, filepath.Join(outputDir, "evidence-manifest.json"), map[string]any{ + "schema_version": "webhookery.demo_evidence_manifest.v1", + "source_manifest_sha256": evidence.SHA256(manifestBytes), + "bundle_sha256": download.Export.SHA256, + "export_id": manifest.ExportID, + "tenant_id_hash": manifest.TenantIDHash, + "generated_at": manifest.GeneratedAt, + "included_events": manifest.IncludedEvents, + "included_incidents": manifest.IncludedIncidents, + "include_raw_payloads": manifest.IncludeRawPayloads, + "include_payload_bodies": manifest.IncludePayloadBodies, + "include_timelines": manifest.IncludeTimelines, + "files": manifest.Files, + "redaction_policy": "raw payload bodies, webhook secrets, provider signatures, bearer tokens, private keys, and tenant identifiers are omitted or hashed in demo-visible files", + "local_verification_file": "verify-output.json", + "non_claims": []string{ + "Inbound capture does not prove downstream business success.", + "Webhookery records at-least-once delivery evidence and does not claim exactly-once delivery.", + "The local demo does not prove provider-side completeness or provider certification.", + }, + }) + writeDemoJSON(t, filepath.Join(outputDir, "verify-output.json"), map[string]any{ + "schema_version": "webhookery.demo_verify_output.v1", + "command": "whcp audit verify-bundle --file evidence.tar.gz", + "bundle_sha256": download.Export.SHA256, + "result": verification, + }) + writeDemoBytes(t, filepath.Join(outputDir, 
"evidence.tar.gz"), download.Body) + writeDemoBytes(t, filepath.Join(outputDir, "README.md"), []byte(demoPacketREADME(incident, eventID, dlqEntryID, replayJob, download.Export))) + assertDemoPacketOutputRedacted(t, outputDir) +} + +func sanitizedDemoReportJSON(t *testing.T, raw json.RawMessage, tenantID string) any { + t.Helper() + var value any + if err := json.Unmarshal(raw, &value); err != nil { + t.Fatalf("decode demo incident report: %v", err) + } + return redactDemoValue(value, tenantID) +} + +func redactDemoValue(value any, tenantID string) any { + switch v := value.(type) { + case map[string]any: + out := make(map[string]any, len(v)) + for key, item := range v { + lower := strings.ToLower(key) + switch { + case lower == "tenant_id": + out["tenant_id_hash"] = domain.HashSHA256([]byte(fmt.Sprint(item))) + case lower == "raw_body": + out[key] = "omitted" + case lower == "body" || strings.Contains(lower, "secret") || strings.Contains(lower, "token") || strings.Contains(lower, "authorization") || strings.Contains(lower, "signature_header") || strings.Contains(lower, "signature_value"): + out[key] = "[redacted]" + default: + out[key] = redactDemoValue(item, tenantID) + } + } + return out + case []any: + out := make([]any, 0, len(v)) + for _, item := range v { + out = append(out, redactDemoValue(item, tenantID)) + } + return out + case string: + if v == tenantID { + return domain.HashSHA256([]byte(v)) + } + return v + default: + return value + } +} + +func readDemoBundleFile(t *testing.T, body []byte, name string) []byte { + t.Helper() + gz, err := gzip.NewReader(bytes.NewReader(body)) + if err != nil { + t.Fatalf("open demo evidence bundle: %v", err) + } + defer func() { _ = gz.Close() }() + tr := tar.NewReader(gz) + for { + header, err := tr.Next() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + t.Fatalf("read demo evidence bundle: %v", err) + } + if header.Name != name { + continue + } + data, err := io.ReadAll(tr) + if err != nil { + 
t.Fatalf("read demo bundle file %s: %v", name, err) + } + return data + } + t.Fatalf("demo evidence bundle missing %s", name) + return nil +} + +func writeDemoJSON(t *testing.T, path string, value any) { + t.Helper() + body, err := json.MarshalIndent(value, "", " ") + if err != nil { + t.Fatalf("marshal demo output %s: %v", path, err) + } + body = append(body, '\n') + writeDemoBytes(t, path, body) +} + +func writeDemoBytes(t *testing.T, path string, body []byte) { + t.Helper() + if err := os.WriteFile(path, body, 0o600); err != nil { // #nosec G703 -- demo test writes only expected artifact paths under WEBHOOKERY_DEMO_OUTPUT_DIR. + t.Fatalf("write demo output %s: %v", path, err) + } +} + +func demoPacketREADME(incident domain.Incident, eventID, dlqEntryID string, replayJob app.ReplayJob, export domain.EvidenceExport) string { + return fmt.Sprintf(`# Failed Payment Webhook Evidence Packet + +This folder was generated by `+"`examples/webhook-evidence-demo/run.sh`"+`. + +## Scenario + +1. A synthetic Stripe-style `+"`invoice.paid`"+` webhook was signed with a local test secret and ingested by Webhookery. +2. Webhookery durably captured the event before returning success. +3. The downstream receiver returned HTTP 500, so the delivery attempt failed and the event entered DLQ. +4. The operator released the DLQ entry after the receiver was fixed. +5. Replay created new delivery work linked to the original event, and replay delivery succeeded. +6. Webhookery generated an incident report and a local evidence bundle. + +## Local IDs + +- Incident: `+"`%s`"+` +- Event: `+"`%s`"+` +- DLQ entry: `+"`%s`"+` +- Replay job: `+"`%s`"+` +- Evidence export: `+"`%s`"+` + +These IDs are local disposable demo identifiers. Tenant identifiers are hashed in human-visible demo files. + +## Files + +- `+"`incident-report.md`"+`: support/SRE-readable report. +- `+"`incident-report.json`"+`: sanitized JSON report. 
+- `+"`evidence-manifest.json`"+`: sanitized manifest summary for the generated bundle. +- `+"`verify-output.json`"+`: local bundle verification result. A successful demo has `+"`result.valid: true`"+`. +- `+"`evidence.tar.gz`"+`: generated local evidence bundle used for verification. + +## What This Proves + +- Inbound success followed durable capture in the local PostgreSQL-backed test path. +- The downstream failure, DLQ transition, replay, and successful replay delivery are visible as evidence. +- The evidence bundle hash and included file hashes verify locally. + +## What This Does Not Prove + +- It does not prove provider-side completeness or Stripe certification. +- It does not prove downstream business success. +- It does not claim exactly-once delivery or global ordering. +- It does not replace restore drills, deployment review, or live-provider proof. + +## Safety + +The demo output omits raw payload bodies, webhook secrets, provider signature headers, bearer tokens, private keys, and database URLs. Do not replace the synthetic fixture with real customer data for screenshots, issues, support packets, or launch materials. +`, incident.ID, eventID, dlqEntryID, replayJob.ID, export.ID) +} + +func assertDemoPacketOutputRedacted(t *testing.T, outputDir string) { + t.Helper() + for _, name := range []string{"incident-report.md", "incident-report.json", "evidence-manifest.json", "verify-output.json", "README.md"} { + body, err := os.ReadFile(filepath.Join(outputDir, name)) // #nosec G304,G703 -- test reads files it just wrote under an explicit demo output directory. 
+ if err != nil { + t.Fatalf("read demo output %s: %v", name, err) + } + for _, marker := range []string{"whsec_rc", "acct_demo", "cus_demo", "in_demo", "Stripe-Signature", "v1="} { + if strings.Contains(string(body), marker) { + t.Fatalf("demo output %s leaked sensitive marker %q", name, marker) + } + } + } +} + func testSuffix(t *testing.T) string { name := strings.NewReplacer("/", "_", " ", "_").Replace(t.Name()) return name + "_" + strings.ReplaceAll(time.Now().UTC().Format("20060102150405.000000000"), ".", "") diff --git a/internal/e2e/rc_restore_contract_static_test.go b/internal/e2e/rc_restore_contract_static_test.go new file mode 100644 index 0000000..ed56c55 --- /dev/null +++ b/internal/e2e/rc_restore_contract_static_test.go @@ -0,0 +1,44 @@ +package e2e + +import ( + "os" + "strings" + "testing" +) + +func TestRCRestoreDrillPreservesEvidenceAuthorityChecks(t *testing.T) { + rcBody, err := os.ReadFile("rc_e2e_test.go") + if err != nil { + t.Fatal(err) + } + rcTest := string(rcBody) + required := map[string]string{ + "restore before migration": "runRestoreCommand(t, drillCtx, restoreDatabaseURL, dumpFile)", + "migrate restored database": "postgres.MigrateUp(drillCtx, restoreDatabaseURL", + "read restored event evidence": "restoredControl.GetEvent(drillCtx, actor, result.EventID)", + "download restored evidence export": "restoredControl.DownloadAuditExport(drillCtx, actor, export.ID)", + "verify restored evidence export bundle": "evidence.VerifyTarGzipBundle(download.Body)", + "prove audit chain entries in bundle": "verification.CheckedChainEntries == 0", + "verify restored audit chain": "restoredControl.VerifyAuditChain(drillCtx, actor, app.AuditChainVerifyRequest{})", + "compare restored audit chain hash": "after.EndChainHash != before.EndChainHash", + } + for name, want := range required { + if !strings.Contains(rcTest, want) { + t.Fatalf("RC restore drill no longer proves %s with %q", name, want) + } + } + + rcScript, err := 
os.ReadFile("../../scripts/rc_acceptance.sh") + if err != nil { + t.Fatal(err) + } + script := string(rcScript) + for _, want := range []string{ + "WEBHOOKERY_RC_RESTORE_DATABASE_URL", + "WEBHOOKERY_RESTORE_DRILL_DATABASE_URL=\"$WEBHOOKERY_RC_RESTORE_DATABASE_URL\" go test ./internal/e2e -run TestRCRestoreDrill -count=1", + } { + if !strings.Contains(script, want) { + t.Fatalf("rc-check no longer wires the restore drill through %q", want) + } + } +} diff --git a/internal/evidence/bundle.go b/internal/evidence/bundle.go index 983e3f9..b0b99f3 100644 --- a/internal/evidence/bundle.go +++ b/internal/evidence/bundle.go @@ -18,17 +18,36 @@ import ( "webhookery/internal/domain" ) +const ManifestSchemaV1 = "webhookery.evidence_bundle.v1" +const BundleViewSchemaV1 = "webhookery.evidence_bundle_view.v1" + type Manifest struct { - ExportID string `json:"export_id"` - TenantID string `json:"tenant_id"` - CreatedAt time.Time `json:"created_at"` - From time.Time `json:"from,omitempty"` - To time.Time `json:"to,omitempty"` - IncludeRawPayloads bool `json:"include_raw_payloads"` - IncludeTimelines bool `json:"include_timelines"` - IncludePayloadBodies bool `json:"include_payload_bodies"` - AuditChain *AuditChain `json:"audit_chain,omitempty"` - Files []ManifestFile `json:"files"` + SchemaVersion string `json:"schema_version"` + GeneratedAt time.Time `json:"generated_at"` + TenantIDHash string `json:"tenant_id_hash"` + BundleID string `json:"bundle_id"` + ExportID string `json:"export_id,omitempty"` + TenantID string `json:"-"` + CreatedAt time.Time `json:"-"` + From *time.Time `json:"from,omitempty"` + To *time.Time `json:"to,omitempty"` + IncludedEvents []string `json:"included_events"` + IncludedIncidents []string `json:"included_incidents"` + Hashes map[string]string `json:"hashes"` + IncludeRawPayloads bool `json:"include_raw_payloads"` + IncludeTimelines bool `json:"include_timelines"` + IncludePayloadBodies bool `json:"include_payload_bodies"` + RedactionPolicy 
ManifestRedactionPolicy `json:"redaction_policy"` + AuditChain *AuditChain `json:"audit_chain,omitempty"` + NonClaims []string `json:"non_claims"` + Files []ManifestFile `json:"files"` +} + +type ManifestRedactionPolicy struct { + TenantIdentifiers string `json:"tenant_identifiers"` + Secrets string `json:"secrets"` + RawPayloadBodies string `json:"raw_payload_bodies"` + PayloadBodies string `json:"payload_bodies"` } type AuditChain struct { @@ -69,6 +88,28 @@ type BundleVerification struct { Failures []string `json:"failures"` } +type BundleView struct { + SchemaVersion string `json:"schema_version"` + Manifest Manifest `json:"manifest"` + Summary BundleViewSummary `json:"summary"` + Verification BundleVerification `json:"verification"` + Warnings []string `json:"warnings,omitempty"` +} + +type BundleViewSummary struct { + FileCount int `json:"file_count"` + TotalSizeBytes int64 `json:"total_size_bytes"` + IncludedEventCount int `json:"included_event_count"` + IncludedIncidentCount int `json:"included_incident_count"` + TimelineEntryCount int `json:"timeline_entry_count"` + TimelineKinds map[string]int `json:"timeline_kinds,omitempty"` + AuditEventCount int `json:"audit_event_count"` + HasIncidentReportJSON bool `json:"has_incident_report_json"` + HasIncidentReportMarkdown bool `json:"has_incident_report_markdown"` + HasAuditChainProof bool `json:"has_audit_chain_proof"` + AuditChainStatus string `json:"audit_chain_status"` +} + func JSONLines(items []any) ([]byte, error) { var out bytes.Buffer enc := json.NewEncoder(&out) @@ -96,6 +137,7 @@ func BuildTarGzipBundle(manifest Manifest, files map[string][]byte) (Bundle, err SizeBytes: int64(len(file)), }) } + manifest = normalizeManifest(manifest) manifestBytes, err := json.MarshalIndent(manifest, "", " ") if err != nil { @@ -135,11 +177,43 @@ func BuildTarGzipBundle(manifest Manifest, files map[string][]byte) (Bundle, err } func VerifyTarGzipBundle(body []byte) (BundleVerification, error) { - result := 
BundleVerification{Valid: true} files, err := readTarGzipFiles(body) if err != nil { return BundleVerification{}, err } + return verifyTarGzipFiles(files) +} + +func InspectTarGzipBundle(body []byte) (BundleView, error) { + view := BundleView{SchemaVersion: BundleViewSchemaV1} + files, err := readTarGzipFiles(body) + if err != nil { + return BundleView{}, err + } + verification, err := verifyTarGzipFiles(files) + if err != nil { + return BundleView{}, err + } + view.Verification = verification + + manifestBytes, ok := files["manifest.json"] + if !ok { + view.Warnings = append(view.Warnings, "manifest.json is missing") + return view, nil + } + var manifest Manifest + if err := json.Unmarshal(manifestBytes, &manifest); err != nil { + return BundleView{}, err + } + manifest = cleanInspectedManifest(manifest) + view.Manifest = manifest + view.Summary = bundleViewSummary(manifest, files, verification) + view.Warnings = bundleViewWarnings(manifest, files, verification) + return view, nil +} + +func verifyTarGzipFiles(files map[string][]byte) (BundleVerification, error) { + result := BundleVerification{Valid: true} manifestBytes, ok := files["manifest.json"] if !ok { result.Valid = false @@ -151,6 +225,7 @@ func VerifyTarGzipBundle(body []byte) (BundleVerification, error) { if err := json.Unmarshal(manifestBytes, &manifest); err != nil { return BundleVerification{}, err } + validateManifest(manifest, &result) for _, file := range manifest.Files { body, ok := files[file.Name] if !ok { @@ -167,6 +242,13 @@ func VerifyTarGzipBundle(body []byte) (BundleVerification, error) { result.Valid = false result.Failures = append(result.Failures, "size mismatch: "+file.Name) } + if want, ok := manifest.Hashes[file.Name]; !ok { + result.Valid = false + result.Failures = append(result.Failures, "manifest hash missing: "+file.Name) + } else if want != file.SHA256 { + result.Valid = false + result.Failures = append(result.Failures, "manifest hash mismatch: "+file.Name) + } } if proof, ok 
:= files["audit_chain_proof.jsonl"]; ok { failures, checked, err := verifyAuditChainProof(proof) @@ -182,11 +264,240 @@ func VerifyTarGzipBundle(body []byte) (BundleVerification, error) { return result, nil } +func bundleViewSummary(manifest Manifest, files map[string][]byte, verification BundleVerification) BundleViewSummary { + _, hasIncidentReportJSON := files["incident_report.json"] + _, hasIncidentReportMarkdown := files["incident_report.md"] + _, hasAuditChainProof := files["audit_chain_proof.jsonl"] + summary := BundleViewSummary{ + FileCount: len(manifest.Files), + IncludedEventCount: len(manifest.IncludedEvents), + IncludedIncidentCount: len(manifest.IncludedIncidents), + HasIncidentReportJSON: hasIncidentReportJSON, + HasIncidentReportMarkdown: hasIncidentReportMarkdown, + HasAuditChainProof: hasAuditChainProof, + AuditChainStatus: "not_included", + } + for _, file := range manifest.Files { + summary.TotalSizeBytes += file.SizeBytes + } + if raw, ok := files["timelines.jsonl"]; ok { + count, kinds, err := countTimelineJSONLines(raw) + if err == nil { + summary.TimelineEntryCount = count + summary.TimelineKinds = kinds + } + } + if raw, ok := files["audit_events.jsonl"]; ok { + count, err := countJSONLines(raw) + if err == nil { + summary.AuditEventCount = count + } + } + if summary.HasAuditChainProof { + summary.AuditChainStatus = "included" + if verification.CheckedChainEntries > 0 { + summary.AuditChainStatus = "verified" + } + for _, failure := range verification.Failures { + if strings.Contains(failure, "chain proof") { + summary.AuditChainStatus = "invalid" + break + } + } + } + return summary +} + +func bundleViewWarnings(manifest Manifest, files map[string][]byte, verification BundleVerification) []string { + var warnings []string + if manifest.IncludeRawPayloads { + warnings = append(warnings, "bundle manifest indicates raw payload bodies may be included; handle as sensitive") + } + if manifest.IncludePayloadBodies { + warnings = append(warnings, 
"bundle manifest indicates normalized or delivery payload bodies may be included; handle as sensitive") + } + if _, ok := files["timelines.jsonl"]; ok { + if _, _, err := countTimelineJSONLines(files["timelines.jsonl"]); err != nil { + warnings = append(warnings, "timelines.jsonl could not be summarized: "+err.Error()) + } + } + if _, ok := files["audit_events.jsonl"]; ok { + if _, err := countJSONLines(files["audit_events.jsonl"]); err != nil { + warnings = append(warnings, "audit_events.jsonl could not be summarized: "+err.Error()) + } + } + if !verification.Valid { + warnings = append(warnings, "bundle verification failed; inspect verification.failures") + } + if !manifest.IncludeRawPayloads && !manifest.IncludePayloadBodies { + warnings = append(warnings, "raw payload bodies and payload bodies are omitted according to the manifest") + } + return warnings +} + +func cleanInspectedManifest(manifest Manifest) Manifest { + if manifest.From != nil && manifest.From.IsZero() { + manifest.From = nil + } + if manifest.To != nil && manifest.To.IsZero() { + manifest.To = nil + } + return manifest +} + +func normalizeManifest(manifest Manifest) Manifest { + if manifest.SchemaVersion == "" { + manifest.SchemaVersion = ManifestSchemaV1 + } + if manifest.GeneratedAt.IsZero() { + manifest.GeneratedAt = manifest.CreatedAt + } + if manifest.GeneratedAt.IsZero() { + manifest.GeneratedAt = time.Now().UTC() + } else { + manifest.GeneratedAt = manifest.GeneratedAt.UTC() + } + if manifest.BundleID == "" { + manifest.BundleID = manifest.ExportID + } + if manifest.TenantIDHash == "" && manifest.TenantID != "" { + manifest.TenantIDHash = domain.HashSHA256([]byte(manifest.TenantID)) + } + if manifest.From != nil { + from := manifest.From.UTC() + manifest.From = &from + } + if manifest.To != nil { + to := manifest.To.UTC() + manifest.To = &to + } + manifest.IncludedEvents = normalizedStringSet(manifest.IncludedEvents) + manifest.IncludedIncidents = 
normalizedStringSet(manifest.IncludedIncidents) + manifest.Hashes = make(map[string]string, len(manifest.Files)) + for _, file := range manifest.Files { + manifest.Hashes[file.Name] = file.SHA256 + } + if manifest.RedactionPolicy == (ManifestRedactionPolicy{}) { + manifest.RedactionPolicy = defaultRedactionPolicy(manifest.IncludeRawPayloads, manifest.IncludePayloadBodies) + } + if len(manifest.NonClaims) == 0 { + manifest.NonClaims = DefaultNonClaims() + } + return manifest +} + +func validateManifest(manifest Manifest, result *BundleVerification) { + if manifest.SchemaVersion != ManifestSchemaV1 { + result.Valid = false + result.Failures = append(result.Failures, "unsupported manifest schema_version: "+manifest.SchemaVersion) + } + if manifest.BundleID == "" { + result.Valid = false + result.Failures = append(result.Failures, "bundle_id is missing") + } + if manifest.TenantIDHash == "" { + result.Valid = false + result.Failures = append(result.Failures, "tenant_id_hash is missing") + } + if len(manifest.NonClaims) == 0 { + result.Valid = false + result.Failures = append(result.Failures, "non_claims are missing") + } + if manifest.RedactionPolicy == (ManifestRedactionPolicy{}) { + result.Valid = false + result.Failures = append(result.Failures, "redaction_policy is missing") + } + if manifest.Hashes == nil { + result.Valid = false + result.Failures = append(result.Failures, "hashes are missing") + } +} + +func defaultRedactionPolicy(includeRawPayloads, includePayloadBodies bool) ManifestRedactionPolicy { + rawPayloadBodies := "omitted" + if includeRawPayloads { + rawPayloadBodies = "included only when explicitly requested with elevated raw-payload permission" + } + payloadBodies := "omitted" + if includePayloadBodies { + payloadBodies = "included only when explicitly requested with elevated raw-payload permission" + } + return ManifestRedactionPolicy{ + TenantIdentifiers: "tenant identifiers are represented by tenant_id_hash", + Secrets: "webhook secrets, provider 
signatures, bearer tokens, private keys, and credentials are excluded", + RawPayloadBodies: rawPayloadBodies, + PayloadBodies: payloadBodies, + } +} + +func DefaultNonClaims() []string { + return []string{ + "Inbound capture does not prove downstream business success.", + "Webhookery records at-least-once delivery evidence and does not claim exactly-once delivery.", + "The bundle proves Webhookery evidence observed locally; it does not prove provider-side completeness.", + "The bundle is not compliance certification, legal evidentiary certification, or managed-service availability evidence.", + } +} + +func normalizedStringSet(values []string) []string { + if len(values) == 0 { + return []string{} + } + seen := map[string]bool{} + out := make([]string, 0, len(values)) + for _, value := range values { + value = strings.TrimSpace(value) + if value == "" || seen[value] { + continue + } + seen[value] = true + out = append(out, value) + } + sort.Strings(out) + return out +} + func SHA256(data []byte) string { sum := sha256.Sum256(data) return "sha256:" + hex.EncodeToString(sum[:]) } +func countJSONLines(raw []byte) (int, error) { + dec := json.NewDecoder(bytes.NewReader(raw)) + count := 0 + for { + var item map[string]any + if err := dec.Decode(&item); err != nil { + if errors.Is(err, io.EOF) { + break + } + return count, err + } + count++ + } + return count, nil +} + +func countTimelineJSONLines(raw []byte) (int, map[string]int, error) { + dec := json.NewDecoder(bytes.NewReader(raw)) + kinds := map[string]int{} + count := 0 + for { + var item map[string]any + if err := dec.Decode(&item); err != nil { + if errors.Is(err, io.EOF) { + break + } + return count, kinds, err + } + count++ + if kind, ok := item["kind"].(string); ok && strings.TrimSpace(kind) != "" { + kinds[kind]++ + } + } + return count, kinds, nil +} + func readTarGzipFiles(body []byte) (map[string][]byte, error) { gz, err := gzip.NewReader(bytes.NewReader(body)) if err != nil { diff --git 
a/internal/evidence/bundle_test.go b/internal/evidence/bundle_test.go index 80d6402..da2247e 100644 --- a/internal/evidence/bundle_test.go +++ b/internal/evidence/bundle_test.go @@ -1,6 +1,12 @@ package evidence import ( + "archive/tar" + "bytes" + "compress/gzip" + "encoding/json" + "sort" + "strings" "testing" "time" @@ -92,6 +98,51 @@ func TestVerifyTarGzipBundleChecksManifestFiles(t *testing.T) { } } +func TestBuildTarGzipBundleWritesVersionedSanitizedManifest(t *testing.T) { + bundle, err := BuildTarGzipBundle(Manifest{ + ExportID: "exp_1", + TenantID: "ten_whsec_secret_marker", + CreatedAt: time.Unix(123, 0).UTC(), + IncludedEvents: []string{"evt_2", "evt_1", "evt_1"}, + IncludedIncidents: []string{"inc_1"}, + }, map[string][]byte{ + "audit_events.jsonl": []byte("{\"id\":\"aud_1\"}\n"), + }) + if err != nil { + t.Fatal(err) + } + files, err := readTarGzipFiles(bundle.Bytes) + if err != nil { + t.Fatal(err) + } + manifestBytes := files["manifest.json"] + if bytes.Contains(manifestBytes, []byte(`"tenant_id":`)) { + t.Fatalf("manifest leaked raw tenant id: %s", string(manifestBytes)) + } + if bytes.Contains(manifestBytes, []byte("ten_whsec_secret_marker")) || bytes.Contains(manifestBytes, []byte("whsec_secret_marker")) { + t.Fatalf("manifest leaked secret-shaped tenant id: %s", string(manifestBytes)) + } + if bytes.Contains(manifestBytes, []byte(`"from":`)) || bytes.Contains(manifestBytes, []byte(`"to":`)) { + t.Fatalf("manifest serialized empty time window: %s", string(manifestBytes)) + } + var manifest Manifest + if err := json.Unmarshal(manifestBytes, &manifest); err != nil { + t.Fatal(err) + } + if manifest.SchemaVersion != ManifestSchemaV1 { + t.Fatalf("unexpected schema version %q", manifest.SchemaVersion) + } + if manifest.BundleID != "exp_1" || manifest.TenantIDHash == "" || manifest.GeneratedAt.IsZero() { + t.Fatalf("manifest missing bundle id, tenant hash, or generated time: %+v", manifest) + } + if got := strings.Join(manifest.IncludedEvents, ","); 
got != "evt_1,evt_2" { + t.Fatalf("included events not normalized: %s", got) + } + if manifest.Hashes["audit_events.jsonl"] == "" || len(manifest.NonClaims) == 0 || manifest.RedactionPolicy.Secrets == "" { + t.Fatalf("manifest missing hashes, non-claims, or redaction policy: %+v", manifest) + } +} + func TestVerifyTarGzipBundleChecksAuditChainProof(t *testing.T) { bundle, err := BuildTarGzipBundle(Manifest{ExportID: "exp_1", TenantID: "ten_1", CreatedAt: time.Unix(123, 0).UTC()}, map[string][]byte{ "audit_events.jsonl": []byte("{\"id\":\"aud_1\"}\n"), @@ -108,3 +159,165 @@ func TestVerifyTarGzipBundleChecksAuditChainProof(t *testing.T) { t.Fatalf("unexpected chain verification result: %+v", result) } } + +func TestVerifyTarGzipBundleRejectsMissingSchemaVersion(t *testing.T) { + bundle, err := BuildTarGzipBundle(Manifest{ExportID: "exp_1", TenantID: "ten_1", CreatedAt: time.Unix(123, 0).UTC()}, map[string][]byte{ + "audit_events.jsonl": []byte("{\"id\":\"aud_1\"}\n"), + }) + if err != nil { + t.Fatal(err) + } + files, err := readTarGzipFiles(bundle.Bytes) + if err != nil { + t.Fatal(err) + } + var manifest map[string]any + if err := json.Unmarshal(files["manifest.json"], &manifest); err != nil { + t.Fatal(err) + } + delete(manifest, "schema_version") + files["manifest.json"], err = json.MarshalIndent(manifest, "", " ") + if err != nil { + t.Fatal(err) + } + files["manifest.json"] = append(files["manifest.json"], '\n') + + result, err := VerifyTarGzipBundle(tarGzipTestFiles(t, files)) + if err != nil { + t.Fatal(err) + } + if result.Valid { + t.Fatalf("expected missing schema version to be invalid: %+v", result) + } + if !hasFailure(result.Failures, "unsupported manifest schema_version") { + t.Fatalf("expected schema version failure, got %+v", result.Failures) + } +} + +func TestVerifyTarGzipBundleRejectsTamperedFile(t *testing.T) { + bundle, err := BuildTarGzipBundle(Manifest{ExportID: "exp_1", TenantID: "ten_1", CreatedAt: time.Unix(123, 0).UTC()}, 
map[string][]byte{ + "audit_events.jsonl": []byte("{\"id\":\"aud_1\"}\n"), + }) + if err != nil { + t.Fatal(err) + } + files, err := readTarGzipFiles(bundle.Bytes) + if err != nil { + t.Fatal(err) + } + files["audit_events.jsonl"] = []byte("{\"id\":\"aud_tampered\"}\n") + + result, err := VerifyTarGzipBundle(tarGzipTestFiles(t, files)) + if err != nil { + t.Fatal(err) + } + if result.Valid { + t.Fatalf("expected tampered bundle to be invalid: %+v", result) + } + if !hasFailure(result.Failures, "hash mismatch: audit_events.jsonl") { + t.Fatalf("expected audit_events hash mismatch, got %+v", result.Failures) + } +} + +func TestVerifyTarGzipBundleRejectsMissingManifestHash(t *testing.T) { + bundle, err := BuildTarGzipBundle(Manifest{ExportID: "exp_1", TenantID: "ten_1", CreatedAt: time.Unix(123, 0).UTC()}, map[string][]byte{ + "audit_events.jsonl": []byte("{\"id\":\"aud_1\"}\n"), + }) + if err != nil { + t.Fatal(err) + } + files, err := readTarGzipFiles(bundle.Bytes) + if err != nil { + t.Fatal(err) + } + var manifest map[string]any + if err := json.Unmarshal(files["manifest.json"], &manifest); err != nil { + t.Fatal(err) + } + manifest["hashes"] = map[string]string{} + files["manifest.json"], err = json.MarshalIndent(manifest, "", " ") + if err != nil { + t.Fatal(err) + } + files["manifest.json"] = append(files["manifest.json"], '\n') + + result, err := VerifyTarGzipBundle(tarGzipTestFiles(t, files)) + if err != nil { + t.Fatal(err) + } + if result.Valid { + t.Fatalf("expected missing manifest hash to be invalid: %+v", result) + } + if !hasFailure(result.Failures, "manifest hash missing: audit_events.jsonl") { + t.Fatalf("expected manifest hash missing failure, got %+v", result.Failures) + } +} + +func TestVerifyTarGzipBundleToleratesUnknownOptionalManifestField(t *testing.T) { + bundle, err := BuildTarGzipBundle(Manifest{ExportID: "exp_1", TenantID: "ten_1", CreatedAt: time.Unix(123, 0).UTC()}, map[string][]byte{ + "audit_events.jsonl": 
[]byte("{\"id\":\"aud_1\"}\n"), + }) + if err != nil { + t.Fatal(err) + } + files, err := readTarGzipFiles(bundle.Bytes) + if err != nil { + t.Fatal(err) + } + var manifest map[string]any + if err := json.Unmarshal(files["manifest.json"], &manifest); err != nil { + t.Fatal(err) + } + manifest["future_optional_field"] = map[string]any{"ignored": true} + files["manifest.json"], err = json.MarshalIndent(manifest, "", " ") + if err != nil { + t.Fatal(err) + } + files["manifest.json"] = append(files["manifest.json"], '\n') + + result, err := VerifyTarGzipBundle(tarGzipTestFiles(t, files)) + if err != nil { + t.Fatal(err) + } + if !result.Valid { + t.Fatalf("expected unknown optional field to be tolerated: %+v", result) + } +} + +func tarGzipTestFiles(t *testing.T, files map[string][]byte) []byte { + t.Helper() + names := make([]string, 0, len(files)) + for name := range files { + names = append(names, name) + } + sort.Strings(names) + var out bytes.Buffer + gz, err := gzip.NewWriterLevel(&out, gzip.BestCompression) + if err != nil { + t.Fatal(err) + } + gz.Name = "webhookery-evidence-export.tar" + gz.ModTime = time.Unix(0, 0).UTC() + tw := tar.NewWriter(gz) + for _, name := range names { + if err := writeTarFile(tw, name, files[name]); err != nil { + t.Fatal(err) + } + } + if err := tw.Close(); err != nil { + t.Fatal(err) + } + if err := gz.Close(); err != nil { + t.Fatal(err) + } + return out.Bytes() +} + +func hasFailure(failures []string, want string) bool { + for _, failure := range failures { + if strings.Contains(failure, want) { + return true + } + } + return false +} diff --git a/internal/problem/problem.go b/internal/problem/problem.go index b47abf9..5c73c46 100644 --- a/internal/problem/problem.go +++ b/internal/problem/problem.go @@ -1,14 +1,15 @@ package problem type Problem struct { - Type string `json:"type"` - Title string `json:"title"` - Status int `json:"status"` - Detail string `json:"detail,omitempty"` - Code string `json:"code"` - RequestID string 
`json:"request_id"` - Fields []Field `json:"fields,omitempty"` - Retryable bool `json:"retryable"` + Type string `json:"type"` + Title string `json:"title"` + Status int `json:"status"` + Detail string `json:"detail,omitempty"` + Code string `json:"code"` + StableCode string `json:"stable_code"` + RequestID string `json:"request_id"` + Fields []Field `json:"fields,omitempty"` + Retryable bool `json:"retryable"` } type Field struct { @@ -18,13 +19,58 @@ type Field struct { func New(status int, code, title, detail, requestID string, retryable bool) Problem { return Problem{ - Type: "https://docs.webhookery.local/errors/" + code, - Title: title, - Status: status, - Detail: detail, - Code: code, - RequestID: requestID, - Retryable: retryable, + Type: "https://docs.webhookery.local/errors/" + code, + Title: title, + Status: status, + Detail: detail, + Code: code, + StableCode: StableCodeFor(code), + RequestID: requestID, + Retryable: retryable, + } +} + +const ( + CodeAuthenticationRequired = "WEBHOOKERY_AUTHENTICATION_REQUIRED" + CodeDurableCaptureUnavailable = "WEBHOOKERY_DURABLE_CAPTURE_UNAVAILABLE" + CodeHeadersTooLarge = "WEBHOOKERY_HEADERS_TOO_LARGE" + CodeInternalError = "WEBHOOKERY_INTERNAL_ERROR" + CodePayloadTooLarge = "WEBHOOKERY_PAYLOAD_TOO_LARGE" + CodeProviderSignatureInvalid = "WEBHOOKERY_PROVIDER_SIGNATURE_INVALID" + CodeRawPayloadRetainedMetadataOnly = "WEBHOOKERY_RAW_PAYLOAD_RETAINED_METADATA_ONLY" + CodeResourceNotFound = "WEBHOOKERY_RESOURCE_NOT_FOUND" + CodeSSRFBlockedDestination = "WEBHOOKERY_SSRF_BLOCKED_DESTINATION" + CodeTenantAccessDenied = "WEBHOOKERY_TENANT_ACCESS_DENIED" + CodeValidationFailed = "WEBHOOKERY_VALIDATION_FAILED" + CodeUnknownError = "WEBHOOKERY_UNKNOWN_ERROR" +) + +func StableCodeFor(code string) string { + switch code { + case "authentication_error": + return CodeAuthenticationRequired + case "authorization_error": + return CodeTenantAccessDenied + case "headers_too_large": + return CodeHeadersTooLarge + case "internal_error": 
+ return CodeInternalError + case "invalid_signature": + return CodeProviderSignatureInvalid + case "not_found": + return CodeResourceNotFound + case "not_ready", "storage_unavailable": + return CodeDurableCaptureUnavailable + case "payload_expired": + return CodeRawPayloadRetainedMetadataOnly + case "payload_too_large": + return CodePayloadTooLarge + case "notification_channel_url_blocked", "siem_sink_url_blocked": + return CodeSSRFBlockedDestination + case "invalid_json", "invalid_request", "unsupported_grant_type", "validation_error": + return CodeValidationFailed + default: + return CodeUnknownError } } diff --git a/internal/problem/problem_test.go b/internal/problem/problem_test.go index 5cf177a..18e5eb9 100644 --- a/internal/problem/problem_test.go +++ b/internal/problem/problem_test.go @@ -17,6 +17,83 @@ func TestProblemDoesNotExposeInternalDetail(t *testing.T) { if p.Status != 500 || p.Code != "internal_error" || p.RequestID != "req_123" { t.Fatalf("unexpected internal problem: %+v", p) } + if p.StableCode != CodeInternalError { + t.Fatalf("unexpected stable code %q", p.StableCode) + } +} + +func TestProblemConstructorsSetStableStatusCodesAndRequestIDs(t *testing.T) { + tests := []struct { + name string + problem Problem + status int + code string + stable string + title string + detail string + }{ + { + name: "unauthorized", + problem: Unauthorized("req_auth"), + status: 401, + code: "authentication_error", + stable: CodeAuthenticationRequired, + title: "Authentication required", + detail: "A valid bearer token is required.", + }, + { + name: "forbidden", + problem: Forbidden("req_forbidden"), + status: 403, + code: "authorization_error", + stable: CodeTenantAccessDenied, + title: "Forbidden", + detail: "The authenticated actor is not allowed to perform this action.", + }, + { + name: "bad request", + problem: BadRequest("req_bad", "invalid_json", "body must be JSON"), + status: 400, + code: "invalid_json", + stable: CodeValidationFailed, + title: "Bad 
request", + detail: "body must be JSON", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.problem.Status != tt.status || tt.problem.Code != tt.code || tt.problem.StableCode != tt.stable || tt.problem.Title != tt.title || tt.problem.Detail != tt.detail { + t.Fatalf("unexpected problem: %+v", tt.problem) + } + if tt.problem.Type != "https://docs.webhookery.local/errors/"+tt.code { + t.Fatalf("unexpected type URI %q", tt.problem.Type) + } + if tt.problem.RequestID == "" { + t.Fatal("request id should be preserved") + } + if tt.problem.Retryable { + t.Fatal("client error constructors must not mark problems retryable") + } + }) + } +} + +func TestStableCodeForKnownProblemCodes(t *testing.T) { + tests := map[string]string{ + "invalid_signature": CodeProviderSignatureInvalid, + "not_ready": CodeDurableCaptureUnavailable, + "storage_unavailable": CodeDurableCaptureUnavailable, + "payload_expired": CodeRawPayloadRetainedMetadataOnly, + "notification_channel_url_blocked": CodeSSRFBlockedDestination, + "siem_sink_url_blocked": CodeSSRFBlockedDestination, + "unknown_future_code": CodeUnknownError, + } + for input, want := range tests { + if got := StableCodeFor(input); got != want { + t.Fatalf("StableCodeFor(%q)=%q want %q", input, got, want) + } + } } func contains(s, needle string) bool { diff --git a/internal/provider/provider.go b/internal/provider/provider.go index 1a44fdb..9ea2949 100644 --- a/internal/provider/provider.go +++ b/internal/provider/provider.go @@ -11,6 +11,7 @@ import ( "strings" "time" + "webhookery/internal/domain" "webhookery/pkg/verifier" ) @@ -230,7 +231,7 @@ func (CloudEventsAdapter) Name() string { return "cloudevents" } func (CloudEventsAdapter) Verify(input VerifyInput) VerifyResult { if firstHeader(input.Headers, "ce-id") != "" && firstHeader(input.Headers, "ce-type") != "" { - return result("cloudevents", true, verifier.ReasonOK) + return result("cloudevents", false, 
domain.VerificationReasonUnsignedCloudEvents) } contentType := strings.ToLower(firstHeader(input.Headers, "content-type")) if strings.HasPrefix(contentType, "application/cloudevents+json") { @@ -244,7 +245,7 @@ func (CloudEventsAdapter) Verify(input VerifyInput) VerifyResult { return result("cloudevents", false, verifier.ReasonMalformedHeader) } if envelope.SpecVersion != "" && envelope.ID != "" && envelope.Type != "" && envelope.Source != "" { - return result("cloudevents", true, verifier.ReasonOK) + return result("cloudevents", false, domain.VerificationReasonUnsignedCloudEvents) } return result("cloudevents", false, "missing_cloudevents_headers") } diff --git a/internal/provider/provider_test.go b/internal/provider/provider_test.go index a56c926..766d5f6 100644 --- a/internal/provider/provider_test.go +++ b/internal/provider/provider_test.go @@ -4,78 +4,58 @@ import ( "crypto/hmac" "crypto/sha256" "encoding/base64" - "encoding/hex" "encoding/json" - "fmt" + "os" + "strings" "testing" "time" ) func TestProviderSignatureVectors(t *testing.T) { - now := time.Unix(1_700_000_000, 0) - body := []byte(`{"id":"evt_123","type":"payment_intent.succeeded","event_id":"slack_evt"}`) - - tests := []struct { - name string - adapter string - headers map[string][]string - }{ - { - name: "stripe", - adapter: "stripe", - headers: map[string][]string{ - "stripe-signature": {fmt.Sprintf("t=%d,v1=%s", now.Unix(), hmacHex([]byte("whsec_test"), []byte(fmt.Sprintf("%d.%s", now.Unix(), body))))}, - }, - }, - { - name: "github", - adapter: "github", - headers: map[string][]string{ - "x-hub-signature-256": {"sha256=" + hmacHex([]byte("whsec_test"), body)}, - "x-github-delivery": {"delivery-guid"}, - "x-github-event": {"push"}, - }, - }, - { - name: "shopify", - adapter: "shopify", - headers: map[string][]string{ - "x-shopify-hmac-sha256": {hmacBase64([]byte("whsec_test"), body)}, - "x-shopify-topic": {"orders/create"}, - "x-shopify-shop-domain": {"example.myshopify.com"}, - 
"x-shopify-webhook-id": {"webhook-id"}, - }, - }, - { - name: "slack", - adapter: "slack", - headers: map[string][]string{ - "x-slack-request-timestamp": {fmt.Sprint(now.Unix())}, - "x-slack-signature": {"v0=" + hmacHex([]byte("whsec_test"), []byte(fmt.Sprintf("v0:%d:%s", now.Unix(), body)))}, - }, - }, + registry := loadSignatureVectorRegistry(t) + if registry.SchemaVersion != "webhookery.provider_signature_vectors.v1" { + t.Fatalf("unexpected provider vector registry schema_version %q", registry.SchemaVersion) + } + if len(registry.Vectors) == 0 { + t.Fatal("provider vector registry must contain at least one vector") } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - adapter, ok := BuiltInRegistry().Adapter(tt.adapter) + seen := map[string]bool{} + for _, vector := range registry.Vectors { + vector := vector + t.Run(vector.Name, func(t *testing.T) { + if vector.Provider == "" || vector.Source == "" || vector.CheckedDate == "" || vector.Expected.Reason == "" { + t.Fatalf("vector must include provider, source, checked_date, and expected reason: %+v", vector) + } + if _, err := time.Parse("2006-01-02", vector.CheckedDate); err != nil { + t.Fatalf("vector checked_date must use YYYY-MM-DD: %v", err) + } + if seen[vector.Provider] { + t.Fatalf("duplicate provider vector for %s", vector.Provider) + } + seen[vector.Provider] = true + adapter, ok := BuiltInRegistry().Adapter(vector.Provider) if !ok { - t.Fatalf("missing adapter %s", tt.adapter) + t.Fatalf("missing adapter %s", vector.Provider) + } + now, err := time.Parse(time.RFC3339, vector.Now) + if err != nil { + t.Fatalf("vector now must use RFC3339: %v", err) } result := adapter.Verify(VerifyInput{ - RawBody: body, - Headers: tt.headers, - Secret: []byte("whsec_test"), + RawBody: []byte(vector.RawBody), + Headers: vector.Headers, + Secret: []byte(vector.Secret), Now: now, }) - if !result.Verified { - t.Fatalf("expected verified signature, got %s", result.Reason) + if result.Verified != 
vector.Expected.Verified || result.Reason != vector.Expected.Reason { + t.Fatalf("expected verified=%v reason=%s, got verified=%v reason=%s", vector.Expected.Verified, vector.Expected.Reason, result.Verified, result.Reason) } bad := adapter.Verify(VerifyInput{ - RawBody: []byte(`{"type":"payment_intent.succeeded","id":"evt_123"}`), - Headers: tt.headers, - Secret: []byte("whsec_test"), + RawBody: []byte(vector.MutatedRawBody), + Headers: vector.Headers, + Secret: []byte(vector.Secret), Now: now, }) if bad.Verified { @@ -83,16 +63,98 @@ func TestProviderSignatureVectors(t *testing.T) { } }) } + + for _, provider := range []string{"stripe", "github", "shopify", "slack"} { + if !seen[provider] { + t.Fatalf("provider vector registry missing %s", provider) + } + } +} + +type signatureVectorRegistry struct { + SchemaVersion string `json:"schema_version"` + Vectors []providerSignatureVector `json:"vectors"` +} + +type providerSignatureVector struct { + Name string `json:"name"` + Provider string `json:"provider"` + Source string `json:"source"` + CheckedDate string `json:"checked_date"` + Now string `json:"now"` + Secret string `json:"secret"` + RawBody string `json:"raw_body"` + MutatedRawBody string `json:"mutated_raw_body"` + Headers map[string][]string `json:"headers"` + Expected struct { + Verified bool `json:"verified"` + Reason string `json:"reason"` + } `json:"expected"` +} + +func loadSignatureVectorRegistry(t *testing.T) signatureVectorRegistry { + t.Helper() + raw, err := os.ReadFile("testdata/signature_vectors.json") + if err != nil { + t.Fatal(err) + } + var registry signatureVectorRegistry + if err := json.Unmarshal(raw, &registry); err != nil { + t.Fatal(err) + } + return registry } -func TestCloudEventsAdapterAcceptsStructuredMode(t *testing.T) { +func TestFailedVerificationResultDoesNotExposeSensitiveInputs(t *testing.T) { + result := StripeAdapter{}.Verify(VerifyInput{ + RawBody: []byte(`{"customer":"cus_secret","raw_body":"raw-body-secret"}`), + Headers:
map[string][]string{ + "stripe-signature": {"t=1700000000,v1=signature-secret-marker"}, + }, + Secret: []byte("whsec_secret_marker"), + Now: time.Unix(1_700_000_000, 0), + }) + raw, err := json.Marshal(result) + if err != nil { + t.Fatal(err) + } + if result.Verified { + t.Fatal("expected failed verification") + } + for _, forbidden := range []string{"whsec_secret_marker", "signature-secret-marker", "raw-body-secret", "cus_secret"} { + if strings.Contains(string(raw), forbidden) { + t.Fatalf("failed verification result leaked sensitive input %q: %s", forbidden, raw) + } + } + if !strings.Contains(string(raw), "invalid_signature") { + t.Fatalf("failed verification should retain safe reason, got %s", raw) + } +} + +func TestCloudEventsAdapterDoesNotVerifyUnsignedStructuredMode(t *testing.T) { adapter := CloudEventsAdapter{} result := adapter.Verify(VerifyInput{ Headers: map[string][]string{"content-type": {"application/cloudevents+json"}}, RawBody: []byte(`{"specversion":"1.0","id":"evt_1","type":"invoice.paid","source":"tests"}`), }) - if !result.Verified { - t.Fatalf("structured CloudEvents request should verify as a trusted envelope, got %+v", result) + if result.Verified || result.Reason != "unsigned_cloudevents" { + t.Fatalf("structured CloudEvents validity must not imply trust, got %+v", result) + } +} + +func TestCloudEventsAdapterDoesNotVerifyUnsignedBinaryMode(t *testing.T) { + adapter := CloudEventsAdapter{} + result := adapter.Verify(VerifyInput{ + Headers: map[string][]string{ + "ce-id": {"evt_1"}, + "ce-type": {"invoice.paid"}, + "ce-source": {"tests"}, + "ce-specversion": {"1.0"}, + }, + RawBody: []byte(`{"amount":42}`), + }) + if result.Verified || result.Reason != "unsigned_cloudevents" { + t.Fatalf("binary CloudEvents validity must not imply trust, got %+v", result) } } @@ -187,18 +249,6 @@ func TestGenericJWTAdapterRejectsAlgNone(t *testing.T) { } } -func hmacHex(secret, payload []byte) string { - mac := hmac.New(sha256.New, secret) - _, _ = 
mac.Write(payload) - return hex.EncodeToString(mac.Sum(nil)) -} - -func hmacBase64(secret, payload []byte) string { - mac := hmac.New(sha256.New, secret) - _, _ = mac.Write(payload) - return base64.StdEncoding.EncodeToString(mac.Sum(nil)) -} - func jwtHS256(t *testing.T, secret []byte, claims map[string]any) string { t.Helper() header := base64.RawURLEncoding.EncodeToString(mustJSON(t, map[string]any{"alg": "HS256", "typ": "JWT"})) diff --git a/internal/provider/testdata/signature_vectors.json b/internal/provider/testdata/signature_vectors.json new file mode 100644 index 0000000..ca2060a --- /dev/null +++ b/internal/provider/testdata/signature_vectors.json @@ -0,0 +1,100 @@ +{ + "schema_version": "webhookery.provider_signature_vectors.v1", + "description": "Synthetic local provider signature vectors. These fixtures do not call live providers and do not contain real provider secrets or customer payloads.", + "vectors": [ + { + "name": "stripe-valid-v1-hmac-sha256", + "provider": "stripe", + "source": "https://docs.stripe.com/webhooks/signature", + "checked_date": "2026-06-04", + "now": "2023-11-14T22:13:20Z", + "secret": "whsec_test", + "raw_body": "{\"id\":\"evt_123\",\"type\":\"payment_intent.succeeded\",\"event_id\":\"slack_evt\"}", + "mutated_raw_body": "{\"type\":\"payment_intent.succeeded\",\"id\":\"evt_123\"}", + "headers": { + "stripe-signature": [ + "t=1700000000,v1=9aae435784965c3b24b12c58e70ee43ff01381fd450c94f8a32ce6ea17d4fa46" + ] + }, + "expected": { + "verified": true, + "reason": "ok" + } + }, + { + "name": "github-valid-sha256-hmac", + "provider": "github", + "source": "https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries", + "checked_date": "2026-06-04", + "now": "2023-11-14T22:13:20Z", + "secret": "whsec_test", + "raw_body": "{\"id\":\"evt_123\",\"type\":\"payment_intent.succeeded\",\"event_id\":\"slack_evt\"}", + "mutated_raw_body": "{\"type\":\"payment_intent.succeeded\",\"id\":\"evt_123\"}", + "headers": { + 
"x-hub-signature-256": [ + "sha256=d0377f7b5aedf7b5f9d70900dd7d10e4f3a25fdd83350ffbefac41e357848153" + ], + "x-github-delivery": [ + "delivery-guid" + ], + "x-github-event": [ + "push" + ] + }, + "expected": { + "verified": true, + "reason": "ok" + } + }, + { + "name": "shopify-valid-base64-hmac-sha256", + "provider": "shopify", + "source": "https://shopify.dev/docs/apps/build/webhooks/verify-deliveries", + "checked_date": "2026-06-04", + "now": "2023-11-14T22:13:20Z", + "secret": "whsec_test", + "raw_body": "{\"id\":\"evt_123\",\"type\":\"payment_intent.succeeded\",\"event_id\":\"slack_evt\"}", + "mutated_raw_body": "{\"type\":\"payment_intent.succeeded\",\"id\":\"evt_123\"}", + "headers": { + "x-shopify-hmac-sha256": [ + "0Dd/e1rt97X51wkA3X0Q5POiX92DNQ/776xB41eEgVM=" + ], + "x-shopify-topic": [ + "orders/create" + ], + "x-shopify-shop-domain": [ + "example.myshopify.com" + ], + "x-shopify-webhook-id": [ + "webhook-id" + ] + }, + "expected": { + "verified": true, + "reason": "ok" + } + }, + { + "name": "slack-valid-v0-hmac-sha256", + "provider": "slack", + "source": "https://api.slack.com/authentication/verifying-requests-from-slack", + "checked_date": "2026-06-04", + "now": "2023-11-14T22:13:20Z", + "secret": "whsec_test", + "raw_body": "{\"id\":\"evt_123\",\"type\":\"payment_intent.succeeded\",\"event_id\":\"slack_evt\"}", + "mutated_raw_body": "{\"type\":\"payment_intent.succeeded\",\"id\":\"evt_123\"}", + "headers": { + "x-slack-request-timestamp": [ + "1700000000" + ], + "x-slack-signature": [ + "v0=63d2c60275d7b0eb2c88a6cd88fc3c0942be3b12eb190d42a267fcbfb90c1f14" + ] + }, + "expected": { + "verified": true, + "reason": "ok" + } + } + ] +} diff --git a/internal/ssrf/ssrf_test.go b/internal/ssrf/ssrf_test.go index 5955878..0ce0b69 100644 --- a/internal/ssrf/ssrf_test.go +++ b/internal/ssrf/ssrf_test.go @@ -2,7 +2,9 @@ package ssrf import ( "context" + "errors" "net/netip" + "strings" "testing" ) @@ -36,6 +38,36 @@ func 
TestValidateURLRejectsCredentialsAndHTTP(t *testing.T) { } } +func TestValidateURLRejectsAddressAndParserEdgeCases(t *testing.T) { + validator := Validator{Resolver: StaticResolver{ + "localhost": {netip.MustParseAddr("127.0.0.1")}, + "loopback.example.com": {netip.MustParseAddr("::1")}, + }} + tests := []string{ + "https://localhost/hook", + "https://loopback.example.com/hook", + "https://10.0.0.1/hook", + "https://[fd00::1]/hook", + "https://[fe80::1]/hook", + "https://169.254.169.254/latest/meta-data", + "https://[::ffff:169.254.169.254]/latest/meta-data", + "https://0177.0.0.1/hook", + "gopher://customer.example.com/hook", + "ftp://customer.example.com/hook", + } + for _, rawURL := range tests { + t.Run(rawURL, func(t *testing.T) { + result := validator.Validate(context.Background(), rawURL, DefaultPolicy()) + if result.Allowed { + t.Fatalf("expected %q to be blocked", rawURL) + } + if len(result.BlockedReasons) == 0 { + t.Fatalf("expected blocked reason for %q", rawURL) + } + }) + } +} + func TestValidateURLAllowsPublicHTTPS(t *testing.T) { validator := Validator{Resolver: StaticResolver{ "customer.example.com": {netip.MustParseAddr("93.184.216.34")}, @@ -45,3 +77,24 @@ func TestValidateURLAllowsPublicHTTPS(t *testing.T) { t.Fatalf("expected public https URL to be allowed: %v", result.BlockedReasons) } } + +func TestPinnedDialerBlocksDNSRebindingAfterInitialValidation(t *testing.T) { + initial := Validator{Resolver: StaticResolver{ + "customer.example.com": {netip.MustParseAddr("93.184.216.34")}, + }} + if result := initial.Validate(context.Background(), "https://customer.example.com/webhooks", DefaultPolicy()); !result.Allowed { + t.Fatalf("expected initial endpoint validation to allow public address: %+v", result) + } + + dialer := PinnedDialer{Resolver: StaticResolver{ + "customer.example.com": {netip.MustParseAddr("10.0.0.10")}, + }, Policy: DefaultPolicy()} + _, err := dialer.DialContext(context.Background(), "tcp", "customer.example.com:443") + var 
policyErr PolicyError + if err == nil || !strings.Contains(err.Error(), "blocked_ip_range") { + t.Fatalf("expected delivery-time rebinding block, got %v", err) + } + if !errors.As(err, &policyErr) { + t.Fatalf("expected typed policy error, got %T", err) + } +} diff --git a/internal/ssrf/transport.go b/internal/ssrf/transport.go new file mode 100644 index 0000000..dba5486 --- /dev/null +++ b/internal/ssrf/transport.go @@ -0,0 +1,135 @@ +package ssrf + +import ( + "context" + "errors" + "net" + "net/http" + "net/netip" + "strings" + "time" + + "golang.org/x/net/idna" +) + +type ContextDialer interface { + DialContext(ctx context.Context, network, address string) (net.Conn, error) +} + +type PinnedDialer struct { + Resolver Resolver + Policy Policy + Dialer ContextDialer +} + +type PolicyError struct { + Reasons []string +} + +func (e PolicyError) Error() string { + if len(e.Reasons) == 0 { + return "ssrf policy blocked" + } + return "ssrf policy blocked: " + strings.Join(e.Reasons, ",") +} + +func NewPinnedTransport(base *http.Transport, resolver Resolver, policy Policy) *http.Transport { + var transport *http.Transport + if base != nil { + transport = base.Clone() + } else { + transport = http.DefaultTransport.(*http.Transport).Clone() + } + transport.Proxy = nil + transport.DialContext = PinnedDialer{Resolver: resolver, Policy: policy}.DialContext + transport.DialTLSContext = nil + if transport.TLSClientConfig != nil { + tlsConfig := transport.TLSClientConfig.Clone() + tlsConfig.ServerName = "" + transport.TLSClientConfig = tlsConfig + } + return transport +} + +func (d PinnedDialer) DialContext(ctx context.Context, network, address string) (net.Conn, error) { + host, port, err := net.SplitHostPort(address) + if err != nil || strings.TrimSpace(host) == "" || strings.TrimSpace(port) == "" { + return nil, PolicyError{Reasons: []string{"invalid_dial_address"}} + } + policy := d.Policy + if policy.AllowedPorts == nil { + policy.AllowedPorts = 
DefaultPolicy().AllowedPorts + } + if !policy.AllowedPorts[port] { + return nil, PolicyError{Reasons: []string{"blocked_port"}} + } + asciiHost, err := normalizedDialHost(host) + if err != nil { + return nil, PolicyError{Reasons: []string{"invalid_host"}} + } + addrs, err := d.resolve(ctx, asciiHost, policy) + if err != nil { + return nil, err + } + if len(addrs) == 0 { + return nil, PolicyError{Reasons: []string{"dns_resolution_failed"}} + } + var blocked []string + for _, addr := range addrs { + if blockedAddr(addr, policy) { + blocked = append(blocked, "blocked_ip_range") + } + } + if len(blocked) > 0 { + return nil, PolicyError{Reasons: dedupe(blocked)} + } + dialer := d.Dialer + if dialer == nil { + dialer = &net.Dialer{Timeout: 10 * time.Second, KeepAlive: 30 * time.Second} + } + return dialer.DialContext(ctx, network, net.JoinHostPort(addrs[0].String(), port)) +} + +func (d PinnedDialer) resolve(ctx context.Context, host string, policy Policy) ([]netip.Addr, error) { + if addr, err := netip.ParseAddr(host); err == nil { + reasons := []string{} + if !policy.AllowIPLiteral { + reasons = append(reasons, "ip_literal_blocked") + } + addr = addr.Unmap() + if blockedAddr(addr, policy) { + reasons = append(reasons, "blocked_ip_range") + } + if len(reasons) > 0 { + return nil, PolicyError{Reasons: dedupe(reasons)} + } + return []netip.Addr{addr}, nil + } + resolver := d.Resolver + if resolver == nil { + resolver = NetResolver{} + } + resolved, err := resolver.LookupIPAddr(ctx, host) + if err != nil { + return nil, PolicyError{Reasons: []string{"dns_resolution_failed"}} + } + addrs := make([]netip.Addr, 0, len(resolved)) + for _, addr := range resolved { + if !addr.IsValid() { + continue + } + addrs = append(addrs, addr.Unmap()) + } + return addrs, nil +} + +func normalizedDialHost(host string) (string, error) { + trimmed := strings.TrimSuffix(strings.ToLower(strings.TrimSpace(host)), ".") + if trimmed == "" { + return "", errors.New("empty host") + } + if _, err := 
netip.ParseAddr(trimmed); err == nil { + return trimmed, nil + } + return idna.Lookup.ToASCII(trimmed) +} diff --git a/internal/ssrf/transport_test.go b/internal/ssrf/transport_test.go new file mode 100644 index 0000000..02cb399 --- /dev/null +++ b/internal/ssrf/transport_test.go @@ -0,0 +1,111 @@ +package ssrf + +import ( + "context" + "errors" + "net" + "net/netip" + "testing" +) + +func TestPinnedDialerBlocksDNSRebindingAtDialTime(t *testing.T) { + preflight := Validator{Resolver: StaticResolver{ + "customer.example.com": {netip.MustParseAddr("93.184.216.34")}, + }} + result := preflight.Validate(context.Background(), "https://customer.example.com/webhook", DefaultPolicy()) + if !result.Allowed { + t.Fatalf("expected preflight validation to allow public DNS answer: %v", result.BlockedReasons) + } + + dialer := PinnedDialer{ + Resolver: StaticResolver{ + "customer.example.com": {netip.MustParseAddr("10.0.0.10")}, + }, + Policy: DefaultPolicy(), + Dialer: &capturingDialer{}, + } + if _, err := dialer.DialContext(context.Background(), "tcp", "customer.example.com:443"); err == nil { + t.Fatal("dial-time private DNS answer must be blocked") + } +} + +func TestPinnedDialerBlocksMetadataAndIPv4MappedIPv6(t *testing.T) { + tests := []struct { + name string + addr netip.Addr + }{ + {name: "metadata", addr: netip.MustParseAddr("169.254.169.254")}, + {name: "ipv4 mapped metadata", addr: netip.MustParseAddr("::ffff:169.254.169.254")}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + capture := &capturingDialer{} + dialer := PinnedDialer{ + Resolver: StaticResolver{"customer.example.com": {tt.addr}}, + Policy: DefaultPolicy(), + Dialer: capture, + } + if _, err := dialer.DialContext(context.Background(), "tcp", "customer.example.com:443"); err == nil { + t.Fatal("metadata address must be blocked") + } + if capture.called { + t.Fatal("blocked address must not reach the network dialer") + } + }) + } +} + +func 
TestPinnedDialerNormalizesIDNAHostBeforeResolving(t *testing.T) { + capture := &capturingDialer{err: errStopAfterDial} + dialer := PinnedDialer{ + Resolver: StaticResolver{"xn--bcher-kva.example": {netip.MustParseAddr("93.184.216.34")}}, + Policy: DefaultPolicy(), + Dialer: capture, + } + _, err := dialer.DialContext(context.Background(), "tcp", "bücher.example:443") + if !errors.Is(err, errStopAfterDial) { + t.Fatalf("expected fake dialer error after allowed pinned dial, got %v", err) + } + if capture.address != "93.184.216.34:443" { + t.Fatalf("expected dial to pinned public IP, got %q", capture.address) + } +} + +func TestPinnedDialerRejectsMixedPublicAndPrivateAnswers(t *testing.T) { + capture := &capturingDialer{} + dialer := PinnedDialer{ + Resolver: StaticResolver{ + "customer.example.com": { + netip.MustParseAddr("93.184.216.34"), + netip.MustParseAddr("10.0.0.10"), + }, + }, + Policy: DefaultPolicy(), + Dialer: capture, + } + if _, err := dialer.DialContext(context.Background(), "tcp", "customer.example.com:443"); err == nil { + t.Fatal("mixed public/private DNS answers must be blocked") + } + if capture.called { + t.Fatal("mixed blocked answers must not reach the network dialer") + } +} + +var errStopAfterDial = errors.New("stop after dial") + +type capturingDialer struct { + called bool + network string + address string + err error +} + +func (d *capturingDialer) DialContext(_ context.Context, network, address string) (net.Conn, error) { + d.called = true + d.network = network + d.address = address + if d.err != nil { + return nil, d.err + } + return nil, errStopAfterDial +} diff --git a/internal/worker/worker.go b/internal/worker/worker.go index c56353c..24990d1 100644 --- a/internal/worker/worker.go +++ b/internal/worker/worker.go @@ -99,6 +99,76 @@ type AlertStore interface { EvaluateAlertRules(ctx context.Context, workerID string, limit int) error } +type AuditChainBackfillResult struct { + LeaseAcquired bool + TenantsScanned int + EventsBackfilled 
int + More bool +} + +type AuditChainBackfillStore interface { + BackfillAuditChain(ctx context.Context, workerID string, limit int) (AuditChainBackfillResult, error) +} + +type Phase string + +const ( + PhaseOutbox Phase = "outbox" + PhaseDelivery Phase = "delivery" + PhaseRetention Phase = "retention" + PhaseMetrics Phase = "metrics" + PhaseAlerts Phase = "alerts" + PhaseAuditChainBackfill Phase = "audit_chain_backfill" + PhaseNotification Phase = "notification" + PhaseSIEM Phase = "siem" +) + +type PhaseResult struct { + Phase Phase + Err error +} + +type RunReport struct { + Results []PhaseResult +} + +func (r RunReport) Err() error { + var errs []error + for _, result := range r.Results { + if result.Err == nil { + continue + } + errs = append(errs, phaseError{phase: result.Phase, err: result.Err}) + } + return errors.Join(errs...) +} + +func (r RunReport) Result(phase Phase) (PhaseResult, bool) { + for _, result := range r.Results { + if result.Phase == phase { + return result, true + } + } + return PhaseResult{}, false +} + +func (r *RunReport) add(phase Phase, err error) { + r.Results = append(r.Results, PhaseResult{Phase: phase, Err: err}) +} + +type phaseError struct { + phase Phase + err error +} + +func (e phaseError) Error() string { + return string(e.phase) + " phase failed" +} + +func (e phaseError) Unwrap() error { + return e.err +} + type Worker struct { Store OutboxStore Processor OutboxProcessor @@ -111,15 +181,74 @@ type Worker struct { RetentionStore RetentionStore MetricsStore MetricsStore AlertStore AlertStore + AuditChainBackfillStore AuditChainBackfillStore WorkerID string Limit int } func (w Worker) RunOnce(ctx context.Context) error { + return w.RunOnceReport(ctx).Err() +} + +func (w Worker) RunOnceReport(ctx context.Context) RunReport { limit := w.Limit if limit <= 0 { limit = 10 } + var report RunReport + report.add(PhaseOutbox, w.runOutbox(ctx, limit)) + if stopAfterPhase(report.Results[len(report.Results)-1].Err) { + return report + } 
+ if w.DeliveryStore != nil && w.DeliveryClient != nil { + err := w.runDeliveries(ctx, limit) + report.add(PhaseDelivery, err) + if stopAfterPhase(err) { + return report + } + } + if w.RetentionStore != nil { + err := w.RetentionStore.ApplyRetentionPolicies(ctx, w.WorkerID, limit) + report.add(PhaseRetention, err) + if stopAfterPhase(err) { + return report + } + } + if w.MetricsStore != nil { + err := w.MetricsStore.RefreshMetricsRollups(ctx, w.WorkerID, limit) + report.add(PhaseMetrics, err) + if stopAfterPhase(err) { + return report + } + } + if w.AlertStore != nil { + err := w.AlertStore.EvaluateAlertRules(ctx, w.WorkerID, limit) + report.add(PhaseAlerts, err) + if stopAfterPhase(err) { + return report + } + } + if w.AuditChainBackfillStore != nil { + _, err := w.AuditChainBackfillStore.BackfillAuditChain(ctx, w.WorkerID, limit) + report.add(PhaseAuditChainBackfill, err) + if stopAfterPhase(err) { + return report + } + } + if w.NotificationDeliveryStore != nil && w.NotificationClient != nil { + err := w.runNotificationDeliveries(ctx, limit) + report.add(PhaseNotification, err) + if stopAfterPhase(err) { + return report + } + } + if w.SIEMDeliveryStore != nil && w.SIEMClient != nil { + report.add(PhaseSIEM, w.runSIEMDeliveries(ctx, limit)) + } + return report +} + +func (w Worker) runOutbox(ctx context.Context, limit int) error { items, err := w.Store.ClaimOutbox(ctx, w.WorkerID, limit) if err != nil { return err @@ -137,63 +266,58 @@ func (w Worker) RunOnce(ctx context.Context) error { return err } } - if w.DeliveryStore != nil && w.DeliveryClient != nil { - deliveries, err := w.DeliveryStore.ClaimDueDeliveries(ctx, w.WorkerID, limit) - if err != nil { - return err - } - for _, item := range deliveries { - result, deliverErr := w.DeliveryClient.Deliver(ctx, item.EndpointURL, item.Body, item.SigningSecret, item.SigningKeyID, item.SigningKeyVersion, item.MTLSClientCertPEM, item.MTLSClientKeyPEM) - if err := w.DeliveryStore.RecordDeliveryAttempt(ctx, item, result, 
deliverErr); err != nil { - return err - } - } + return nil +} + +func (w Worker) runDeliveries(ctx context.Context, limit int) error { + deliveries, err := w.DeliveryStore.ClaimDueDeliveries(ctx, w.WorkerID, limit) + if err != nil { + return err } - if w.RetentionStore != nil { - if err := w.RetentionStore.ApplyRetentionPolicies(ctx, w.WorkerID, limit); err != nil { + for _, item := range deliveries { + result, deliverErr := w.DeliveryClient.Deliver(ctx, item.EndpointURL, item.Body, item.SigningSecret, item.SigningKeyID, item.SigningKeyVersion, item.MTLSClientCertPEM, item.MTLSClientKeyPEM) + if err := w.DeliveryStore.RecordDeliveryAttempt(ctx, item, result, deliverErr); err != nil { return err } } - if w.MetricsStore != nil { - if err := w.MetricsStore.RefreshMetricsRollups(ctx, w.WorkerID, limit); err != nil { - return err - } + return nil +} + +func (w Worker) runNotificationDeliveries(ctx context.Context, limit int) error { + deliveries, err := w.NotificationDeliveryStore.ClaimNotificationDeliveries(ctx, w.WorkerID, limit) + if err != nil { + return err } - if w.AlertStore != nil { - if err := w.AlertStore.EvaluateAlertRules(ctx, w.WorkerID, limit); err != nil { + for _, item := range deliveries { + result, deliverErr := w.NotificationClient.Deliver(ctx, item.URL, item.Body, item.Secret) + if err := w.NotificationDeliveryStore.RecordNotificationDeliveryAttempt(ctx, item, result, deliverErr); err != nil { return err } } - if w.NotificationDeliveryStore != nil && w.NotificationClient != nil { - deliveries, err := w.NotificationDeliveryStore.ClaimNotificationDeliveries(ctx, w.WorkerID, limit) - if err != nil { - return err - } - for _, item := range deliveries { - result, deliverErr := w.NotificationClient.Deliver(ctx, item.URL, item.Body, item.Secret) - if err := w.NotificationDeliveryStore.RecordNotificationDeliveryAttempt(ctx, item, result, deliverErr); err != nil { - return err - } - } + return nil +} + +func (w Worker) runSIEMDeliveries(ctx context.Context, 
limit int) error { + if err := w.SIEMDeliveryStore.EnqueueSIEMDeliveries(ctx, w.WorkerID, limit); err != nil { + return err } - if w.SIEMDeliveryStore != nil && w.SIEMClient != nil { - if err := w.SIEMDeliveryStore.EnqueueSIEMDeliveries(ctx, w.WorkerID, limit); err != nil { - return err - } - deliveries, err := w.SIEMDeliveryStore.ClaimSIEMDeliveries(ctx, w.WorkerID, limit) - if err != nil { + deliveries, err := w.SIEMDeliveryStore.ClaimSIEMDeliveries(ctx, w.WorkerID, limit) + if err != nil { + return err + } + for _, item := range deliveries { + result, deliverErr := w.SIEMClient.Deliver(ctx, item.URL, item.Body, item.Secret) + if err := w.SIEMDeliveryStore.RecordSIEMDeliveryAttempt(ctx, item, result, deliverErr); err != nil { return err } - for _, item := range deliveries { - result, deliverErr := w.SIEMClient.Deliver(ctx, item.URL, item.Body, item.Secret) - if err := w.SIEMDeliveryStore.RecordSIEMDeliveryAttempt(ctx, item, result, deliverErr); err != nil { - return err - } - } } return nil } +func stopAfterPhase(err error) bool { + return errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) +} + func (w Worker) Complete(item OutboxItem, ctx context.Context) error { return w.Store.CompleteOutbox(ctx, item.ID) } diff --git a/internal/worker/worker_test.go b/internal/worker/worker_test.go index 6f583ff..2507124 100644 --- a/internal/worker/worker_test.go +++ b/internal/worker/worker_test.go @@ -2,6 +2,8 @@ package worker import ( "context" + "errors" + "strings" "testing" ) @@ -108,6 +110,112 @@ func TestRunOnceEnqueuesAndDeliversClaimedSIEMSignal(t *testing.T) { } } +func TestRunOnceRunsAuditChainBackfillPhase(t *testing.T) { + store := &fakeWorkerStore{auditBackfillResult: AuditChainBackfillResult{LeaseAcquired: true, EventsBackfilled: 3, More: true}} + w := Worker{Store: store, AuditChainBackfillStore: store, WorkerID: "worker_1", Limit: 8} + report := w.RunOnceReport(context.Background()) + if err := report.Err(); err != nil { + 
t.Fatal(err) + } + if store.auditBackfillWorkerID != "worker_1" || store.auditBackfillLimit != 8 { + t.Fatalf("expected audit-chain backfill to run with worker id and limit, got worker=%q limit=%d", store.auditBackfillWorkerID, store.auditBackfillLimit) + } + result, ok := report.Result(PhaseAuditChainBackfill) + if !ok || result.Err != nil { + t.Fatalf("expected successful audit-chain backfill phase result, got result=%+v ok=%v", result, ok) + } +} + +func TestRunOnceContinuesAcrossIndependentPhaseFailures(t *testing.T) { + deliveryErr := errors.New("delivery claim failed") + retentionErr := errors.New("retention failed") + store := &fakeWorkerStore{ + claimDeliveriesErr: deliveryErr, + retentionErr: retentionErr, + } + w := Worker{ + Store: store, + DeliveryStore: store, + DeliveryClient: &fakeDeliveryClient{}, + RetentionStore: store, + MetricsStore: store, + AlertStore: store, + NotificationDeliveryStore: store, + NotificationClient: &fakeSignalClient{}, + SIEMDeliveryStore: store, + SIEMClient: &fakeSignalClient{}, + WorkerID: "worker_1", + Limit: 2, + } + err := w.RunOnce(context.Background()) + if !errors.Is(err, deliveryErr) { + t.Fatalf("expected delivery phase error, got %v", err) + } + if !errors.Is(err, retentionErr) { + t.Fatalf("expected retention phase error, got %v", err) + } + if store.metricsWorkerID != "worker_1" || store.alertWorkerID != "worker_1" { + t.Fatalf("expected metrics and alerts to run after earlier failures, metrics=%q alerts=%q", store.metricsWorkerID, store.alertWorkerID) + } + if !store.notificationClaimed || store.siemEnqueueWorkerID != "worker_1" || !store.siemClaimed { + t.Fatalf("expected notification and SIEM phases to run after earlier failures, notification=%v siem_enqueued=%q siem_claimed=%v", store.notificationClaimed, store.siemEnqueueWorkerID, store.siemClaimed) + } +} + +func TestRunOnceReportRecordsPhaseResults(t *testing.T) { + deliveryErr := errors.New("delivery claim failed") + retentionErr := errors.New("retention 
failed") + store := &fakeWorkerStore{ + claimDeliveriesErr: deliveryErr, + retentionErr: retentionErr, + } + w := Worker{ + Store: store, + DeliveryStore: store, + DeliveryClient: &fakeDeliveryClient{}, + RetentionStore: store, + MetricsStore: store, + WorkerID: "worker_1", + Limit: 2, + } + report := w.RunOnceReport(context.Background()) + if !errors.Is(report.Err(), deliveryErr) || !errors.Is(report.Err(), retentionErr) { + t.Fatalf("expected report to include delivery and retention errors, got %v", report.Err()) + } + deliveryResult, ok := report.Result(PhaseDelivery) + if !ok || !errors.Is(deliveryResult.Err, deliveryErr) { + t.Fatalf("expected delivery phase result with error, got result=%+v ok=%v", deliveryResult, ok) + } + retentionResult, ok := report.Result(PhaseRetention) + if !ok || !errors.Is(retentionResult.Err, retentionErr) { + t.Fatalf("expected retention phase result with error, got result=%+v ok=%v", retentionResult, ok) + } + metricsResult, ok := report.Result(PhaseMetrics) + if !ok || metricsResult.Err != nil { + t.Fatalf("expected successful metrics phase result, got result=%+v ok=%v", metricsResult, ok) + } +} + +func TestRunReportErrorRedactsUnderlyingPhaseDetails(t *testing.T) { + secretErr := errors.New("backend failed with whsec_secret and raw-body-secret") + var report RunReport + report.add(PhaseDelivery, secretErr) + + err := report.Err() + if err == nil { + t.Fatal("expected phase error") + } + if !errors.Is(err, secretErr) { + t.Fatalf("phase error should preserve unwrap semantics, got %v", err) + } + if strings.Contains(err.Error(), "whsec_secret") || strings.Contains(err.Error(), "raw-body-secret") { + t.Fatalf("worker phase error leaked underlying sensitive detail: %v", err) + } + if !strings.Contains(err.Error(), "delivery phase failed") { + t.Fatalf("worker phase error should identify failed phase without details: %v", err) + } +} + type fakeWorkerStore struct { items []OutboxItem processed string @@ -115,17 +223,25 @@ type 
fakeWorkerStore struct { deliveries []DeliveryItem recorded string processErr error + claimDeliveriesErr error retentionWorkerID string retentionLimit int + retentionErr error metricsWorkerID string metricsLimit int alertWorkerID string alertLimit int notificationDeliveries []SignalDeliveryItem + notificationClaimed bool notificationRecorded string siemDeliveries []SignalDeliveryItem siemRecorded string siemEnqueueWorkerID string + siemClaimed bool + auditBackfillWorkerID string + auditBackfillLimit int + auditBackfillResult AuditChainBackfillResult + auditBackfillErr error } func (f *fakeWorkerStore) ClaimOutbox(context.Context, string, int) ([]OutboxItem, error) { @@ -140,6 +256,9 @@ func (f *fakeWorkerStore) CompleteOutbox(_ context.Context, outboxID string) err return nil } func (f *fakeWorkerStore) ClaimDueDeliveries(context.Context, string, int) ([]DeliveryItem, error) { + if f.claimDeliveriesErr != nil { + return nil, f.claimDeliveriesErr + } return f.deliveries, nil } func (f *fakeWorkerStore) RecordDeliveryAttempt(_ context.Context, item DeliveryItem, _ DeliveryResult, _ error) error { @@ -149,7 +268,7 @@ func (f *fakeWorkerStore) RecordDeliveryAttempt(_ context.Context, item Delivery func (f *fakeWorkerStore) ApplyRetentionPolicies(_ context.Context, workerID string, limit int) error { f.retentionWorkerID = workerID f.retentionLimit = limit - return nil + return f.retentionErr } func (f *fakeWorkerStore) RefreshMetricsRollups(_ context.Context, workerID string, limit int) error { f.metricsWorkerID = workerID @@ -162,6 +281,7 @@ func (f *fakeWorkerStore) EvaluateAlertRules(_ context.Context, workerID string, return nil } func (f *fakeWorkerStore) ClaimNotificationDeliveries(context.Context, string, int) ([]SignalDeliveryItem, error) { + f.notificationClaimed = true return f.notificationDeliveries, nil } func (f *fakeWorkerStore) RecordNotificationDeliveryAttempt(_ context.Context, item SignalDeliveryItem, _ SignalDeliveryResult, _ error) error { @@ -173,12 
+293,18 @@ func (f *fakeWorkerStore) EnqueueSIEMDeliveries(_ context.Context, workerID stri return nil } func (f *fakeWorkerStore) ClaimSIEMDeliveries(context.Context, string, int) ([]SignalDeliveryItem, error) { + f.siemClaimed = true return f.siemDeliveries, nil } func (f *fakeWorkerStore) RecordSIEMDeliveryAttempt(_ context.Context, item SignalDeliveryItem, _ SignalDeliveryResult, _ error) error { f.siemRecorded = item.ID return nil } +func (f *fakeWorkerStore) BackfillAuditChain(_ context.Context, workerID string, limit int) (AuditChainBackfillResult, error) { + f.auditBackfillWorkerID = workerID + f.auditBackfillLimit = limit + return f.auditBackfillResult, f.auditBackfillErr +} type fakeDeliveryClient struct { certPEM []byte diff --git a/migrations/026_incidents.down.sql b/migrations/026_incidents.down.sql new file mode 100644 index 0000000..9177918 --- /dev/null +++ b/migrations/026_incidents.down.sql @@ -0,0 +1,4 @@ +DROP TABLE IF EXISTS incident_evidence_exports; +DROP TABLE IF EXISTS incident_report_snapshots; +DROP TABLE IF EXISTS incident_events; +DROP TABLE IF EXISTS incidents; diff --git a/migrations/026_incidents.up.sql b/migrations/026_incidents.up.sql new file mode 100644 index 0000000..0ebc008 --- /dev/null +++ b/migrations/026_incidents.up.sql @@ -0,0 +1,46 @@ +CREATE TABLE IF NOT EXISTS incidents ( + id text PRIMARY KEY, + tenant_id text NOT NULL REFERENCES tenants(id), + title text NOT NULL, + reason text NOT NULL DEFAULT '', + state text NOT NULL DEFAULT 'active', + created_by text NOT NULL DEFAULT '', + created_at timestamptz NOT NULL DEFAULT now(), + updated_at timestamptz NOT NULL DEFAULT now() +); +CREATE INDEX IF NOT EXISTS incidents_tenant_state_created_idx ON incidents(tenant_id, state, created_at DESC); + +CREATE TABLE IF NOT EXISTS incident_events ( + id text PRIMARY KEY, + tenant_id text NOT NULL REFERENCES tenants(id), + incident_id text NOT NULL REFERENCES incidents(id) ON DELETE CASCADE, + event_id text NOT NULL REFERENCES 
events(id), + added_by text NOT NULL DEFAULT '', + reason text NOT NULL DEFAULT '', + created_at timestamptz NOT NULL DEFAULT now(), + UNIQUE (tenant_id, incident_id, event_id) +); +CREATE INDEX IF NOT EXISTS incident_events_tenant_incident_idx ON incident_events(tenant_id, incident_id, created_at ASC); +CREATE INDEX IF NOT EXISTS incident_events_tenant_event_idx ON incident_events(tenant_id, event_id, created_at DESC); + +CREATE TABLE IF NOT EXISTS incident_report_snapshots ( + id text PRIMARY KEY, + tenant_id text NOT NULL REFERENCES tenants(id), + incident_id text NOT NULL REFERENCES incidents(id) ON DELETE CASCADE, + schema_version text NOT NULL, + report_json jsonb NOT NULL, + report_markdown text NOT NULL, + generated_by text NOT NULL DEFAULT '', + generated_at timestamptz NOT NULL DEFAULT now() +); +CREATE INDEX IF NOT EXISTS incident_report_snapshots_latest_idx ON incident_report_snapshots(tenant_id, incident_id, generated_at DESC); + +CREATE TABLE IF NOT EXISTS incident_evidence_exports ( + id text PRIMARY KEY, + tenant_id text NOT NULL REFERENCES tenants(id), + incident_id text NOT NULL REFERENCES incidents(id) ON DELETE CASCADE, + export_id text NOT NULL REFERENCES evidence_exports(id), + created_by text NOT NULL DEFAULT '', + created_at timestamptz NOT NULL DEFAULT now() +); +CREATE INDEX IF NOT EXISTS incident_evidence_exports_tenant_incident_idx ON incident_evidence_exports(tenant_id, incident_id, created_at DESC); diff --git a/migrations/027_replay_reason_codes.down.sql b/migrations/027_replay_reason_codes.down.sql new file mode 100644 index 0000000..63bf220 --- /dev/null +++ b/migrations/027_replay_reason_codes.down.sql @@ -0,0 +1,2 @@ +ALTER TABLE replay_jobs DROP COLUMN IF EXISTS reason_code; + diff --git a/migrations/027_replay_reason_codes.up.sql b/migrations/027_replay_reason_codes.up.sql new file mode 100644 index 0000000..c124718 --- /dev/null +++ b/migrations/027_replay_reason_codes.up.sql @@ -0,0 +1,2 @@ +ALTER TABLE replay_jobs ADD COLUMN 
IF NOT EXISTS reason_code text NOT NULL DEFAULT 'operator_requested'; + diff --git a/migrations/028_replay_approval_expiry.down.sql b/migrations/028_replay_approval_expiry.down.sql new file mode 100644 index 0000000..df9e36d --- /dev/null +++ b/migrations/028_replay_approval_expiry.down.sql @@ -0,0 +1,3 @@ +DROP INDEX IF EXISTS replay_jobs_approval_expiry_idx; + +ALTER TABLE replay_jobs DROP COLUMN IF EXISTS approval_expires_at; diff --git a/migrations/028_replay_approval_expiry.up.sql b/migrations/028_replay_approval_expiry.up.sql new file mode 100644 index 0000000..7eb0e42 --- /dev/null +++ b/migrations/028_replay_approval_expiry.up.sql @@ -0,0 +1,10 @@ +ALTER TABLE replay_jobs ADD COLUMN IF NOT EXISTS approval_expires_at timestamptz; + +UPDATE replay_jobs +SET approval_expires_at = created_at + interval '24 hours' +WHERE approval_required = true + AND approval_expires_at IS NULL; + +CREATE INDEX IF NOT EXISTS replay_jobs_approval_expiry_idx + ON replay_jobs(tenant_id, state, approval_expires_at) + WHERE approval_required = true AND state = 'pending_approval'; diff --git a/migrations/029_replay_approval_policies.down.sql b/migrations/029_replay_approval_policies.down.sql new file mode 100644 index 0000000..94b9e9a --- /dev/null +++ b/migrations/029_replay_approval_policies.down.sql @@ -0,0 +1,3 @@ +DROP INDEX IF EXISTS replay_approval_policies_active_idx; + +DROP TABLE IF EXISTS replay_approval_policies; diff --git a/migrations/029_replay_approval_policies.up.sql b/migrations/029_replay_approval_policies.up.sql new file mode 100644 index 0000000..25e9aa6 --- /dev/null +++ b/migrations/029_replay_approval_policies.up.sql @@ -0,0 +1,18 @@ +CREATE TABLE IF NOT EXISTS replay_approval_policies ( + id text PRIMARY KEY, + tenant_id text NOT NULL REFERENCES tenants(id), + scope_type text NOT NULL, + scope_id text NOT NULL DEFAULT '', + require_approval boolean NOT NULL DEFAULT true, + default_expiry_seconds integer NOT NULL DEFAULT 86400, + state text NOT NULL DEFAULT 
'active', + reason text NOT NULL DEFAULT '', + created_by text NOT NULL, + created_at timestamptz NOT NULL DEFAULT now(), + updated_at timestamptz NOT NULL DEFAULT now(), + UNIQUE (tenant_id, scope_type, scope_id) +); + +CREATE INDEX IF NOT EXISTS replay_approval_policies_active_idx + ON replay_approval_policies(tenant_id, scope_type, scope_id) + WHERE state = 'active' AND require_approval = true; diff --git a/openapi.yaml b/openapi.yaml index 48696cd..1e5e354 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -5,15 +5,50 @@ info: description: Self-hosted webhook evidence and delivery control plane. servers: - url: http://localhost:8080 +tags: + - name: System + description: Health, readiness, metrics, and the OpenAPI document. + - name: API Keys + description: Management API key lifecycle. + - name: Producer Trust + description: Product-event producer credentials, OAuth, and mTLS identities. + - name: Auth And Identity + description: OIDC sessions, SCIM, role bindings, access policies, and authorization explanation. + - name: Sources And Providers + description: Webhook sources, provider connections, adapters, and provider ingress. + - name: Endpoints And Routing + description: Outbound endpoints, subscriptions, routes, and retry policies. + - name: Schemas And Transformations + description: Event schemas, compatibility checks, and deterministic transformations. + - name: Events And Ingestion + description: Product events, provider ingestion, event evidence, raw payloads, and timelines. + - name: Incidents + description: Webhook incident packets, report snapshots, and incident evidence exports. + - name: Delivery And Replay + description: Deliveries, attempts, replay jobs, dead letter, and quarantine. + - name: Reconciliation + description: Provider reconciliation jobs and gap evidence. + - name: Audit And Retention + description: Audit events, audit chain, evidence exports, and retention policies. 
+ - name: Operations + description: Operational metrics, storage/config views, workers, queues, and alerts. + - name: Signal Egress + description: Notification channels, notification deliveries, SIEM sinks, and SIEM deliveries. paths: /healthz: get: + tags: + - System + operationId: getHealthz summary: Liveness check responses: "200": description: Process is alive. /readyz: get: + tags: + - System + operationId: getReadyz summary: Readiness check responses: "200": @@ -22,12 +57,18 @@ paths: description: A dependency is unavailable. /openapi.yaml: get: + tags: + - System + operationId: getOpenapiYaml summary: OpenAPI document responses: "200": description: YAML OpenAPI document. /metrics: get: + tags: + - System + operationId: getMetrics summary: Prometheus metrics responses: "200": @@ -38,6 +79,9 @@ paths: type: string /v1/api-keys: get: + tags: + - API Keys + operationId: getApiKeys summary: List API keys security: - bearerAuth: [] @@ -51,6 +95,9 @@ paths: schema: $ref: "#/components/schemas/APIKeyPage" post: + tags: + - API Keys + operationId: postApiKeys summary: Create API key security: - bearerAuth: [] @@ -71,6 +118,9 @@ paths: $ref: "#/components/responses/Problem" /v1/api-keys/{api_key_id}:revoke: post: + tags: + - API Keys + operationId: postApiKeysApiKeyIdRevoke summary: Revoke API key security: - bearerAuth: [] @@ -95,6 +145,9 @@ paths: $ref: "#/components/schemas/APIKey" /v1/oauth/token: post: + tags: + - Producer Trust + operationId: postOauthToken summary: Issue producer OAuth access token description: Issues an opaque short-lived bearer token for product-event producers using the client credentials grant. Only HTTP Basic client authentication is accepted; client secrets in the form body are rejected. 
security: @@ -105,6 +158,9 @@ paths: application/x-www-form-urlencoded: schema: $ref: "#/components/schemas/OAuthClientCredentialsRequest" + example: + grant_type: client_credentials + scope: events:write responses: "200": description: Non-cacheable producer access token response. @@ -121,12 +177,20 @@ paths: application/json: schema: $ref: "#/components/schemas/ProducerTokenResponse" + example: + access_token: "" + token_type: Bearer + expires_in: 900 + scope: events:write "400": $ref: "#/components/responses/Problem" "401": $ref: "#/components/responses/Problem" /v1/producer-clients: get: + tags: + - Producer Trust + operationId: getProducerClients summary: List producer OAuth clients description: Lists tenant-scoped producer client metadata. Client secret hashes and token values are never returned. security: @@ -141,6 +205,9 @@ paths: schema: $ref: "#/components/schemas/ProducerClientPage" post: + tags: + - Producer Trust + operationId: postProducerClients summary: Create producer OAuth client description: Creates a tenant/source-bound product-event producer credential. The generated client secret is returned once only. 
security: @@ -162,6 +229,9 @@ paths: $ref: "#/components/responses/Problem" /v1/producer-clients/{client_id}: get: + tags: + - Producer Trust + operationId: getProducerClientsClientId summary: Get producer OAuth client security: - bearerAuth: [] @@ -179,6 +249,9 @@ paths: schema: $ref: "#/components/schemas/ProducerClient" patch: + tags: + - Producer Trust + operationId: patchProducerClientsClientId summary: Update producer OAuth client security: - bearerAuth: [] @@ -202,6 +275,9 @@ paths: schema: $ref: "#/components/schemas/ProducerClient" delete: + tags: + - Producer Trust + operationId: deleteProducerClientsClientId summary: Disable producer OAuth client security: - bearerAuth: [] @@ -226,6 +302,9 @@ paths: $ref: "#/components/schemas/ProducerClient" /v1/producer-clients/{client_id}/secrets:rotate: post: + tags: + - Producer Trust + operationId: postProducerClientsClientIdSecretsRotate summary: Rotate producer client secret description: Revokes active producer client secrets and returns the new client secret once only. security: @@ -251,6 +330,9 @@ paths: $ref: "#/components/schemas/ProducerClientSecretRotated" /v1/producer-mtls-identities: get: + tags: + - Producer Trust + operationId: getProducerMtlsIdentities summary: List producer mTLS identities description: Lists tenant-scoped producer client-certificate identities. Private keys and raw certificate private material are never stored or returned. 
security: @@ -265,6 +347,9 @@ paths: schema: $ref: "#/components/schemas/ProducerMTLSIdentityPage" post: + tags: + - Producer Trust + operationId: postProducerMtlsIdentities summary: Create producer mTLS identity security: - bearerAuth: [] @@ -283,6 +368,9 @@ paths: $ref: "#/components/schemas/ProducerMTLSIdentity" /v1/producer-mtls-identities/{identity_id}: get: + tags: + - Producer Trust + operationId: getProducerMtlsIdentitiesIdentityId summary: Get producer mTLS identity security: - bearerAuth: [] @@ -300,6 +388,9 @@ paths: schema: $ref: "#/components/schemas/ProducerMTLSIdentity" patch: + tags: + - Producer Trust + operationId: patchProducerMtlsIdentitiesIdentityId summary: Update producer mTLS identity security: - bearerAuth: [] @@ -323,6 +414,9 @@ paths: schema: $ref: "#/components/schemas/ProducerMTLSIdentity" delete: + tags: + - Producer Trust + operationId: deleteProducerMtlsIdentitiesIdentityId summary: Disable producer mTLS identity security: - bearerAuth: [] @@ -347,6 +441,9 @@ paths: $ref: "#/components/schemas/ProducerMTLSIdentity" /v1/producer-mtls-identities/{identity_id}:verify: post: + tags: + - Producer Trust + operationId: postProducerMtlsIdentitiesIdentityIdVerify summary: Verify producer mTLS certificate against identity security: - bearerAuth: [] @@ -371,6 +468,9 @@ paths: $ref: "#/components/schemas/ProducerMTLSIdentityVerification" /v1/auth/oidc/login: get: + tags: + - Auth And Identity + operationId: getAuthOidcLogin summary: Start OIDC management login description: Starts Authorization Code + PKCE login for a tenant identity provider and redirects to the provider authorization endpoint. parameters: @@ -394,6 +494,9 @@ paths: description: Redirect to identity provider. /v1/auth/oidc/callback: get: + tags: + - Auth And Identity + operationId: getAuthOidcCallback summary: Complete OIDC management login description: Validates state, nonce, issuer, audience, expiry, and signed ID token before creating a hashed management session cookie. 
parameters: @@ -416,6 +519,9 @@ paths: $ref: "#/components/schemas/AuthSessionCreated" /v1/auth/logout: post: + tags: + - Auth And Identity + operationId: postAuthLogout summary: Revoke current management session security: - bearerAuth: [] @@ -424,6 +530,9 @@ paths: description: Session revoked. /v1/auth/session: get: + tags: + - Auth And Identity + operationId: getAuthSession summary: Get current management session security: - bearerAuth: [] @@ -434,8 +543,20 @@ paths: application/json: schema: $ref: "#/components/schemas/AuthSession" + example: + id: ses_example + tenant_id: ten_example + user_id: usr_example + external_identity_id: oidc_example + state: active + created_at: "2026-05-26T12:00:00Z" + last_seen_at: "2026-05-26T12:05:00Z" + expires_at: "2026-05-26T20:00:00Z" /v1/auth/sessions: get: + tags: + - Auth And Identity + operationId: getAuthSessions summary: List active management sessions security: - bearerAuth: [] @@ -450,6 +571,9 @@ paths: $ref: "#/components/schemas/AuthSessionPage" /v1/auth/sessions/{session_id}:revoke: post: + tags: + - Auth And Identity + operationId: postAuthSessionsSessionIdRevoke summary: Revoke management session security: - bearerAuth: [] @@ -474,6 +598,9 @@ paths: $ref: "#/components/schemas/AuthSession" /v1/identity-providers: get: + tags: + - Auth And Identity + operationId: getIdentityProviders summary: List identity providers description: Lists tenant-scoped OIDC identity provider metadata. Client secrets are never returned. 
security: @@ -488,6 +615,9 @@ paths: schema: $ref: "#/components/schemas/IdentityProviderPage" post: + tags: + - Auth And Identity + operationId: postIdentityProviders summary: Create OIDC identity provider security: - bearerAuth: [] @@ -506,6 +636,9 @@ paths: $ref: "#/components/schemas/IdentityProvider" /v1/identity-providers/{provider_id}: get: + tags: + - Auth And Identity + operationId: getIdentityProvidersProviderId summary: Get identity provider security: - bearerAuth: [] @@ -523,6 +656,9 @@ paths: schema: $ref: "#/components/schemas/IdentityProvider" patch: + tags: + - Auth And Identity + operationId: patchIdentityProvidersProviderId summary: Update identity provider security: - bearerAuth: [] @@ -546,6 +682,9 @@ paths: schema: $ref: "#/components/schemas/IdentityProvider" delete: + tags: + - Auth And Identity + operationId: deleteIdentityProvidersProviderId summary: Disable identity provider security: - bearerAuth: [] @@ -570,6 +709,9 @@ paths: $ref: "#/components/schemas/IdentityProvider" /v1/identity-providers/{provider_id}:test: post: + tags: + - Auth And Identity + operationId: postIdentityProvidersProviderIdTest summary: Test identity provider configuration security: - bearerAuth: [] @@ -594,6 +736,9 @@ paths: $ref: "#/components/schemas/IdentityProvider" /v1/scim-tokens: get: + tags: + - Auth And Identity + operationId: getScimTokens summary: List SCIM tokens description: Lists SCIM token metadata only. Raw tokens and hashes are never returned. security: @@ -606,6 +751,9 @@ paths: schema: $ref: "#/components/schemas/SCIMTokenPage" post: + tags: + - Auth And Identity + operationId: postScimTokens summary: Create SCIM token description: Returns the SCIM bearer token exactly once. 
security: @@ -625,6 +773,9 @@ paths: $ref: "#/components/schemas/SCIMTokenCreated" /v1/scim-tokens/{token_id}: delete: + tags: + - Auth And Identity + operationId: deleteScimTokensTokenId summary: Revoke SCIM token security: - bearerAuth: [] @@ -649,6 +800,9 @@ paths: $ref: "#/components/schemas/SCIMToken" /v1/scim/v2/Users: get: + tags: + - Auth And Identity + operationId: getScimV2Users summary: List SCIM users security: - bearerAuth: [] @@ -656,6 +810,9 @@ paths: "200": description: SCIM ListResponse. post: + tags: + - Auth And Identity + operationId: postScimV2Users summary: Provision SCIM user security: - bearerAuth: [] @@ -670,6 +827,9 @@ paths: description: User provisioned. /v1/scim/v2/Users/{user_id}: get: + tags: + - Auth And Identity + operationId: getScimV2UsersUserId summary: Get SCIM user security: - bearerAuth: [] @@ -683,6 +843,9 @@ paths: "200": description: SCIM user. put: + tags: + - Auth And Identity + operationId: putScimV2UsersUserId summary: Replace SCIM user security: - bearerAuth: [] @@ -702,6 +865,9 @@ paths: "200": description: User replaced. patch: + tags: + - Auth And Identity + operationId: patchScimV2UsersUserId summary: Patch SCIM user security: - bearerAuth: [] @@ -721,6 +887,9 @@ paths: "200": description: User patched. delete: + tags: + - Auth And Identity + operationId: deleteScimV2UsersUserId summary: Deactivate SCIM user security: - bearerAuth: [] @@ -735,6 +904,9 @@ paths: description: User deactivated. /v1/scim/v2/Groups: get: + tags: + - Auth And Identity + operationId: getScimV2Groups summary: List SCIM groups security: - bearerAuth: [] @@ -742,6 +914,9 @@ paths: "200": description: SCIM ListResponse. post: + tags: + - Auth And Identity + operationId: postScimV2Groups summary: Provision SCIM group security: - bearerAuth: [] @@ -756,6 +931,9 @@ paths: description: Group provisioned. 
/v1/scim/v2/Groups/{group_id}: get: + tags: + - Auth And Identity + operationId: getScimV2GroupsGroupId summary: Get SCIM group security: - bearerAuth: [] @@ -769,6 +947,9 @@ paths: "200": description: SCIM group. put: + tags: + - Auth And Identity + operationId: putScimV2GroupsGroupId summary: Replace SCIM group security: - bearerAuth: [] @@ -788,6 +969,9 @@ paths: "200": description: Group replaced. patch: + tags: + - Auth And Identity + operationId: patchScimV2GroupsGroupId summary: Patch SCIM group security: - bearerAuth: [] @@ -807,6 +991,9 @@ paths: "200": description: Group patched. delete: + tags: + - Auth And Identity + operationId: deleteScimV2GroupsGroupId summary: Deactivate SCIM group security: - bearerAuth: [] @@ -821,6 +1008,9 @@ paths: description: Group deactivated. /v1/role-bindings: get: + tags: + - Auth And Identity + operationId: getRoleBindings summary: List resource-aware role bindings security: - bearerAuth: [] @@ -832,6 +1022,9 @@ paths: schema: $ref: "#/components/schemas/RoleBindingPage" post: + tags: + - Auth And Identity + operationId: postRoleBindings summary: Create resource-aware role binding security: - bearerAuth: [] @@ -850,6 +1043,9 @@ paths: $ref: "#/components/schemas/RoleBinding" /v1/role-bindings/{binding_id}: patch: + tags: + - Auth And Identity + operationId: patchRoleBindingsBindingId summary: Update role binding security: - bearerAuth: [] @@ -869,6 +1065,9 @@ paths: "200": description: Role binding updated. delete: + tags: + - Auth And Identity + operationId: deleteRoleBindingsBindingId summary: Disable role binding security: - bearerAuth: [] @@ -889,6 +1088,9 @@ paths: description: Role binding disabled. 
/v1/access-policies: get: + tags: + - Auth And Identity + operationId: getAccessPolicies summary: List access policy rules security: - bearerAuth: [] @@ -900,6 +1102,9 @@ paths: schema: $ref: "#/components/schemas/AccessPolicyRulePage" post: + tags: + - Auth And Identity + operationId: postAccessPolicies summary: Create access policy rule security: - bearerAuth: [] @@ -918,6 +1123,9 @@ paths: $ref: "#/components/schemas/AccessPolicyRule" /v1/access-policies/{policy_id}: patch: + tags: + - Auth And Identity + operationId: patchAccessPoliciesPolicyId summary: Update access policy rule security: - bearerAuth: [] @@ -937,6 +1145,9 @@ paths: "200": description: Access policy rule updated. delete: + tags: + - Auth And Identity + operationId: deleteAccessPoliciesPolicyId summary: Disable access policy rule security: - bearerAuth: [] @@ -957,6 +1168,9 @@ paths: description: Access policy rule disabled. /v1/authz:explain: post: + tags: + - Auth And Identity + operationId: postAuthzExplain summary: Explain authorization decision description: Returns a redacted allow/deny explanation for security operators. security: @@ -976,6 +1190,9 @@ paths: $ref: "#/components/schemas/AuthzDecision" /v1/sources: get: + tags: + - Sources And Providers + operationId: getSources summary: List inbound sources security: - bearerAuth: [] @@ -987,6 +1204,9 @@ paths: schema: $ref: "#/components/schemas/SourcePage" post: + tags: + - Sources And Providers + operationId: postSources summary: Create inbound source security: - bearerAuth: [] @@ -996,6 +1216,10 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateSourceRequest" + example: + name: stripe-prod + provider: stripe + verification_secret: "" responses: "201": description: Source created. 
@@ -1005,6 +1229,9 @@ paths: $ref: "#/components/schemas/Source" /v1/sources/{source_id}: get: + tags: + - Sources And Providers + operationId: getSourcesSourceId summary: Get inbound source security: - bearerAuth: [] @@ -1024,6 +1251,9 @@ paths: "404": description: Source not found in this tenant. patch: + tags: + - Sources And Providers + operationId: patchSourcesSourceId summary: Update inbound source metadata or state description: Updates mutable source metadata. Verification secrets are rotated through the dedicated rotate endpoint and are never returned. security: @@ -1048,6 +1278,9 @@ paths: schema: $ref: "#/components/schemas/Source" delete: + tags: + - Sources And Providers + operationId: deleteSourcesSourceId summary: Disable inbound source description: Disables the source without deleting historical evidence. security: @@ -1073,6 +1306,9 @@ paths: $ref: "#/components/schemas/Source" /v1/sources/{source_id}/secrets:rotate: post: + tags: + - Sources And Providers + operationId: postSourcesSourceIdSecretsRotate summary: Rotate source verification secret description: Creates a new active source secret version and keeps the previous version during the requested grace period. Plaintext secrets are never returned. security: @@ -1098,6 +1334,9 @@ paths: $ref: "#/components/schemas/SourceSecretVersion" /v1/provider-connections: get: + tags: + - Sources And Providers + operationId: getProviderConnections summary: List provider API reconciliation connections description: Plaintext provider credentials are never returned. 
security: @@ -1112,6 +1351,9 @@ paths: schema: $ref: "#/components/schemas/ProviderConnectionPage" post: + tags: + - Sources And Providers + operationId: postProviderConnections summary: Create provider API reconciliation connection security: - bearerAuth: [] @@ -1121,6 +1363,13 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateProviderConnectionRequest" + example: + name: stripe-reconciliation + provider: stripe + credential_type: api_key + credential: "" + config: + source_id: src_stripe responses: "201": description: Provider connection created with encrypted credentials. @@ -1130,6 +1379,9 @@ paths: $ref: "#/components/schemas/ProviderConnection" /v1/provider-connections/{connection_id}: get: + tags: + - Sources And Providers + operationId: getProviderConnectionsConnectionId summary: Get provider API reconciliation connection security: - bearerAuth: [] @@ -1148,6 +1400,9 @@ paths: $ref: "#/components/schemas/ProviderConnection" /v1/provider-connections/{connection_id}:verify: post: + tags: + - Sources And Providers + operationId: postProviderConnectionsConnectionIdVerify summary: Verify provider API connection credentials security: - bearerAuth: [] @@ -1172,6 +1427,9 @@ paths: $ref: "#/components/schemas/ProviderConnection" /v1/provider-connections/{connection_id}:revoke: post: + tags: + - Sources And Providers + operationId: postProviderConnectionsConnectionIdRevoke summary: Revoke provider API connection credentials security: - bearerAuth: [] @@ -1196,6 +1454,9 @@ paths: $ref: "#/components/schemas/ProviderConnection" /v1/adapters: get: + tags: + - Sources And Providers + operationId: getAdapters summary: List built-in and tenant custom adapters description: Returns adapter governance metadata only. Plugin packages are not executed by this endpoint. 
security: @@ -1210,6 +1471,9 @@ paths: schema: $ref: "#/components/schemas/ProviderAdapterPage" post: + tags: + - Sources And Providers + operationId: postAdapters summary: Create tenant custom adapter security: - bearerAuth: [] @@ -1228,6 +1492,9 @@ paths: $ref: "#/components/schemas/ProviderAdapter" /v1/adapters/{adapter_id}: get: + tags: + - Sources And Providers + operationId: getAdaptersAdapterId summary: Get adapter metadata security: - bearerAuth: [] @@ -1246,6 +1513,9 @@ paths: $ref: "#/components/schemas/ProviderAdapter" /v1/adapters/{adapter_id}/versions: get: + tags: + - Sources And Providers + operationId: getAdaptersAdapterIdVersions summary: List adapter versions security: - bearerAuth: [] @@ -1264,6 +1534,9 @@ paths: schema: $ref: "#/components/schemas/AdapterVersionPage" post: + tags: + - Sources And Providers + operationId: postAdaptersAdapterIdVersions summary: Create adapter version description: Declarative versions store JSON definitions. Plugin versions store signed package metadata only and are not executed. security: @@ -1289,6 +1562,9 @@ paths: $ref: "#/components/schemas/AdapterVersion" /v1/adapters/{adapter_id}/versions/{version_id}/test-vectors: post: + tags: + - Sources And Providers + operationId: postAdaptersAdapterIdVersionsVersionIdTestVectors summary: Add adapter version test vector security: - bearerAuth: [] @@ -1318,6 +1594,9 @@ paths: $ref: "#/components/schemas/AdapterTestVector" /v1/adapters/{adapter_id}/versions/{version_id}:transition: post: + tags: + - Sources And Providers + operationId: postAdaptersAdapterIdVersionsVersionIdTransition summary: Transition adapter version through approval workflow description: Supported actions are submit_tests, request_review, approve_staging, activate, deprecate, and retire. 
security: @@ -1348,6 +1627,9 @@ paths: $ref: "#/components/schemas/AdapterVersion" /v1/endpoints: get: + tags: + - Endpoints And Routing + operationId: getEndpoints summary: List outbound endpoints security: - bearerAuth: [] @@ -1359,6 +1641,9 @@ paths: schema: $ref: "#/components/schemas/EndpointPage" post: + tags: + - Endpoints And Routing + operationId: postEndpoints summary: Create outbound endpoint security: - bearerAuth: [] @@ -1368,6 +1653,9 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateEndpointRequest" + example: + name: billing-receiver + url: https://receiver.example/webhook responses: "201": description: Endpoint created after SSRF validation. @@ -1377,6 +1665,9 @@ paths: $ref: "#/components/schemas/Endpoint" /v1/endpoints/{endpoint_id}: get: + tags: + - Endpoints And Routing + operationId: getEndpointsEndpointId summary: Get outbound endpoint security: - bearerAuth: [] @@ -1396,6 +1687,9 @@ paths: "404": description: Endpoint not found in this tenant. patch: + tags: + - Endpoints And Routing + operationId: patchEndpointsEndpointId summary: Update outbound endpoint metadata, URL, retry policy, or state description: URL updates rerun the SSRF policy before persistence. Signing secrets and mTLS key material are managed through dedicated endpoints. security: @@ -1422,6 +1716,9 @@ paths: "422": description: Endpoint URL blocked by SSRF policy. delete: + tags: + - Endpoints And Routing + operationId: deleteEndpointsEndpointId summary: Disable outbound endpoint description: Disables future delivery claims without deleting historical deliveries, attempts, or evidence. security: @@ -1447,6 +1744,9 @@ paths: $ref: "#/components/schemas/Endpoint" /v1/endpoints:validate-url: post: + tags: + - Endpoints And Routing + operationId: postEndpointsValidateUrl summary: Validate endpoint URL against SSRF policy security: - bearerAuth: [] @@ -1455,6 +1755,9 @@ paths: description: URL validation result. 
/v1/endpoints/{endpoint_id}:test: post: + tags: + - Endpoints And Routing + operationId: postEndpointsEndpointIdTest summary: Schedule a signed endpoint test delivery security: - bearerAuth: [] @@ -1475,6 +1778,9 @@ paths: description: Endpoint test delivery scheduled. /v1/endpoints/{endpoint_id}/secrets:rotate: post: + tags: + - Endpoints And Routing + operationId: postEndpointsEndpointIdSecretsRotate summary: Rotate endpoint signing secret description: Creates a new active endpoint signing secret version. Outbound deliveries include signing key metadata headers. security: @@ -1500,6 +1806,9 @@ paths: $ref: "#/components/schemas/EndpointSecretVersion" /v1/subscriptions: get: + tags: + - Endpoints And Routing + operationId: getSubscriptions summary: List subscriptions security: - bearerAuth: [] @@ -1511,6 +1820,9 @@ paths: schema: $ref: "#/components/schemas/SubscriptionPage" post: + tags: + - Endpoints And Routing + operationId: postSubscriptions summary: Create subscription security: - bearerAuth: [] @@ -1529,6 +1841,9 @@ paths: $ref: "#/components/schemas/Subscription" /v1/subscriptions/{subscription_id}: get: + tags: + - Endpoints And Routing + operationId: getSubscriptionsSubscriptionId summary: Get subscription security: - bearerAuth: [] @@ -1548,6 +1863,9 @@ paths: "404": description: Subscription not found in this tenant. patch: + tags: + - Endpoints And Routing + operationId: patchSubscriptionsSubscriptionId summary: Update subscription description: Updates fanout configuration and records a new immutable subscription version. security: @@ -1572,6 +1890,9 @@ paths: schema: $ref: "#/components/schemas/Subscription" delete: + tags: + - Endpoints And Routing + operationId: deleteSubscriptionsSubscriptionId summary: Disable subscription description: Disables future fanout without deleting historical deliveries or configuration evidence. 
security: @@ -1597,6 +1918,9 @@ paths: $ref: "#/components/schemas/Subscription" /v1/transformations: get: + tags: + - Schemas And Transformations + operationId: getTransformations summary: List deterministic transformations security: - bearerAuth: [] @@ -1610,6 +1934,9 @@ paths: schema: $ref: "#/components/schemas/TransformationPage" post: + tags: + - Schemas And Transformations + operationId: postTransformations summary: Create deterministic transformation security: - bearerAuth: [] @@ -1628,6 +1955,9 @@ paths: $ref: "#/components/schemas/Transformation" /v1/transformations/{transformation_id}: get: + tags: + - Schemas And Transformations + operationId: getTransformationsTransformationId summary: Get deterministic transformation security: - bearerAuth: [] @@ -1646,6 +1976,9 @@ paths: $ref: "#/components/schemas/Transformation" /v1/transformations/{transformation_id}/versions: get: + tags: + - Schemas And Transformations + operationId: getTransformationsTransformationIdVersions summary: List immutable transformation versions security: - bearerAuth: [] @@ -1664,6 +1997,9 @@ paths: schema: $ref: "#/components/schemas/TransformationVersionPage" post: + tags: + - Schemas And Transformations + operationId: postTransformationsTransformationIdVersions summary: Create immutable transformation version security: - bearerAuth: [] @@ -1688,6 +2024,9 @@ paths: $ref: "#/components/schemas/TransformationVersion" /v1/transformations/{transformation_id}/versions/{version_id}:activate: post: + tags: + - Schemas And Transformations + operationId: postTransformationsTransformationIdVersionsVersionIdActivate summary: Activate transformation version security: - bearerAuth: [] @@ -1717,6 +2056,9 @@ paths: $ref: "#/components/schemas/TransformationVersion" /v1/retry-policies: get: + tags: + - Endpoints And Routing + operationId: getRetryPolicies summary: List retry policies security: - bearerAuth: [] @@ -1728,6 +2070,9 @@ paths: schema: $ref: "#/components/schemas/RetryPolicyPage" post: 
+ tags: + - Endpoints And Routing + operationId: postRetryPolicies summary: Create retry policy version security: - bearerAuth: [] @@ -1746,6 +2091,9 @@ paths: $ref: "#/components/schemas/RetryPolicy" /v1/retry-policies/{retry_policy_id}: get: + tags: + - Endpoints And Routing + operationId: getRetryPoliciesRetryPolicyId summary: Get retry policy security: - bearerAuth: [] @@ -1765,6 +2113,9 @@ paths: "404": description: Retry policy not found in this tenant. patch: + tags: + - Endpoints And Routing + operationId: patchRetryPoliciesRetryPolicyId summary: Create a new retry policy version from an existing policy description: Leaves the existing policy row intact and returns the new version row. security: @@ -1789,6 +2140,9 @@ paths: schema: $ref: "#/components/schemas/RetryPolicy" delete: + tags: + - Endpoints And Routing + operationId: deleteRetryPoliciesRetryPolicyId summary: Disable retry policy description: Disables future use of the referenced retry policy row without deleting historical delivery evidence. security: @@ -1814,6 +2168,9 @@ paths: $ref: "#/components/schemas/RetryPolicy" /v1/routes: get: + tags: + - Endpoints And Routing + operationId: getRoutes summary: List routes security: - bearerAuth: [] @@ -1825,6 +2182,9 @@ paths: schema: $ref: "#/components/schemas/RoutePage" post: + tags: + - Endpoints And Routing + operationId: postRoutes summary: Create route security: - bearerAuth: [] @@ -1834,6 +2194,15 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateRouteRequest" + example: + source_id: src_stripe + name: invoice-events + endpoint_id: end_billing + event_types: + - invoice.paid + - invoice.updated + priority: 100 + state: active responses: "201": description: Route created. 
@@ -1843,6 +2212,9 @@ paths: $ref: "#/components/schemas/Route" /v1/routes/{route_id}: get: + tags: + - Endpoints And Routing + operationId: getRoutesRouteId summary: Get route security: - bearerAuth: [] @@ -1862,6 +2234,9 @@ paths: "404": description: Route not found in this tenant. patch: + tags: + - Endpoints And Routing + operationId: patchRoutesRouteId summary: Update route description: Updates route matching/destination metadata and records a new immutable route version. security: @@ -1886,6 +2261,9 @@ paths: schema: $ref: "#/components/schemas/Route" delete: + tags: + - Endpoints And Routing + operationId: deleteRoutesRouteId summary: Inactivate route description: Moves the route to inactive without deleting historical decisions, deliveries, or version evidence. security: @@ -1911,6 +2289,9 @@ paths: $ref: "#/components/schemas/Route" /v1/routes/{route_id}/versions: get: + tags: + - Endpoints And Routing + operationId: getRoutesRouteIdVersions summary: List immutable route versions security: - bearerAuth: [] @@ -1930,6 +2311,9 @@ paths: $ref: "#/components/schemas/RouteVersionPage" /v1/routes/{route_id}:activate: post: + tags: + - Endpoints And Routing + operationId: postRoutesRouteIdActivate summary: Activate route security: - bearerAuth: [] @@ -1944,6 +2328,9 @@ paths: description: Route activated. /v1/routes/{route_id}:dry-run: post: + tags: + - Endpoints And Routing + operationId: postRoutesRouteIdDryRun summary: Dry-run route against event security: - bearerAuth: [] @@ -1958,6 +2345,9 @@ paths: description: Route match explanation. 
/v1/event-types: get: + tags: + - Schemas And Transformations + operationId: getEventTypes summary: List event types security: - bearerAuth: [] @@ -1969,6 +2359,9 @@ paths: schema: $ref: "#/components/schemas/EventTypePage" post: + tags: + - Schemas And Transformations + operationId: postEventTypes summary: Create event type security: - bearerAuth: [] @@ -1987,6 +2380,9 @@ paths: $ref: "#/components/schemas/EventType" /v1/event-types/{event_type}: get: + tags: + - Schemas And Transformations + operationId: getEventTypesEventType summary: Get event type security: - bearerAuth: [] @@ -2006,6 +2402,9 @@ paths: "404": description: Event type not found in this tenant. patch: + tags: + - Schemas And Transformations + operationId: patchEventTypesEventType summary: Update event type metadata or state description: Event type names remain immutable; delete disables the event type. security: @@ -2030,6 +2429,9 @@ paths: schema: $ref: "#/components/schemas/EventType" delete: + tags: + - Schemas And Transformations + operationId: deleteEventTypesEventType summary: Disable event type description: Event type delete is a state transition to disabled; historical schemas and evidence remain. 
security: @@ -2055,6 +2457,9 @@ paths: $ref: "#/components/schemas/EventType" /v1/event-types/{event_type}/schemas: get: + tags: + - Schemas And Transformations + operationId: getEventTypesEventTypeSchemas summary: List schemas for event type security: - bearerAuth: [] @@ -2072,6 +2477,9 @@ paths: schema: $ref: "#/components/schemas/EventSchemaPage" post: + tags: + - Schemas And Transformations + operationId: postEventTypesEventTypeSchemas summary: Create schema for event type security: - bearerAuth: [] @@ -2096,6 +2504,9 @@ paths: $ref: "#/components/schemas/EventSchema" /v1/event-types/{event_type}/schemas/{schema_version}: get: + tags: + - Schemas And Transformations + operationId: getEventTypesEventTypeSchemasSchemaVersion summary: Get schema for event type security: - bearerAuth: [] @@ -2120,6 +2531,9 @@ paths: "404": description: Schema not found in this tenant. patch: + tags: + - Schemas And Transformations + operationId: patchEventTypesEventTypeSchemasSchemaVersion summary: Update schema lifecycle state description: Schema body and version remain immutable; lifecycle state changes are audited and config-versioned. security: @@ -2149,6 +2563,9 @@ paths: schema: $ref: "#/components/schemas/EventSchema" delete: + tags: + - Schemas And Transformations + operationId: deleteEventTypesEventTypeSchemasSchemaVersion summary: Retire schema description: Schema delete is a state transition to retired; historical evidence remains. 
security: @@ -2179,6 +2596,9 @@ paths: $ref: "#/components/schemas/EventSchema" /v1/event-types/{event_type}/schemas/{schema_version}:validate: post: + tags: + - Schemas And Transformations + operationId: postEventTypesEventTypeSchemasSchemaVersionValidate summary: Validate payload against schema security: - bearerAuth: [] @@ -2208,6 +2628,9 @@ paths: $ref: "#/components/schemas/SchemaValidationResult" /v1/event-types/{event_type}/schemas/{schema_version}:check-compatibility: post: + tags: + - Schemas And Transformations + operationId: postEventTypesEventTypeSchemasSchemaVersionCheckCompatibility summary: Check schema compatibility description: Performs conservative JSON-object compatibility checks for required fields and property type changes. security: @@ -2238,13 +2661,70 @@ paths: $ref: "#/components/schemas/SchemaCompatibilityResult" /v1/events: get: + tags: + - Events And Ingestion + operationId: getEvents summary: Search events security: - bearerAuth: [] + parameters: + - name: limit + in: query + schema: + type: integer + minimum: 1 + maximum: 100 + - name: provider + in: query + schema: + type: string + description: Provider name, such as `stripe`, `github`, or `shopify`. + - name: external_id + in: query + schema: + type: string + description: Provider event ID, such as a Stripe event ID. + - name: delivery_id + in: query + schema: + type: string + description: Webhookery delivery ID linked to the event. + - name: status + in: query + schema: + type: string + enum: [dlq, dead_lettered] + description: Forensic status preset. `dlq` and `dead_lettered` return events with open DLQ evidence. + - name: verification + in: query + schema: + type: string + enum: [valid, invalid] + description: Provider signature verification result. + - name: received_after + in: query + schema: + type: string + format: date-time + description: Lower bound for event receipt time. 
+ - name: route_id + in: query + schema: + type: string + description: Route ID with delivery evidence linked to the event. responses: "200": description: Paginated event list. + content: + application/json: + schema: + $ref: "#/components/schemas/EventPage" + "400": + $ref: "#/components/responses/ValidationProblem" post: + tags: + - Events And Ingestion + operationId: postEvents summary: Ingest product event description: Accepts product events from management API keys, producer OAuth bearer tokens, or verified producer mTLS identities. Source-bound credentials must match the submitted `source_id`. security: @@ -2256,15 +2736,36 @@ paths: application/json: schema: $ref: "#/components/schemas/ProductEventIngestRequest" + example: + source_id: src_internal + id: evt_demo_001 + type: demo.created + data: + ok: true responses: "202": description: Product event accepted after durable capture. + content: + application/json: + example: + Accepted: true + EventID: evt_example + ReceiptID: rcp_example + RawPayloadID: raw_example + TraceID: req_example + VerifyReason: verified + DedupeStatus: unique + "400": + $ref: "#/components/responses/ValidationProblem" "401": - $ref: "#/components/responses/Problem" + $ref: "#/components/responses/UnauthorizedProblem" "403": - $ref: "#/components/responses/Problem" + $ref: "#/components/responses/ForbiddenProblem" /v1/events/{event_id}: get: + tags: + - Events And Ingestion + operationId: getEventsEventId summary: Get event security: - bearerAuth: [] @@ -2277,10 +2778,22 @@ paths: responses: "200": description: Event detail. + content: + application/json: + example: + id: evt_example + source_id: src_internal + event_type: demo.created + verification_status: verified + dedupe_status: unique + created_at: "2026-05-26T12:00:00Z" "404": - description: Event not found or not visible. 
+ $ref: "#/components/responses/NotFoundProblem" /v1/events/{event_id}/raw: get: + tags: + - Events And Ingestion + operationId: getEventsEventIdRaw summary: Get raw payload evidence security: - bearerAuth: [] @@ -2290,15 +2803,38 @@ paths: required: true schema: type: string + - name: reason + in: query + required: true + description: Operator reason for elevated raw payload access. The reason is recorded on the raw_payload.read audit event. + schema: + type: string + minLength: 1 + maxLength: 500 responses: "200": description: Raw body as base64 plus hash and storage metadata. + content: + application/json: + example: + event_id: evt_example + raw_payload_hash: sha256:0f343b0931126a20f133d67c2b018a3b + content_type: application/json + size_bytes: 42 + storage_backend: postgres + storage_status: stored + body_base64: eyJpZCI6ImV2dF9leGFtcGxlIn0= + "400": + $ref: "#/components/responses/ValidationProblem" "403": - description: Actor lacks raw payload permission. + $ref: "#/components/responses/ForbiddenProblem" "410": description: Raw payload body has expired or was deleted by retention; metadata remains on related records. /v1/events/{event_id}/normalized: get: + tags: + - Events And Ingestion + operationId: getEventsEventIdNormalized summary: Get normalized event evidence description: Metadata and hashes require events:read. Including normalized data requires events:raw and writes an audit event. security: @@ -2323,11 +2859,14 @@ paths: schema: $ref: "#/components/schemas/NormalizedEnvelope" "403": - description: Actor lacks events:raw for include_data=true. + $ref: "#/components/responses/ForbiddenProblem" "410": description: Normalized data was deleted by retention; metadata and hashes remain. 
/v1/events/{event_id}/timeline: get: + tags: + - Events And Ingestion + operationId: getEventsEventIdTimeline summary: Get event timeline security: - bearerAuth: [] @@ -2341,8 +2880,237 @@ paths: responses: "200": description: Timeline entries for event, receipts, deliveries, attempts, and audit records. + content: + application/json: + schema: + $ref: "#/components/schemas/EventTimelinePage" + /v1/incidents: + get: + tags: + - Incidents + operationId: getIncidents + summary: List webhook incidents + security: + - bearerAuth: [] + parameters: + - $ref: "#/components/parameters/Limit" + responses: + "200": + description: Paginated tenant-scoped incidents. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentPage" + post: + tags: + - Incidents + operationId: postIncidents + summary: Create webhook incident + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateIncidentRequest" + example: + title: "Stripe payment webhook failed" + reason: "customer support investigation" + responses: + "201": + description: Incident created. + content: + application/json: + schema: + $ref: "#/components/schemas/Incident" + "403": + $ref: "#/components/responses/ForbiddenProblem" + /v1/incidents/{incident_id}: + get: + tags: + - Incidents + operationId: getIncidentsIncidentId + summary: Get webhook incident + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + responses: + "200": + description: Incident metadata. + content: + application/json: + schema: + $ref: "#/components/schemas/Incident" + "404": + description: Incident not found or not visible. 
+ /v1/incidents/{incident_id}/events: + post: + tags: + - Incidents + operationId: postIncidentsIncidentIdEvents + summary: Attach event to incident + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/AddIncidentEventRequest" + example: + event_id: evt_example + reason: "failed downstream delivery" + responses: + "201": + description: Event attached to incident. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentEvent" + "404": + description: Incident or event not found in the actor tenant. + /v1/incidents/{incident_id}/events/{event_id}: + delete: + tags: + - Incidents + operationId: deleteIncidentsIncidentIdEventsEventId + summary: Remove event from incident + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + - name: event_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/StateChangeRequest" + example: + reason: "event not related to this incident" + responses: + "200": + description: Event removed from incident. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentEvent" + /v1/incidents/{incident_id}/generate-report: + post: + tags: + - Incidents + operationId: postIncidentsIncidentIdGenerateReport + summary: Generate incident report snapshot + description: Generates a tenant-scoped JSON and Markdown report from attached event timelines. Raw payload bodies are omitted by default. 
+ security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentReportRequest" + example: + reason: "support handoff" + responses: + "201": + description: Incident report snapshot generated. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentReportSnapshot" + /v1/incidents/{incident_id}/report: + get: + tags: + - Incidents + operationId: getIncidentsIncidentIdReport + summary: Get latest incident report snapshot + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + - name: format + in: query + required: false + schema: + type: string + enum: [json, markdown] + default: json + responses: + "200": + description: Latest incident report snapshot. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentReportSnapshot" + text/markdown: + schema: + type: string + /v1/incidents/{incident_id}/evidence-export: + post: + tags: + - Incidents + operationId: postIncidentsIncidentIdEvidenceExport + summary: Create incident evidence export + description: Creates a tenant-scoped evidence bundle that includes incident_report.json and incident_report.md. Raw payload bodies are excluded. + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateIncidentEvidenceExportRequest" + example: + reason: "customer evidence package" + responses: + "202": + description: Incident evidence export created. 
+ content: + application/json: + schema: + $ref: "#/components/schemas/EvidenceExport" /v1/ingest/{tenant_id}/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestTenantIdSourceId summary: Generic provider webhook ingestion parameters: - name: tenant_id @@ -2355,70 +3123,113 @@ paths: required: true schema: type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + additionalProperties: true + example: + id: evt_provider_123 + type: invoice.paid + data: + object: invoice responses: "200": description: Accepted after durable capture. + content: + application/json: + example: + Accepted: true + EventID: evt_example + ReceiptID: rcp_example + RawPayloadID: raw_example + TraceID: "" + VerifyReason: verified + DedupeStatus: unique "401": - description: Invalid signature. + $ref: "#/components/responses/UnauthorizedProblem" "413": - description: Payload too large. + $ref: "#/components/responses/PayloadTooLargeProblem" "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" "503": - description: Durable storage unavailable before acknowledgement. + $ref: "#/components/responses/StorageUnavailableProblem" /v1/ingest/stripe/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestStripeSourceId summary: Stripe webhook ingestion responses: "200": description: Accepted after durable capture. "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/github/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestGithubSourceId summary: GitHub webhook ingestion responses: "200": description: Accepted after durable capture. "431": - description: Request headers too large. 
+ $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/shopify/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestShopifySourceId summary: Shopify webhook ingestion responses: "200": description: Accepted after durable capture. "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/slack/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestSlackSourceId summary: Slack webhook ingestion responses: "200": description: Accepted after durable capture. "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/cloudevents/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestCloudeventsSourceId summary: CloudEvents webhook ingestion responses: "200": description: Accepted after durable capture for binary or structured CloudEvents envelopes. "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/generic-jwt/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestGenericJwtSourceId summary: Generic JWT/JWS webhook ingestion - description: Accepts compact JWT/JWS signatures from `Authorization: Bearer ...` or `Webhook-JWT`; only HS256 is supported and the token must include `exp` plus a `body_sha256` claim for the exact raw body. + description: "Accepts compact JWT/JWS signatures from `Authorization: Bearer ...` or `Webhook-JWT`; only HS256 is supported and the token must include `exp` plus a `body_sha256` claim for the exact raw body." responses: "200": description: Accepted after durable capture. "401": description: Invalid or unsupported JWT signature. "431": - description: Request headers too large. 
+ $ref: "#/components/responses/HeadersTooLargeProblem" /v1/deliveries: get: + tags: + - Delivery And Replay + operationId: getDeliveries summary: List deliveries security: - bearerAuth: [] @@ -2427,6 +3238,9 @@ paths: description: Paginated delivery list, including retry seed evidence for reproducible scheduling. /v1/deliveries/{delivery_id}/attempts: get: + tags: + - Delivery And Replay + operationId: getDeliveriesDeliveryIdAttempts summary: List delivery attempts security: - bearerAuth: [] @@ -2441,6 +3255,9 @@ paths: description: Paginated delivery attempts, including deterministic retry delay and next retry timestamp when retryable. /v1/deliveries/{delivery_id}:retry: post: + tags: + - Delivery And Replay + operationId: postDeliveriesDeliveryIdRetry summary: Manually retry delivery security: - bearerAuth: [] @@ -2461,6 +3278,9 @@ paths: description: Delivery retry scheduled. /v1/deliveries/{delivery_id}:cancel: post: + tags: + - Delivery And Replay + operationId: postDeliveriesDeliveryIdCancel summary: Cancel scheduled delivery security: - bearerAuth: [] @@ -2481,6 +3301,9 @@ paths: description: Delivery canceled. /v1/delivery-attempts/{attempt_id}: get: + tags: + - Delivery And Replay + operationId: getDeliveryAttemptsAttemptId summary: Get delivery attempt security: - bearerAuth: [] @@ -2495,6 +3318,9 @@ paths: description: Delivery attempt detail, including deterministic retry delay evidence when retryable. /v1/replay-jobs:dry-run: post: + tags: + - Delivery And Replay + operationId: postReplayJobsDryRun summary: Dry-run replay security: - bearerAuth: [] @@ -2507,8 +3333,29 @@ paths: responses: "200": description: Replay dry-run result. + /v1/replay-jobs/preview: + post: + tags: + - Delivery And Replay + operationId: postReplayJobsPreview + summary: Preview replay + description: Additive alias for replay dry-run. It validates the same request and does not create replay jobs or delivery work. 
+ security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ReplayRequest" + responses: + "200": + description: Replay preview result. /v1/replay-jobs: get: + tags: + - Delivery And Replay + operationId: getReplayJobs summary: List replay jobs security: - bearerAuth: [] @@ -2516,6 +3363,9 @@ paths: "200": description: Paginated replay job list. post: + tags: + - Delivery And Replay + operationId: postReplayJobs summary: Create replay job security: - bearerAuth: [] @@ -2525,13 +3375,40 @@ paths: application/json: schema: $ref: "#/components/schemas/ReplayRequest" + example: + event_id: evt_example + endpoint_id: end_example + reason_code: receiver_fixed + reason: "customer requested replay after downstream fix" + config_mode: current + rate_limit_per_minute: 60 + require_approval: true + approval_expires_at: "2026-06-05T12:00:00Z" responses: "202": description: Replay job scheduled or pending approval when require_approval is true. + content: + application/json: + example: + id: rpl_example + state: pending_approval + scope_hash: sha256:2c26b46b68ffc68ff99b453c1d304134 + reason_code: receiver_fixed + reason: "customer requested replay after downstream fix" + config_mode: current + rate_limit_per_minute: 60 + total_items: 1 + processed_items: 0 + failed_items: 0 + approval_required: true + approval_expires_at: "2026-06-05T12:00:00Z" /v1/replay-jobs/{replay_job_id}:approve: post: + tags: + - Delivery And Replay + operationId: postReplayJobsReplayJobIdApprove summary: Approve pending replay job - description: Moves a tenant-scoped pending replay job into scheduled state and enqueues durable replay work. + description: Moves a tenant-scoped pending replay job into scheduled state and enqueues durable replay work. The approving actor must be different from the creator and the approval window must not be expired. 
security: - bearerAuth: [] parameters: @@ -2551,6 +3428,9 @@ paths: description: Replay job approved and scheduled. /v1/replay-jobs/{replay_job_id}:pause: post: + tags: + - Delivery And Replay + operationId: postReplayJobsReplayJobIdPause summary: Pause replay job security: - bearerAuth: [] @@ -2571,6 +3451,9 @@ paths: description: Replay job paused. /v1/replay-jobs/{replay_job_id}:resume: post: + tags: + - Delivery And Replay + operationId: postReplayJobsReplayJobIdResume summary: Resume replay job security: - bearerAuth: [] @@ -2591,6 +3474,9 @@ paths: description: Replay job scheduled again. /v1/replay-jobs/{replay_job_id}:cancel: post: + tags: + - Delivery And Replay + operationId: postReplayJobsReplayJobIdCancel summary: Cancel replay job security: - bearerAuth: [] @@ -2609,8 +3495,81 @@ paths: responses: "200": description: Replay job canceled. + /v1/replay-approval-policies: + get: + tags: + - Delivery And Replay + operationId: getReplayApprovalPolicies + summary: List replay approval policies + description: Lists tenant-scoped policies that automatically require replay approval for tenant, source, or route scopes. + security: + - bearerAuth: [] + responses: + "200": + description: Paginated replay approval policy list. + content: + application/json: + schema: + $ref: "#/components/schemas/ReplayApprovalPolicyPage" + post: + tags: + - Delivery And Replay + operationId: postReplayApprovalPolicies + summary: Create or reactivate replay approval policy + description: Creates or reactivates an active policy that makes matching replay jobs pending approval before delivery work is enqueued. 
+ security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateReplayApprovalPolicyRequest" + example: + scope_type: source + scope_id: src_stripe + default_expiry_seconds: 86400 + reason: "payments source requires maker-checker replay" + responses: + "201": + description: Replay approval policy created or reactivated. + content: + application/json: + schema: + $ref: "#/components/schemas/ReplayApprovalPolicy" + /v1/replay-approval-policies/{policy_id}: + delete: + tags: + - Delivery And Replay + operationId: deleteReplayApprovalPoliciesPolicyId + summary: Disable replay approval policy + description: Disables a tenant-scoped replay approval policy. Historical pending jobs keep their approval requirement. + security: + - bearerAuth: [] + parameters: + - name: policy_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ReasonRequest" + responses: + "200": + description: Replay approval policy disabled. + content: + application/json: + schema: + $ref: "#/components/schemas/ReplayApprovalPolicy" /v1/reconciliation-jobs:dry-run: post: + tags: + - Reconciliation + operationId: postReconciliationJobsDryRun summary: Dry-run provider reconciliation description: Reports expected matched, missing, recoverable, redelivery, or unrecoverable outcomes without capturing recovered events or requesting redelivery. 
security: @@ -2630,6 +3589,9 @@ paths: $ref: "#/components/schemas/ReconciliationJob" /v1/reconciliation-jobs: get: + tags: + - Reconciliation + operationId: getReconciliationJobs summary: List provider reconciliation jobs security: - bearerAuth: [] @@ -2643,6 +3605,9 @@ paths: schema: $ref: "#/components/schemas/ReconciliationJobPage" post: + tags: + - Reconciliation + operationId: postReconciliationJobs summary: Create provider reconciliation job description: Recovered provider API events are not marked as signed webhooks and route only when `route_recovered=true`. security: @@ -2676,6 +3641,9 @@ paths: $ref: "#/components/schemas/ReconciliationJob" /v1/reconciliation-jobs/{job_id}: get: + tags: + - Reconciliation + operationId: getReconciliationJobsJobId summary: Get provider reconciliation job security: - bearerAuth: [] @@ -2694,6 +3662,9 @@ paths: $ref: "#/components/schemas/ReconciliationJob" /v1/reconciliation-jobs/{job_id}/items: get: + tags: + - Reconciliation + operationId: getReconciliationJobsJobIdItems summary: List provider reconciliation gap items security: - bearerAuth: [] @@ -2713,6 +3684,9 @@ paths: $ref: "#/components/schemas/ReconciliationItemPage" /v1/reconciliation-jobs/{job_id}:cancel: post: + tags: + - Reconciliation + operationId: postReconciliationJobsJobIdCancel summary: Cancel provider reconciliation job security: - bearerAuth: [] @@ -2737,6 +3711,9 @@ paths: $ref: "#/components/schemas/ReconciliationJob" /v1/dead-letter: get: + tags: + - Delivery And Replay + operationId: getDeadLetter summary: List dead-letter entries security: - bearerAuth: [] @@ -2745,6 +3722,9 @@ paths: description: Paginated dead-letter list. 
/v1/dead-letter/{entry_id}:release: post: + tags: + - Delivery And Replay + operationId: postDeadLetterEntryIdRelease summary: Release dead-letter entry into replay work security: - bearerAuth: [] @@ -2754,11 +3734,20 @@ paths: required: true schema: type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DeadLetterReleaseRequest" responses: "202": description: Replay job scheduled for dead-letter entry. /v1/dead-letter:bulk-release: post: + tags: + - Delivery And Replay + operationId: postDeadLetterBulkRelease summary: Bulk release dead-letter entries security: - bearerAuth: [] @@ -2773,6 +3762,9 @@ paths: description: Replay jobs scheduled for matching dead-letter entries. /v1/quarantine: get: + tags: + - Delivery And Replay + operationId: getQuarantine summary: List quarantine entries security: - bearerAuth: [] @@ -2781,6 +3773,9 @@ paths: description: Paginated quarantine list. /v1/quarantine/{entry_id}:approve: post: + tags: + - Delivery And Replay + operationId: postQuarantineEntryIdApprove summary: Approve quarantine entry security: - bearerAuth: [] @@ -2795,6 +3790,9 @@ paths: description: Quarantine entry approved. /v1/quarantine/{entry_id}:reject: post: + tags: + - Delivery And Replay + operationId: postQuarantineEntryIdReject summary: Reject quarantine entry security: - bearerAuth: [] @@ -2809,6 +3807,9 @@ paths: description: Quarantine entry rejected. /v1/audit-events: get: + tags: + - Audit And Retention + operationId: getAuditEvents summary: List audit events security: - bearerAuth: [] @@ -2817,6 +3818,9 @@ paths: description: Paginated audit event list. 
/v1/audit-chain/head: get: + tags: + - Audit And Retention + operationId: getAuditChainHead summary: Get audit chain head security: - bearerAuth: [] @@ -2829,6 +3833,9 @@ paths: $ref: "#/components/schemas/AuditChainHead" /v1/audit-chain:verify: post: + tags: + - Audit And Retention + operationId: postAuditChainVerify summary: Verify audit chain continuity security: - bearerAuth: [] @@ -2847,6 +3854,9 @@ paths: $ref: "#/components/schemas/AuditChainVerification" /v1/audit-chain:anchor: post: + tags: + - Audit And Retention + operationId: postAuditChainAnchor summary: Anchor a verified audit chain range security: - bearerAuth: [] @@ -2867,6 +3877,9 @@ paths: description: Actor lacks security:write. /v1/audit-chain/anchors: get: + tags: + - Audit And Retention + operationId: getAuditChainAnchors summary: List audit chain anchors security: - bearerAuth: [] @@ -2881,6 +3894,9 @@ paths: $ref: "#/components/schemas/AuditChainAnchorPage" /v1/audit-chain/anchors/{anchor_id}: get: + tags: + - Audit And Retention + operationId: getAuditChainAnchorsAnchorId summary: Get audit chain anchor security: - bearerAuth: [] @@ -2901,6 +3917,9 @@ paths: description: Anchor not found. /v1/audit-events:export: post: + tags: + - Audit And Retention + operationId: postAuditEventsExport summary: Create audit evidence export description: Creates a tenant-scoped tar.gz bundle containing manifest.json, audit_events.jsonl, and optional timeline or raw payload JSONL files. Raw payload bodies require events:raw in addition to audit:read. security: @@ -2911,6 +3930,13 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateAuditExportRequest" + example: + from: "2026-05-26T00:00:00Z" + to: "2026-05-26T23:59:59Z" + include_raw_payloads: true + include_payload_bodies: false + include_timelines: true + reason: "support evidence package" responses: "202": description: Export created and ready for download. 
@@ -2918,10 +3944,27 @@ paths: application/json: schema: $ref: "#/components/schemas/EvidenceExport" + example: + id: exp_example + tenant_id: ten_example + state: ready + include_raw_payloads: true + include_timelines: true + include_payload_bodies: false + format: tar+gzip+jsonl + storage_backend: postgres + sha256: sha256:2c26b46b68ffc68ff99b453c1d304134 + manifest_sha256: sha256:fcde2b2edba56bf408601fb721fe9b5c + size_bytes: 4096 + created_by: usr_example + created_at: "2026-05-26T12:00:00Z" "403": description: Actor lacks audit:read or requested raw payload bodies without events:raw. /v1/audit-exports: get: + tags: + - Audit And Retention + operationId: getAuditExports summary: List audit evidence exports security: - bearerAuth: [] @@ -2936,6 +3979,9 @@ paths: $ref: "#/components/schemas/EvidenceExportPage" /v1/audit-exports/{export_id}: get: + tags: + - Audit And Retention + operationId: getAuditExportsExportId summary: Get audit evidence export status security: - bearerAuth: [] @@ -2956,6 +4002,9 @@ paths: description: Export not found or not visible. /v1/audit-exports/{export_id}:download: get: + tags: + - Audit And Retention + operationId: getAuditExportsExportIdDownload summary: Download audit evidence export bundle security: - bearerAuth: [] @@ -2983,6 +4032,9 @@ paths: description: Export bundle is unavailable. 
/v1/admin/retention-policies: get: + tags: + - Audit And Retention + operationId: getAdminRetentionPolicies summary: List retention policies security: - bearerAuth: [] @@ -2996,6 +4048,9 @@ paths: schema: $ref: "#/components/schemas/RetentionPolicyPage" post: + tags: + - Audit And Retention + operationId: postAdminRetentionPolicies summary: Create or update retention policy security: - bearerAuth: [] @@ -3014,6 +4069,9 @@ paths: $ref: "#/components/schemas/RetentionPolicy" /v1/admin/retention-policies/{policy_id}: patch: + tags: + - Audit And Retention + operationId: patchAdminRetentionPoliciesPolicyId summary: Update retention policy security: - bearerAuth: [] @@ -3038,6 +4096,9 @@ paths: $ref: "#/components/schemas/RetentionPolicy" /v1/endpoint-health: get: + tags: + - Operations + operationId: getEndpointHealth summary: List endpoint health security: - bearerAuth: [] @@ -3050,6 +4111,9 @@ paths: $ref: "#/components/schemas/EndpointHealthPage" /v1/ops/metrics: get: + tags: + - Operations + operationId: getOpsMetrics summary: Get tenant ops metrics security: - bearerAuth: [] @@ -3062,6 +4126,9 @@ paths: $ref: "#/components/schemas/OpsMetrics" /v1/ops/metrics/rollups: get: + tags: + - Operations + operationId: getOpsMetricsRollups summary: List tenant metrics rollups description: Returns tenant-scoped derived operational rollups. Rollups summarize state for dashboards and alerts and are not evidence source-of-truth. security: @@ -3085,6 +4152,9 @@ paths: description: Invalid metric filter. /v1/ops/storage: get: + tags: + - Operations + operationId: getOpsStorage summary: Get tenant storage status description: Returns redacted storage metadata and tenant-scoped counts without object-store credentials or raw payload bodies. 
security: @@ -3098,6 +4168,9 @@ paths: $ref: "#/components/schemas/OpsStorageStatus" /v1/ops/config: get: + tags: + - Operations + operationId: getOpsConfig summary: Get redacted runtime configuration description: Returns safe runtime configuration metadata only; secret values and connection strings are never included. security: @@ -3111,6 +4184,9 @@ paths: $ref: "#/components/schemas/OpsConfig" /v1/ops/workers: get: + tags: + - Operations + operationId: getOpsWorkers summary: List worker leases description: Lists runtime worker lease metadata only; no tenant payload data is exposed. security: @@ -3126,6 +4202,9 @@ paths: $ref: "#/components/schemas/WorkerStatusPage" /v1/ops/workers/{worker_id}: get: + tags: + - Operations + operationId: getOpsWorkersWorkerId summary: Get worker lease security: - bearerAuth: [] @@ -3146,6 +4225,9 @@ paths: description: Worker lease not found. /v1/ops/queues: get: + tags: + - Operations + operationId: getOpsQueues summary: List tenant queue stats description: Returns tenant-scoped durable outbox and delivery queue counts. security: @@ -3159,6 +4241,9 @@ paths: $ref: "#/components/schemas/QueueStatsPage" /v1/alerts: get: + tags: + - Operations + operationId: getAlerts summary: List alert rules security: - bearerAuth: [] @@ -3172,6 +4257,9 @@ paths: schema: $ref: "#/components/schemas/AlertRulePage" post: + tags: + - Operations + operationId: postAlerts summary: Create alert rule security: - bearerAuth: [] @@ -3181,6 +4269,14 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateAlertRuleRequest" + example: + name: "DLQ backlog" + rule_type: dead_letter_open + metric_name: dead_letter.open + threshold: 0 + comparator: ">" + window_seconds: 300 + state: active responses: "201": description: Alert rule created. 
@@ -3188,10 +4284,26 @@ paths: application/json: schema: $ref: "#/components/schemas/AlertRule" + example: + id: alr_example + tenant_id: ten_example + name: "DLQ backlog" + rule_type: dead_letter_open + metric_name: dead_letter.open + threshold: 0 + comparator: ">" + window_seconds: 300 + state: active + created_by: usr_example + created_at: "2026-05-26T12:00:00Z" + updated_at: "2026-05-26T12:00:00Z" "403": description: Actor lacks ops:write. /v1/alerts/{alert_id}: get: + tags: + - Operations + operationId: getAlertsAlertId summary: Get alert rule security: - bearerAuth: [] @@ -3209,6 +4321,9 @@ paths: schema: $ref: "#/components/schemas/AlertRule" patch: + tags: + - Operations + operationId: patchAlertsAlertId summary: Update alert rule security: - bearerAuth: [] @@ -3232,6 +4347,9 @@ paths: schema: $ref: "#/components/schemas/AlertRule" delete: + tags: + - Operations + operationId: deleteAlertsAlertId summary: Disable alert rule description: Disables the rule and preserves historical firings. security: @@ -3257,6 +4375,9 @@ paths: $ref: "#/components/schemas/AlertRule" /v1/alert-firings: get: + tags: + - Operations + operationId: getAlertFirings summary: List alert firings security: - bearerAuth: [] @@ -3277,6 +4398,9 @@ paths: $ref: "#/components/schemas/AlertFiringPage" /v1/alert-firings/{firing_id}: get: + tags: + - Operations + operationId: getAlertFiringsFiringId summary: Get alert firing security: - bearerAuth: [] @@ -3295,6 +4419,9 @@ paths: $ref: "#/components/schemas/AlertFiring" /v1/alert-firings/{firing_id}:acknowledge: post: + tags: + - Operations + operationId: postAlertFiringsFiringIdAcknowledge summary: Acknowledge alert firing security: - bearerAuth: [] @@ -3319,6 +4446,9 @@ paths: $ref: "#/components/schemas/AlertFiring" /v1/notification-channels: get: + tags: + - Signal Egress + operationId: getNotificationChannels summary: List notification channels description: Lists tenant-scoped generic HTTPS webhook alert notification channels. 
Signing secrets are never returned. security: @@ -3333,6 +4463,9 @@ paths: schema: $ref: "#/components/schemas/NotificationChannelPage" post: + tags: + - Signal Egress + operationId: postNotificationChannels summary: Create notification channel security: - bearerAuth: [] @@ -3342,6 +4475,11 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateNotificationChannelRequest" + example: + name: "Ops webhook" + channel_type: webhook + url: "https://alerts.example.com/webhook" + signing_secret: "" responses: "201": description: Notification channel created. @@ -3349,8 +4487,22 @@ paths: application/json: schema: $ref: "#/components/schemas/NotificationChannel" + example: + id: nch_example + tenant_id: ten_example + name: "Ops webhook" + channel_type: webhook + url: "https://alerts.example.com/webhook" + state: active + secret_hint: configured + created_by: usr_example + created_at: "2026-05-26T12:00:00Z" + updated_at: "2026-05-26T12:00:00Z" /v1/notification-channels/{channel_id}: get: + tags: + - Signal Egress + operationId: getNotificationChannelsChannelId summary: Get notification channel security: - bearerAuth: [] @@ -3368,6 +4520,9 @@ paths: schema: $ref: "#/components/schemas/NotificationChannel" patch: + tags: + - Signal Egress + operationId: patchNotificationChannelsChannelId summary: Update notification channel security: - bearerAuth: [] @@ -3391,6 +4546,9 @@ paths: schema: $ref: "#/components/schemas/NotificationChannel" delete: + tags: + - Signal Egress + operationId: deleteNotificationChannelsChannelId summary: Disable notification channel security: - bearerAuth: [] @@ -3415,6 +4573,9 @@ paths: $ref: "#/components/schemas/NotificationChannel" /v1/notification-channels/{channel_id}:test: post: + tags: + - Signal Egress + operationId: postNotificationChannelsChannelIdTest summary: Queue a test notification delivery security: - bearerAuth: [] @@ -3439,6 +4600,9 @@ paths: $ref: "#/components/schemas/NotificationDelivery" /v1/notification-deliveries: 
get: + tags: + - Signal Egress + operationId: getNotificationDeliveries summary: List notification deliveries security: - bearerAuth: [] @@ -3459,6 +4623,9 @@ paths: $ref: "#/components/schemas/NotificationDeliveryPage" /v1/notification-deliveries/{delivery_id}/attempts: get: + tags: + - Signal Egress + operationId: getNotificationDeliveriesDeliveryIdAttempts summary: List notification delivery attempts security: - bearerAuth: [] @@ -3478,6 +4645,9 @@ paths: $ref: "#/components/schemas/NotificationDeliveryAttemptPage" /v1/notification-deliveries/{delivery_id}:retry: post: + tags: + - Signal Egress + operationId: postNotificationDeliveriesDeliveryIdRetry summary: Retry notification delivery security: - bearerAuth: [] @@ -3502,6 +4672,9 @@ paths: $ref: "#/components/schemas/NotificationDelivery" /v1/siem-sinks: get: + tags: + - Signal Egress + operationId: getSiemSinks summary: List SIEM sinks description: Lists tenant-scoped generic HTTPS audit-chain stream sinks. Signing secrets are never returned. security: @@ -3516,6 +4689,9 @@ paths: schema: $ref: "#/components/schemas/SIEMSinkPage" post: + tags: + - Signal Egress + operationId: postSiemSinks summary: Create SIEM sink security: - bearerAuth: [] @@ -3525,6 +4701,11 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateSIEMSinkRequest" + example: + name: "Security log stream" + sink_type: webhook + url: "https://siem.example.com/ingest" + signing_secret: "" responses: "201": description: SIEM sink created. 
@@ -3532,8 +4713,23 @@ paths: application/json: schema: $ref: "#/components/schemas/SIEMSink" + example: + id: siem_example + tenant_id: ten_example + name: "Security log stream" + sink_type: webhook + url: "https://siem.example.com/ingest" + state: active + secret_hint: configured + cursor_sequence: 0 + created_by: usr_example + created_at: "2026-05-26T12:00:00Z" + updated_at: "2026-05-26T12:00:00Z" /v1/siem-sinks/{sink_id}: get: + tags: + - Signal Egress + operationId: getSiemSinksSinkId summary: Get SIEM sink security: - bearerAuth: [] @@ -3551,6 +4747,9 @@ paths: schema: $ref: "#/components/schemas/SIEMSink" patch: + tags: + - Signal Egress + operationId: patchSiemSinksSinkId summary: Update SIEM sink security: - bearerAuth: [] @@ -3574,6 +4773,9 @@ paths: schema: $ref: "#/components/schemas/SIEMSink" delete: + tags: + - Signal Egress + operationId: deleteSiemSinksSinkId summary: Disable SIEM sink security: - bearerAuth: [] @@ -3598,6 +4800,9 @@ paths: $ref: "#/components/schemas/SIEMSink" /v1/siem-sinks/{sink_id}:test: post: + tags: + - Signal Egress + operationId: postSiemSinksSinkIdTest summary: Queue a test SIEM delivery security: - bearerAuth: [] @@ -3622,6 +4827,9 @@ paths: $ref: "#/components/schemas/SIEMDelivery" /v1/siem-deliveries: get: + tags: + - Signal Egress + operationId: getSiemDeliveries summary: List SIEM deliveries security: - bearerAuth: [] @@ -3642,6 +4850,9 @@ paths: $ref: "#/components/schemas/SIEMDeliveryPage" /v1/siem-deliveries/{delivery_id}/attempts: get: + tags: + - Signal Egress + operationId: getSiemDeliveriesDeliveryIdAttempts summary: List SIEM delivery attempts security: - bearerAuth: [] @@ -3661,6 +4872,9 @@ paths: $ref: "#/components/schemas/SIEMDeliveryAttemptPage" /v1/siem-deliveries/{delivery_id}:retry: post: + tags: + - Signal Egress + operationId: postSiemDeliveriesDeliveryIdRetry summary: Retry SIEM delivery security: - bearerAuth: [] @@ -3709,10 +4923,117 @@ components: application/problem+json: schema: $ref: 
"#/components/schemas/Problem" + example: + type: about:blank + title: Forbidden + status: 403 + code: authorization_error + stable_code: WEBHOOKERY_TENANT_ACCESS_DENIED + request_id: req_example + retryable: false + ValidationProblem: + description: The request is malformed or violates validation rules. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Bad request + status: 400 + code: validation_error + stable_code: WEBHOOKERY_VALIDATION_FAILED + detail: Invalid JSON body. + request_id: req_example + retryable: false + UnauthorizedProblem: + description: Authentication is missing, invalid, or expired. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Unauthorized + status: 401 + code: authentication_error + stable_code: WEBHOOKERY_AUTHENTICATION_REQUIRED + request_id: req_example + retryable: false + ForbiddenProblem: + description: The authenticated actor lacks the required role, scope, tenant membership, or raw-payload permission. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Forbidden + status: 403 + code: authorization_error + stable_code: WEBHOOKERY_TENANT_ACCESS_DENIED + request_id: req_example + retryable: false + NotFoundProblem: + description: The resource does not exist or is not visible to the actor. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Not found + status: 404 + code: not_found + stable_code: WEBHOOKERY_RESOURCE_NOT_FOUND + request_id: req_example + retryable: false + PayloadTooLargeProblem: + description: The request body exceeds the configured capture limit. 
+ content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Payload too large + status: 413 + code: payload_too_large + stable_code: WEBHOOKERY_PAYLOAD_TOO_LARGE + request_id: req_example + retryable: false + HeadersTooLargeProblem: + description: Header count or header bytes exceed the configured ingress limits. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Headers too large + status: 431 + code: headers_too_large + stable_code: WEBHOOKERY_HEADERS_TOO_LARGE + request_id: req_example + retryable: false + StorageUnavailableProblem: + description: Durable storage was unavailable before acknowledgement. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Service unavailable + status: 503 + code: storage_unavailable + stable_code: WEBHOOKERY_DURABLE_CAPTURE_UNAVAILABLE + request_id: req_example + retryable: true schemas: Problem: type: object - required: [type, title, status, code, request_id] + required: [type, title, status, code, stable_code, request_id] properties: type: type: string @@ -3724,6 +5045,12 @@ components: type: string code: type: string + description: Legacy short problem code retained for compatibility. + stable_code: + type: string + description: Namespaced stable code for SDK, CLI, support, and incident handling.
+ examples: + - WEBHOOKERY_PROVIDER_SIGNATURE_INVALID request_id: type: string retryable: @@ -5049,6 +6376,212 @@ components: nullable: true has_more: type: boolean + CreateIncidentRequest: + type: object + required: [title, reason] + additionalProperties: false + properties: + title: + type: string + minLength: 1 + maxLength: 200 + reason: + type: string + minLength: 1 + maxLength: 500 + AddIncidentEventRequest: + type: object + required: [event_id, reason] + additionalProperties: false + properties: + event_id: + type: string + minLength: 1 + reason: + type: string + minLength: 1 + maxLength: 500 + IncidentReportRequest: + type: object + required: [reason] + additionalProperties: false + properties: + reason: + type: string + minLength: 1 + maxLength: 500 + CreateIncidentEvidenceExportRequest: + type: object + required: [reason] + additionalProperties: false + properties: + reason: + type: string + minLength: 1 + maxLength: 500 + Incident: + type: object + required: [id, tenant_id, title, reason, state, created_by, created_at] + properties: + id: + type: string + tenant_id: + type: string + title: + type: string + reason: + type: string + state: + type: string + enum: [active, disabled] + created_by: + type: string + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + IncidentPage: + type: object + required: [data, has_more] + properties: + data: + type: array + items: + $ref: "#/components/schemas/Incident" + next_cursor: + type: string + nullable: true + has_more: + type: boolean + IncidentEvent: + type: object + required: [id, tenant_id, incident_id, event_id, added_by, reason, created_at] + properties: + id: + type: string + tenant_id: + type: string + incident_id: + type: string + event_id: + type: string + added_by: + type: string + reason: + type: string + created_at: + type: string + format: date-time + IncidentReportSnapshot: + type: object + required: [id, tenant_id, incident_id, schema_version, report, 
markdown, generated_by, generated_at] + properties: + id: + type: string + tenant_id: + type: string + incident_id: + type: string + schema_version: + type: string + enum: [webhookery.incident_report.v1] + report: + type: object + additionalProperties: true + description: Machine-readable incident report. Raw payload bodies, secrets, and signatures are omitted by default. + markdown: + type: string + description: Human-readable Markdown incident report. + generated_by: + type: string + generated_at: + type: string + format: date-time + EventTimelineEntry: + type: object + required: [schema_version, sequence, kind, ref_id, state, detail, occurred_at] + additionalProperties: false + properties: + schema_version: + type: string + enum: [webhookery.event_timeline.v1] + sequence: + type: integer + minimum: 1 + kind: + type: string + enum: [event, receipt, raw_payload, normalized, delivery, delivery_payload, attempt, reconciliation, replay, audit] + ref_id: + type: string + state: + type: string + detail: + type: string + description: Redacted timeline detail; raw bodies and secrets are not included. 
+ occurred_at: + type: string + format: date-time + EventTimelinePage: + type: object + required: [data, next_cursor, has_more] + properties: + data: + type: array + items: + $ref: "#/components/schemas/EventTimelineEntry" + next_cursor: + type: string + nullable: true + has_more: + type: boolean + Event: + type: object + required: [id, tenant_id, source_id, provider, type, raw_payload_id, raw_payload_hash, signature_verified, verification_reason, deduplication_key, dedupe_status, received_at, trace_id] + properties: + id: + type: string + tenant_id: + type: string + source_id: + type: string + provider: + type: string + type: + type: string + provider_event_id: + type: string + raw_payload_id: + type: string + raw_payload_hash: + type: string + signature_verified: + type: boolean + verification_reason: + type: string + deduplication_key: + type: string + dedupe_status: + type: string + received_at: + type: string + format: date-time + trace_id: + type: string + EventPage: + type: object + required: [data, next_cursor, has_more] + properties: + data: + type: array + items: + $ref: "#/components/schemas/Event" + next_cursor: + type: string + nullable: true + has_more: + type: boolean NormalizedEnvelope: type: object required: [id, tenant_id, event_id, provider, type, source, envelope_sha256, data_sha256, metadata_sha256, storage_status, created_at] @@ -5658,10 +7191,6 @@ components: type: integer minimum: 0 maximum: 60000 - require_approval: - type: boolean - default: false - description: Create the replay job in pending_approval state. Delivery work is not enqueued until the job is approved. 
state: type: string enum: [active, disabled] @@ -5824,9 +7353,20 @@ components: type: array items: type: string + DeadLetterReleaseRequest: + type: object + required: [reason_code, reason] + additionalProperties: false + properties: + reason_code: + type: string + enum: [receiver_fixed, provider_reconciliation, operator_requested, support_investigation, customer_dispute, test_drill, incident_recovery] + reason: + type: string + minLength: 1 DeadLetterBulkReleaseRequest: type: object - required: [reason] + required: [reason_code, reason] additionalProperties: false properties: entry_ids: @@ -5834,11 +7374,15 @@ components: description: Empty or omitted means up to 100 currently open entries. items: type: string + reason_code: + type: string + enum: [receiver_fixed, provider_reconciliation, operator_requested, support_investigation, customer_dispute, test_drill, incident_recovery] reason: type: string minLength: 1 ReplayRequest: type: object + required: [reason_code, reason] additionalProperties: false properties: event_id: @@ -5847,8 +7391,12 @@ components: type: string endpoint_id: type: string + reason_code: + type: string + enum: [receiver_fixed, provider_reconciliation, operator_requested, support_investigation, customer_dispute, test_drill, incident_recovery] reason: type: string + minLength: 1 dry_run: type: boolean config_mode: @@ -5859,6 +7407,79 @@ components: type: integer minimum: 0 maximum: 60000 + require_approval: + type: boolean + default: false + description: Create the replay job in pending_approval state. Delivery work is not enqueued until the job is approved. + approval_expires_at: + type: string + format: date-time + description: Optional approval expiry for pending replay jobs. When omitted with require_approval=true, the server defaults to 24 hours from creation. The field is rejected unless require_approval is true. 
+ CreateReplayApprovalPolicyRequest: + type: object + required: [scope_type, reason] + additionalProperties: false + properties: + scope_type: + type: string + enum: [tenant, source, route] + scope_id: + type: string + description: Required for source and route policies. Omit for tenant-wide policies. + require_approval: + type: boolean + default: true + description: Active v1 policies require approval. Disable the policy to stop automatic approval gating. + default_expiry_seconds: + type: integer + minimum: 300 + maximum: 604800 + default: 86400 + reason: + type: string + minLength: 1 + maxLength: 500 + ReplayApprovalPolicy: + type: object + required: [id, tenant_id, scope_type, require_approval, default_expiry_seconds, state, created_by, created_at, updated_at] + properties: + id: + type: string + tenant_id: + type: string + scope_type: + type: string + enum: [tenant, source, route] + scope_id: + type: string + require_approval: + type: boolean + default_expiry_seconds: + type: integer + state: + type: string + reason: + type: string + created_by: + type: string + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + ReplayApprovalPolicyPage: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ReplayApprovalPolicy" + next_cursor: + type: string + nullable: true + has_more: + type: boolean CreateAuditExportRequest: type: object additionalProperties: false diff --git a/release/current.json b/release/current.json new file mode 100644 index 0000000..8dafeab --- /dev/null +++ b/release/current.json @@ -0,0 +1,26 @@ +{ + "schema": "webhookery-current-release.v1", + "repository": "aatuh/webhookery", + "current_public_release": { + "tag": "v0.1.0-rc1", + "name": "Webhookery v0.1.0-rc1", + "status": "controlled self-hosted release candidate", + "published_at": "2026-05-27T13:51:41Z", + "release_date": "2026-05-27", + "release_url": 
"https://github.com/aatuh/webhookery/releases/tag/v0.1.0-rc1", + "source_commit": "51b455378b307914c71fff8b2e0bba81b9c6d435", + "release_workflow_run": "26996295151", + "release_notes_path": "docs/releases/v0.1.0-rc1.md", + "release_evidence_asset": "webhookery-v0.1.0-rc1-release-evidence.zip" + }, + "next_pilot_package": { + "target": "v0.2.0-pilot", + "status": "repo-ready checklist; external pilot evidence still manual", + "checklist_path": "docs/releases/v0.2.0-pilot.md" + }, + "notes": [ + "GitHub Releases remains the external source of truth for published tags and assets.", + "Current release evidence uses local/fake provider and receiver acceptance gates unless separate live-provider proof records say otherwise.", + "Release evidence is not exactly-once delivery proof, provider-side event completeness proof, compliance certification, legal evidentiary certification, external timestamping, or live-provider acceptance." + ] +} diff --git a/scripts/demo_media.sh b/scripts/demo_media.sh new file mode 100755 index 0000000..3f180e9 --- /dev/null +++ b/scripts/demo_media.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env sh +set -eu + +repo_root="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +cd "$repo_root" + +usage() { + cat <<'USAGE' +usage: + scripts/demo_media.sh plan [--output DIR] + scripts/demo_media.sh run [--output DIR] + +plan writes a sanitized recording outline without running Webhookery. run +requires WEBHOOKERY_TEST_DATABASE_URL and regenerates the deterministic local +evidence demo into DIR/output. 
+USAGE +} + +fail() { + printf '%s\n' "demo-media: $*" >&2 + exit 2 +} + +validate_output_dir() { + dir="$1" + newline=' +' + case "$dir" in + ""|*"$newline"*) fail "output directory is invalid" ;; + -*) fail "output directory must not start with '-'" ;; + esac +} + +write_plan() { + out_dir="$1" + validate_output_dir "$out_dir" + umask 077 + mkdir -p -- "$out_dir" + script_file="$out_dir/demo-script.md" + { + printf '%s\n' "# Webhookery Demo Media Script" + printf '\n' + printf '%s\n' "Use only the deterministic local evidence demo and synthetic fixtures." + printf '%s\n' "Do not record provider dashboards, customer receivers, production" + printf '%s\n' "databases, database URLs, API keys, webhook secrets, raw signatures," + printf '%s\n' "raw payload bodies, private hostnames, or customer data." + printf '\n' + printf '%s\n' "## Recording Flow" + printf '\n' + printf '%s\n' "1. Show the README headline or \`site/index.html\`." + printf '%s\n' "2. Show \`docs/security-promise.md\` durable-capture and non-claim boundaries." + printf '%s\n' "3. Run \`make demo-media\` or show the generated \`tmp/demo-media/output\` files." + printf '%s\n' "4. Open \`incident-report.md\`, \`evidence-manifest.json\`, and \`verify-output.json\`." + printf '%s\n' "5. End on \`docs/commercial-evaluation.md\` or \`docs/pilot-topology.md\` for buyer-facing assets." + printf '\n' + printf '%s\n' "## Required Narration Boundaries" + printf '\n' + printf '%s\n' "- Inbound success means durable capture, not downstream business success." + printf '%s\n' "- Delivery and replay are at-least-once." + printf '%s\n' "- Local deterministic demo output is not live provider certification." + printf '%s\n' "- Evidence bundles are not compliance or legal certification." 
+ } > "$script_file" + cp docs/demo-media-checklist.md "$out_dir/recording-checklist.md" + printf '%s\n' "$script_file" +} + +run_demo() { + out_dir="$1" + validate_output_dir "$out_dir" + if [ -z "${WEBHOOKERY_TEST_DATABASE_URL:-}" ]; then + fail "WEBHOOKERY_TEST_DATABASE_URL is required; start docker compose postgres and export the disposable database URL" + fi + write_plan "$out_dir" >/dev/null + mkdir -p -- "$out_dir/output" + WEBHOOKERY_DEMO_OUTPUT_DIR="$out_dir/output" examples/webhook-evidence-demo/run.sh + printf '%s\n' "demo-media: output written to $out_dir" +} + +cmd="${1:-plan}" +out_dir="tmp/demo-media" +[ "$#" -eq 0 ] || shift # shift is a special built-in: with no operands left it aborts dash/POSIX sh despite '|| true' +while [ "$#" -gt 0 ]; do + case "$1" in + --output) + [ "$#" -ge 2 ] || fail "--output requires a directory" + out_dir="$2" + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + *) + fail "unknown argument: $1" + ;; + esac +done + +case "$cmd" in + plan) write_plan "$out_dir" ;; + run) run_demo "$out_dir" ;; + --help|-h) + usage + ;; + *) + fail "unknown command: $cmd" + ;; +esac diff --git a/scripts/failure_drills.sh b/scripts/failure_drills.sh new file mode 100755 index 0000000..8ac7369 --- /dev/null +++ b/scripts/failure_drills.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env sh +set -eu + +repo_root="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +cd "$repo_root" + +usage() { + cat <<'USAGE' +usage: + scripts/failure_drills.sh list + scripts/failure_drills.sh plan [--output DIR] + scripts/failure_drills.sh run local-demo + +The plan command writes a sanitized failure-drill checklist. The local-demo +drill requires WEBHOOKERY_TEST_DATABASE_URL and runs the deterministic local +evidence demo.
+USAGE +} + +fail() { + printf '%s\n' "failure-drills: $*" >&2 + exit 2 +} + +validate_output_dir() { + dir="$1" + newline=' +' + case "$dir" in + ""|*"$newline"*) fail "output directory is invalid" ;; + -*) fail "output directory must not start with '-'" ;; + esac +} + +list_drills() { + cat <<'DRILLS' +downstream-receiver-fails +downstream-recovers +invalid-signature +replay-after-dlq +postgres-unavailable-before-capture +object-storage-unavailable-s3-mode +audit-chain-verification-failure +retention-raw-payload-tombstone +DRILLS +} + +write_plan() { + out_dir="$1" + validate_output_dir "$out_dir" + umask 077 + mkdir -p -- "$out_dir" + out_file="$out_dir/failure-drills.md" + { + printf '%s\n' "# Webhookery Failure Drills" + printf '\n' + printf '%s\n' "This file is a sanitized local/pilot drill plan. It omits database URLs," + printf '%s\n' "provider credentials, webhook secrets, raw signatures, raw payload bodies," + printf '%s\n' "customer data, and private receiver URLs." + printf '\n' + printf '%s\n' "| Drill | Safe command or setup | Expected result | Evidence |" + printf '%s\n' "|-------|-----------------------|-----------------|----------|" + printf '%s\n' "| downstream-receiver-fails | \`scripts/failure_drills.sh run local-demo\` | Initial downstream delivery fails and is visible before replay. | \`examples/webhook-evidence-demo/output/incident-report.md\` |" + printf '%s\n' "| downstream-recovers | \`scripts/failure_drills.sh run local-demo\` | Replay succeeds after receiver recovery. | \`examples/webhook-evidence-demo/output/verify-output.json\` |" + printf '%s\n' "| invalid-signature | \`scripts/failure_drills.sh run local-demo\` | Invalid signature path is persisted as evidence and not routed. | Local E2E output and incident packet references. |" + printf '%s\n' "| replay-after-dlq | \`scripts/failure_drills.sh run local-demo\` | DLQ release creates replay work with reason evidence. | Incident report replay and DLQ sections.
|" + printf '%s\n' "| postgres-unavailable-before-capture | Stop PostgreSQL in a disposable local stack, then send a synthetic event. | Ingress does not return success before durable capture is available. | API error, readiness output, and ops notes. |" + printf '%s\n' "| object-storage-unavailable-s3-mode | In a MinIO-only test stack, block object storage before object-backed raw payload capture. | Object-backed capture is not acknowledged when required object writes fail. | Storage drill notes and redacted API output. |" + printf '%s\n' "| audit-chain-verification-failure | Use a disposable database copy and intentionally alter a copied audit row. | Verification reports failure; original evidence remains untouched. | \`whcp audit verify-chain\` output from the disposable copy. |" + printf '%s\n' "| retention-raw-payload-tombstone | Run the local demo retention check or a disposable retention policy. | Raw body read returns retained/tombstoned state while metadata remains queryable. | Timeline, retention run, and audit entries. |" + printf '\n' + printf '%s\n' "Run destructive or failure-injection drills only against disposable local or" + printf '%s\n' "pilot-approved resources. Record completed pilot results in" + printf '%s\n' "\`docs/pilot-evidence-template.md\`." + } > "$out_file" + printf '%s\n' "$out_file" +} + +run_local_demo() { + if [ -z "${WEBHOOKERY_TEST_DATABASE_URL:-}" ]; then + fail "WEBHOOKERY_TEST_DATABASE_URL is required for local-demo" + fi + examples/webhook-evidence-demo/run.sh + for required in \ + examples/webhook-evidence-demo/output/incident-report.md \ + examples/webhook-evidence-demo/output/incident-report.json \ + examples/webhook-evidence-demo/output/evidence-manifest.json \ + examples/webhook-evidence-demo/output/verify-output.json + do + if [ ! 
-f "$required" ]; then + fail "local-demo did not produce $required" + fi + done + printf '%s\n' "failure-drills: local-demo completed" +} + +cmd="${1:-}" +case "$cmd" in + list) + if [ "$#" -ne 1 ]; then + usage >&2 + exit 2 + fi + list_drills + ;; + plan) + out_dir="tmp/failure-drills" + shift + while [ "$#" -gt 0 ]; do + case "$1" in + --output) + [ "$#" -ge 2 ] || fail "--output requires a directory" + out_dir="$2" + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + *) + fail "unknown plan argument: $1" + ;; + esac + done + write_plan "$out_dir" + ;; + run) + [ "$#" -eq 2 ] || fail "usage: scripts/failure_drills.sh run local-demo" + case "$2" in + local-demo) run_local_demo ;; + *) fail "unknown drill: $2" ;; + esac + ;; + --help|-h|"") + usage + [ -n "$cmd" ] || exit 2 + ;; + *) + fail "unknown command: $cmd" + ;; +esac diff --git a/scripts/integration_evidence.sh b/scripts/integration_evidence.sh new file mode 100755 index 0000000..73484a1 --- /dev/null +++ b/scripts/integration_evidence.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env sh +set -eu + +out_dir="${1:-integration-evidence}" +mkdir -p "$out_dir" + +migration_count="$(find migrations -type f -name '*.up.sql' | wc -l | tr -d ' ')" +live_postgres_outcome="${LIVE_POSTGRES_CHECK_OUTCOME:-${POSTGRES_INTEGRATION_OUTCOME:-unknown}}" +rc_outcome="${RC_CHECK_OUTCOME:-unknown}" +restore_status="${RESTORE_DRILL_STATUS:-skipped_not_configured}" +perf_smoke_outcome="${PERF_SMOKE_OUTCOME:-unknown}" +provider_conformance_outcome="${PROVIDER_CONFORMANCE_OUTCOME:-unknown}" +branch_protection_status="${BRANCH_PROTECTION_STATUS:-not_checked_by_workflow}" +external_review_status="${EXTERNAL_REVIEW_STATUS:-not_completed_or_not_attached}" + +if [ -d tmp/perf-smoke ]; then + mkdir -p "$out_dir/perf-smoke" + cp tmp/perf-smoke/perf-smoke.json "$out_dir/perf-smoke/" 2>/dev/null || true + cp tmp/perf-smoke/perf-smoke.md "$out_dir/perf-smoke/" 2>/dev/null || true +fi + +{ + printf '%s\n' "# Webhookery Integration Evidence" + printf '\n' + 
printf '%s\n' "- Commit: ${GITHUB_SHA:-local}" + printf '%s\n' "- Workflow: ${GITHUB_WORKFLOW:-local-integration}" + printf '%s\n' "- Run ID: ${GITHUB_RUN_ID:-local}" + printf '\n' + printf '%s\n' "## Checks" + printf '%s\n' "- Postgres migrations discovered: ${migration_count}" + printf '%s\n' "- make live-postgres-check: ${live_postgres_outcome}" + printf '%s\n' "- DB-backed make rc-check: ${rc_outcome}" + printf '%s\n' "- make perf-smoke: ${perf_smoke_outcome}" + printf '%s\n' "- make provider-conformance-check: ${provider_conformance_outcome}" + printf '%s\n' "- DB-backed RC E2E: covered by make rc-check when WEBHOOKERY_TEST_DATABASE_URL is set" + printf '%s\n' "- Restore drill: ${restore_status}" + printf '%s\n' "- Branch protection status: ${branch_protection_status}" + printf '%s\n' "- External review status: ${external_review_status}" + printf '\n' + printf '%s\n' "## Maturity Evidence" + printf '%s\n' "- Performance artifacts: $( { [ -f "$out_dir/perf-smoke/perf-smoke.json" ] || [ -f "$out_dir/perf-smoke/perf-smoke.md" ]; } && printf 'attached' || printf 'not_attached')" # claim attachment only when a copied artifact file exists, not merely the directory + printf '%s\n' "- Failure drill coverage: DB-backed make rc-check includes local fake receiver/provider drills only" + printf '%s\n' "- Provider conformance: ${provider_conformance_outcome}" + printf '%s\n' "- Accepted risk status: ${ACCEPTED_RISK_STATUS:-not_attached}" + printf '\n' + printf '%s\n' "## Sanitization" + printf '%s\n' "- Database URLs, credentials, raw payload bodies, webhook signatures, provider tokens, and customer data are intentionally omitted." + printf '%s\n' "- Local CI uses disposable Postgres and fake receivers/providers only; no live third-party provider or cloud calls are required."
+} > "$out_dir/integration-evidence.md" diff --git a/scripts/openapi_reference.go b/scripts/openapi_reference.go new file mode 100644 index 0000000..4d333ca --- /dev/null +++ b/scripts/openapi_reference.go @@ -0,0 +1,398 @@ +package main + +import ( + "bytes" + "flag" + "fmt" + "html" + "os" + "path/filepath" + "sort" + "strings" + + "go.yaml.in/yaml/v3" +) + +type operation struct { + Method string + Path string + ID string + Summary string + Tag string + Auth string + Parameters string + Request string + Responses string +} + +func main() { + input := flag.String("input", "openapi.yaml", "OpenAPI source file") + htmlOut := flag.String("html", "docs/openapi/index.html", "rendered HTML output") + matrixOut := flag.String("matrix", "docs/reference/api-contract-matrix.md", "contract matrix markdown output") + summaryOut := flag.String("summary", "docs/reference/openapi.md", "OpenAPI summary markdown output") + flag.Parse() + + doc, err := loadOpenAPI(*input) + if err != nil { + fatal(err) + } + ops := collectOperations(doc) + if len(ops) == 0 { + fatal(fmt.Errorf("no OpenAPI operations found in %s", *input)) + } + + if err := writeFile(*htmlOut, renderHTML(doc, ops)); err != nil { + fatal(err) + } + if err := writeFile(*matrixOut, renderMatrix(ops)); err != nil { + fatal(err) + } + if err := writeFile(*summaryOut, renderSummary(doc, ops)); err != nil { + fatal(err) + } +} + +func loadOpenAPI(path string) (map[string]any, error) { + raw, err := os.ReadFile(path) // #nosec G304 -- repository generator reads an explicit maintainer-provided input path. 
+ if err != nil { + return nil, err + } + var doc map[string]any + if err := yaml.Unmarshal(raw, &doc); err != nil { + return nil, err + } + return doc, nil +} + +func collectOperations(doc map[string]any) []operation { + paths := asMap(doc["paths"]) + pathNames := sortedKeys(paths) + methodOrder := map[string]int{ + "get": 0, "post": 1, "put": 2, "patch": 3, "delete": 4, + "options": 5, "head": 6, "trace": 7, + } + + var ops []operation + for _, pathName := range pathNames { + pathItem := asMap(paths[pathName]) + methods := sortedKeys(pathItem) + sort.SliceStable(methods, func(i, j int) bool { + ai, aok := methodOrder[methods[i]] + bi, bok := methodOrder[methods[j]] + switch { + case aok && bok: + return ai < bi + case aok: + return true + case bok: + return false + default: + return methods[i] < methods[j] + } + }) + for _, method := range methods { + if _, ok := methodOrder[method]; !ok { + continue + } + opMap := asMap(pathItem[method]) + if len(opMap) == 0 { + continue + } + ops = append(ops, operation{ + Method: strings.ToUpper(method), + Path: pathName, + ID: fallback(asString(opMap["operationId"]), "-"), + Summary: fallback(asString(opMap["summary"]), "-"), + Tag: firstString(asSlice(opMap["tags"]), "-"), + Auth: extractAuth(opMap), + Parameters: extractParameters(opMap), + Request: extractRequest(opMap), + Responses: extractResponses(opMap), + }) + } + } + return ops +} + +func renderHTML(doc map[string]any, ops []operation) []byte { + info := asMap(doc["info"]) + title := fallback(asString(info["title"]), "OpenAPI") + description := fallback(asString(info["description"]), "") + version := fallback(asString(info["version"]), "") + + var b bytes.Buffer + b.WriteString("\n\n\n") + b.WriteString(" \n") + b.WriteString(" \n") + fmt.Fprintf(&b, " %s Reference\n", html.EscapeString(title)) + b.WriteString(" \n\n\n") + b.WriteString("
\n") + fmt.Fprintf(&b, "

%s Reference

\n", html.EscapeString(title)) + if description != "" { + fmt.Fprintf(&b, "

%s

\n", html.EscapeString(description)) + } + b.WriteString("
\n") + fmt.Fprintf(&b, " Version %s\n", html.EscapeString(version)) + fmt.Fprintf(&b, " %d operations\n", len(ops)) + b.WriteString(" Generated from openapi.yaml\n") + b.WriteString("
\n") + b.WriteString("
\n
\n") + b.WriteString(" \n") + b.WriteString(" \n") + b.WriteString(" \n") + for _, op := range ops { + b.WriteString(" ") + fmt.Fprintf(&b, "", html.EscapeString(op.Method)) + fmt.Fprintf(&b, "", html.EscapeString(op.Path)) + fmt.Fprintf(&b, "", html.EscapeString(op.ID), html.EscapeString(op.Summary)) + fmt.Fprintf(&b, "", html.EscapeString(op.Tag)) + fmt.Fprintf(&b, "", html.EscapeString(op.Auth)) + fmt.Fprintf(&b, "", html.EscapeString(op.Request)) + fmt.Fprintf(&b, "", html.EscapeString(op.Responses)) + b.WriteString("\n") + } + b.WriteString(" \n
MethodPathOperationTagAuthRequestResponses
%s%s%s
%s
%s%s%s%s
\n
\n\n\n") + return b.Bytes() +} + +func renderMatrix(ops []operation) []byte { + var b bytes.Buffer + b.WriteString("# Webhookery API Contract Matrix\n\n") + b.WriteString("Generated from `openapi.yaml`. Do not edit operation rows manually; run `make openapi-reference-generate`.\n\n") + fmt.Fprintf(&b, "Total operations: `%d`.\n\n", len(ops)) + b.WriteString("| Method | Path | Operation ID | Tag | Auth | Parameters | Request | Responses |\n") + b.WriteString("| --- | --- | --- | --- | --- | --- | --- | --- |\n") + for _, op := range ops { + fmt.Fprintf(&b, "| `%s` | `%s` | `%s` | %s | %s | %s | %s | %s |\n", + md(op.Method), md(op.Path), md(op.ID), md(op.Tag), md(op.Auth), + md(op.Parameters), md(op.Request), md(op.Responses)) + } + return b.Bytes() +} + +func renderSummary(doc map[string]any, ops []operation) []byte { + info := asMap(doc["info"]) + title := fallback(asString(info["title"]), "Webhookery API") + version := fallback(asString(info["version"]), "") + description := fallback(asString(info["description"]), "") + counts := map[string]int{} + for _, op := range ops { + counts[op.Tag]++ + } + tags := sortedKeysAnyCount(counts) + + var b bytes.Buffer + b.WriteString("# OpenAPI Reference\n\n") + fmt.Fprintf(&b, "`openapi.yaml` is the canonical REST API contract for %s", title) + if version != "" { + fmt.Fprintf(&b, " version `%s`", version) + } + b.WriteString(".\n\n") + if description != "" { + fmt.Fprintf(&b, "%s\n\n", description) + } + fmt.Fprintf(&b, "- Rendered HTML reference: [`docs/openapi/index.html`](../openapi/index.html)\n") + fmt.Fprintf(&b, "- API contract matrix: [`docs/reference/api-contract-matrix.md`](api-contract-matrix.md)\n") + fmt.Fprintf(&b, "- Total operations: `%d`\n\n", len(ops)) + b.WriteString("## Operations By Tag\n\n") + b.WriteString("| Tag | Operations |\n| --- | ---: |\n") + for _, tag := range tags { + fmt.Fprintf(&b, "| %s | %d |\n", md(tag), counts[tag]) + } + b.WriteString("\n## Maintenance\n\n") + b.WriteString("When 
`openapi.yaml` changes, run `make openapi-reference-generate` and commit the regenerated reference artifacts with the contract change. `make openapi-reference-check` verifies that the generated files are current.\n") + return b.Bytes() +} + +func extractAuth(op map[string]any) string { + security, ok := op["security"] + if !ok { + return "none" + } + items := asSlice(security) + if len(items) == 0 { + return "none" + } + var schemes []string + for _, item := range items { + for name := range asMap(item) { + schemes = append(schemes, name) + } + } + sort.Strings(schemes) + if len(schemes) == 0 { + return "none" + } + return strings.Join(unique(schemes), ", ") +} + +func extractParameters(op map[string]any) string { + params := asSlice(op["parameters"]) + if len(params) == 0 { + return "-" + } + var out []string + for _, param := range params { + p := asMap(param) + if ref := asString(p["$ref"]); ref != "" { + out = append(out, "ref:"+lastRef(ref)) + continue + } + name := asString(p["name"]) + in := asString(p["in"]) + if name == "" { + continue + } + if in != "" { + out = append(out, in+":"+name) + } else { + out = append(out, name) + } + } + if len(out) == 0 { + return "-" + } + return strings.Join(out, ", ") +} + +func extractRequest(op map[string]any) string { + requestBody := asMap(op["requestBody"]) + if len(requestBody) == 0 { + return "-" + } + content := asMap(requestBody["content"]) + if len(content) == 0 { + return "present" + } + return strings.Join(sortedKeys(content), ", ") +} + +func extractResponses(op map[string]any) string { + responses := asMap(op["responses"]) + if len(responses) == 0 { + return "-" + } + keys := sortedKeys(responses) + sort.SliceStable(keys, func(i, j int) bool { + return responseRank(keys[i]) < responseRank(keys[j]) + }) + return strings.Join(keys, ", ") +} + +func responseRank(code string) string { + if len(code) == 3 && code[0] >= '0' && code[0] <= '9' { + return code + } + return "999" + code +} + +func asMap(v any) 
map[string]any { + if m, ok := v.(map[string]any); ok { + return m + } + return map[string]any{} +} + +func asSlice(v any) []any { + if s, ok := v.([]any); ok { + return s + } + return nil +} + +func asString(v any) string { + if s, ok := v.(string); ok { + return s + } + return "" +} + +func firstString(items []any, fallbackValue string) string { + for _, item := range items { + if s := asString(item); s != "" { + return s + } + } + return fallbackValue +} + +func fallback(value, fallbackValue string) string { + if value == "" { + return fallbackValue + } + return value +} + +func sortedKeys(m map[string]any) []string { + keys := make([]string, 0, len(m)) + for key := range m { + keys = append(keys, key) + } + sort.Strings(keys) + return keys +} + +func sortedKeysAnyCount(m map[string]int) []string { + keys := make([]string, 0, len(m)) + for key := range m { + keys = append(keys, key) + } + sort.Strings(keys) + return keys +} + +func unique(values []string) []string { + if len(values) == 0 { + return nil + } + out := values[:0] + var last string + for i, value := range values { + if i == 0 || value != last { + out = append(out, value) + last = value + } + } + return out +} + +func lastRef(ref string) string { + parts := strings.Split(ref, "/") + return parts[len(parts)-1] +} + +func md(value string) string { + value = strings.ReplaceAll(value, "|", "\\|") + value = strings.ReplaceAll(value, "\n", " ") + if value == "" { + return "-" + } + return value +} + +func writeFile(path string, data []byte) error { + if err := os.MkdirAll(filepath.Dir(path), 0o750); err != nil { + return err + } + return os.WriteFile(path, data, 0o644) // #nosec G304,G306 -- repository generator writes explicit public documentation artifact paths. 
+} + +func fatal(err error) { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) +} diff --git a/scripts/perf_smoke.sh b/scripts/perf_smoke.sh new file mode 100755 index 0000000..9cb0e7f --- /dev/null +++ b/scripts/perf_smoke.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env sh +set -eu + +repo_root="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +cd "$repo_root" + +if [ -z "${WEBHOOKERY_TEST_DATABASE_URL:-}" ]; then + printf '%s\n' "perf-smoke: WEBHOOKERY_TEST_DATABASE_URL is required; use a disposable PostgreSQL database" >&2 + exit 2 +fi + +out_dir="${WEBHOOKERY_PERF_OUTPUT_DIR:-tmp/perf-smoke}" +case "$out_dir" in + /*) out_abs="$out_dir" ;; + *) out_abs="$repo_root/$out_dir" ;; +esac +mkdir -p "$out_abs" + +WEBHOOKERY_PERF_OUTPUT_DIR="$out_abs" go test ./internal/e2e -run TestPerfSmoke -count=1 -timeout=2m + +printf '%s\n' "perf-smoke: wrote ${out_dir}/perf-smoke.json" +printf '%s\n' "perf-smoke: wrote ${out_dir}/perf-smoke.md" diff --git a/scripts/provider_conformance_check.sh b/scripts/provider_conformance_check.sh new file mode 100755 index 0000000..f41635f --- /dev/null +++ b/scripts/provider_conformance_check.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env sh +set -eu + +repo_root="$(CDPATH= cd -- "$(dirname -- "$0")/.." 
&& pwd)" +cd "$repo_root" + +doc="docs/provider-conformance.md" +manifest="docs/provider-conformance.manifest.json" +vector_registry="internal/provider/testdata/signature_vectors.json" + +test -f "$doc" +test -f "$manifest" +test -f "$vector_registry" +test -f docs/provider-proof-manifest.json +grep -q "Provider Conformance Matrix" "$doc" +grep -q "Last official-doc verification: 2026-05-27" "$doc" +grep -q "no provider-side completeness guarantee" "$doc" +grep -q "does not call Stripe" "$doc" +grep -q "docs/live-provider-proof/stripe.md" "$doc" +grep -q "docs/live-provider-proof/github.md" "$doc" +grep -q "docs/live-provider-proof/shopify.md" "$doc" +grep -q "https://docs.stripe.com/webhooks" "$doc" +grep -q "https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries" "$doc" +grep -q "https://shopify.dev/docs/apps/build/webhooks/verify-deliveries" "$doc" +grep -q "https://api.slack.com/docs/verifying-requests-from-slack" "$doc" +grep -q "https://github.com/cloudevents/spec" "$doc" +grep -q "https://www.rfc-editor.org/info/rfc7519/" "$doc" + +python3 - "$manifest" "$vector_registry" <<'PY' +import datetime +import json +import sys + +path = sys.argv[1] +vector_path = sys.argv[2] +with open(path, "r", encoding="utf-8") as fh: + data = json.load(fh) +with open(vector_path, "r", encoding="utf-8") as fh: + vectors = json.load(fh) + +expected = {"stripe", "github", "shopify", "slack", "generic-hmac", "generic-jwt", "cloudevents"} +providers = data.get("providers", []) +names = {item.get("name") for item in providers} +missing = sorted(expected - names) +if missing: + raise SystemExit(f"provider conformance manifest missing providers: {missing}") +if data.get("no_live_provider_calls") is not True: + raise SystemExit("provider conformance must not require live provider calls") +if data.get("signature_vector_registry") != vector_path: + raise SystemExit("provider conformance manifest must point at internal/provider/testdata/signature_vectors.json") + 
+checked = datetime.date.fromisoformat(data["last_official_doc_verification"]) +today = datetime.date.today() +if checked > today: + raise SystemExit("provider conformance verification date is in the future") +if (today - checked).days > 90: + raise SystemExit("provider conformance verification date is older than 90 days") + +for item in providers: + required = ["name", "official_docs", "signature", "event_id", "event_type", "vector_tests", "limitations"] + for key in required: + if not item.get(key): + raise SystemExit(f"{item.get('name', '')} missing {key}") + if not isinstance(item["official_docs"], list) or not isinstance(item["vector_tests"], list): + raise SystemExit(f"{item['name']} docs and vector_tests must be arrays") + if not item["limitations"]: + raise SystemExit(f"{item['name']} must record limitations") + +if vectors.get("schema_version") != "webhookery.provider_signature_vectors.v1": + raise SystemExit("signature vector registry has an unexpected schema_version") +vector_items = vectors.get("vectors", []) +vector_names = {item.get("provider") for item in vector_items} +missing_vectors = sorted({"stripe", "github", "shopify", "slack"} - vector_names) +if missing_vectors: + raise SystemExit(f"signature vector registry missing vectors: {missing_vectors}") +for item in vector_items: + for key in ["name", "provider", "source", "checked_date", "now", "secret", "raw_body", "mutated_raw_body", "headers", "expected"]: + if not item.get(key): + raise SystemExit(f"signature vector missing {key}: {item.get('name', '')}") + datetime.date.fromisoformat(item["checked_date"]) + if item["expected"].get("verified") is not True or item["expected"].get("reason") != "ok": + raise SystemExit(f"signature vector expected result must be explicit success: {item['name']}") +PY + +grep -q "/v1/ingest/{tenant_id}/{source_id}" openapi.yaml +grep -q "stripe" openapi.yaml +grep -q "github" openapi.yaml +grep -q "shopify" openapi.yaml +grep -q "slack" openapi.yaml + +go test 
./internal/provider -run 'TestProviderSignatureVectors|TestNormalizeBuiltInProviderMetadata|TestCloudEventsAdapterDoesNotVerifyUnsigned|TestGenericJWTAdapter|TestDeclarativeAdapter' -count=1 +go test ./pkg/verifier -run 'TestHMACSignatureUsesExactRawBytes|TestTimestampedSignatureWindow' -count=1 + +printf '%s\n' "provider conformance checks passed" diff --git a/scripts/provider_proof_check.sh b/scripts/provider_proof_check.sh new file mode 100755 index 0000000..cbbcd8d --- /dev/null +++ b/scripts/provider_proof_check.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env sh +set -eu + +repo_root="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +cd "$repo_root" + +manifest="docs/provider-proof-manifest.json" + +test -f "$manifest" +test -f docs/live-provider-proof/stripe.md +test -f docs/live-provider-proof/github.md +test -f docs/live-provider-proof/shopify.md +test -f docs/live-provider-proof/run-record-template.md +test -f docs/live-provider-proof/stripe-redaction-policy.md +test -f docs/live-provider-proof/samples/stripe-incident-report.redacted.md +test -f docs/live-provider-proof/samples/github-incident-report.redacted.md +test -f docs/live-provider-proof/samples/shopify-incident-report.redacted.md +test -f docs/providers/stripe.md +test -f docs/providers/github.md +test -f docs/providers/shopify.md + +grep -q "not provider certification" docs/live-provider-proof/stripe.md +grep -q "not provider certification" docs/live-provider-proof/github.md +grep -q "not provider certification" docs/live-provider-proof/shopify.md +grep -q "Do not commit completed run records" docs/live-provider-proof/run-record-template.md +grep -q "not provider certification" docs/live-provider-proof/run-record-template.md +grep -q "Do not commit" docs/live-provider-proof/stripe-redaction-policy.md +grep -q "docs/live-provider-proof/stripe.md" docs/provider-conformance.md +grep -q "docs/live-provider-proof/github.md" docs/provider-conformance.md +grep -q "docs/live-provider-proof/shopify.md" 
docs/provider-conformance.md +grep -q "docs/live-provider-proof/run-record-template.md" docs/provider-conformance.md + +python3 - "$manifest" <<'PY' +import datetime +import json +import sys +from pathlib import Path +from urllib.parse import urlparse + +root = Path.cwd().resolve() +path = Path(sys.argv[1]) + +with path.open("r", encoding="utf-8") as fh: + data = json.load(fh) + +if data.get("schema_version") != "provider-proof-v1": + raise SystemExit("provider proof manifest schema_version must be provider-proof-v1") +if data.get("project") != "webhookery": + raise SystemExit("provider proof manifest project must be webhookery") +if data.get("no_live_provider_calls") is not True: + raise SystemExit("provider proof check must not require live provider calls") + +max_age_days = data.get("max_age_days") +if not isinstance(max_age_days, int) or max_age_days <= 0: + raise SystemExit("provider proof manifest max_age_days must be a positive integer") + +required_providers = {"stripe", "github", "shopify"} +proofs = data.get("proofs") +if not isinstance(proofs, list): + raise SystemExit("provider proof manifest proofs must be an array") +providers = {item.get("provider") for item in proofs} +missing = sorted(required_providers - providers) +if missing: + raise SystemExit(f"provider proof manifest missing providers: {missing}") + +today = datetime.date.today() +for item in proofs: + provider = item.get("provider", "") + if item.get("status") != "manual_external": + raise SystemExit(f"{provider} provider proof status must be manual_external") + checked = datetime.date.fromisoformat(item["checked_date"]) + expires = datetime.date.fromisoformat(item["expires_after"]) + if checked > today: + raise SystemExit(f"{provider} provider proof checked_date is in the future") + if expires < checked: + raise SystemExit(f"{provider} provider proof expires before checked date") + if (today - checked).days > max_age_days: + raise SystemExit(f"{provider} provider proof metadata is older 
than {max_age_days} days") + if expires < today: + raise SystemExit(f"{provider} provider proof metadata is expired") + + for key in ("operator_guide", "proof_guide", "redaction_policy", "sample_report"): + raw = item.get(key) + if not raw or Path(raw).is_absolute(): + raise SystemExit(f"{provider} {key} must be a relative path") + resolved = (root / raw).resolve() + if root != resolved and root not in resolved.parents: + raise SystemExit(f"{provider} {key} escapes repository root") + if not resolved.is_file(): + raise SystemExit(f"{provider} {key} does not exist: {raw}") + text = resolved.read_text(encoding="utf-8").lower() + for required in ("not provider certification", "exactly-once", "provider-side"): + if required not in text: + raise SystemExit(f"{provider} {key} missing required non-claim text: {required}") + + sources = item.get("official_sources") + if not isinstance(sources, list) or not sources: + raise SystemExit(f"{provider} official_sources must be a non-empty array") + for source in sources: + parsed = urlparse(source) + if parsed.scheme != "https": + raise SystemExit(f"{provider} official source must be https: {source}") + if provider == "stripe" and parsed.netloc != "docs.stripe.com": + raise SystemExit(f"stripe official source must use docs.stripe.com: {source}") + if provider == "github" and parsed.netloc != "docs.github.com": + raise SystemExit(f"github official source must use docs.github.com: {source}") + if provider == "shopify" and parsed.netloc != "shopify.dev": + raise SystemExit(f"shopify official source must use shopify.dev: {source}") + + scope = item.get("scope_checked") + if not isinstance(scope, list) or len(scope) < 2: + raise SystemExit(f"{provider} scope_checked must list the reviewed behavior") + non_claims = item.get("non_claims") + if not isinstance(non_claims, list) or "not provider certification" not in non_claims: + raise SystemExit(f"{provider} non_claims must include not provider certification") + +for sample_key in 
("sample_report",): + for item in proofs: + sample = (root / item[sample_key]).resolve() + text = sample.read_text(encoding="utf-8") + forbidden = [ + "sk_live_", + "sk_test_", + "rk_live_", + "whsec_", + "github_pat_", + "ghp_", + "shpat_", + "shpua_", + "shpss_", + "shppa_", + "Bearer ", + "Stripe-Signature:", + "X-Hub-Signature-256: sha256=", + "X-Shopify-Hmac-SHA256:", + ] + leaked = [marker for marker in forbidden if marker in text] + if leaked: + raise SystemExit(f"{item['provider']} sample contains forbidden secret-shaped markers: {leaked}") +PY + +printf '%s\n' "provider proof checks passed" diff --git a/scripts/rc_acceptance.sh b/scripts/rc_acceptance.sh index 105b0ef..dcb2447 100755 --- a/scripts/rc_acceptance.sh +++ b/scripts/rc_acceptance.sh @@ -45,8 +45,8 @@ say "running targeted production-core tests" go test ./cmd/whcp ./internal/adapters/httpapi ./internal/adapters/postgres ./internal/app ./internal/worker ./internal/provider ./internal/ssrf ./internal/evidence ./pkg/client ./pkg/verifier if [ -n "${WEBHOOKERY_TEST_DATABASE_URL:-}" ]; then - say "running postgres integration checks" - make postgres-integration-test + say "running live postgres quality gate" + make live-postgres-check say "running db-backed rc e2e checks" go test ./internal/e2e -run TestRCE2E -count=1 else diff --git a/scripts/release_acceptance.sh b/scripts/release_acceptance.sh index 1c12525..5a5dd28 100755 --- a/scripts/release_acceptance.sh +++ b/scripts/release_acceptance.sh @@ -4,9 +4,13 @@ set -eu repo_root="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" cd "$repo_root" -make fast-check +# Keep release acceptance's baseline gate non-live and deterministic. Workflows +# may provide WEBHOOKERY_TEST_DATABASE_URL for later RC/performance checks; do +# not let the broad package test fan out DB-backed E2E migrations in parallel. 
+WEBHOOKERY_TEST_DATABASE_URL= WEBHOOKERY_RC_RESTORE_DATABASE_URL= make fast-check test -f LICENSE +test -f CHANGELOG.md grep -q "GNU AFFERO GENERAL PUBLIC LICENSE" LICENSE test -f COMMERCIAL.md test -f SECURITY.md @@ -16,10 +20,59 @@ test -f GOVERNANCE.md test -f TRADEMARKS.md test -f RELEASE_EVIDENCE.md test -f docs/release-evidence-template.md +test -f docs/release-evidence-sample.md +test -f docs/production-rc-checklist.md +test -f docs/releases/v0.1.0-rc1.md +test -f docs/releases/v0.2.0-pilot.md test -f docs/security-review-package.md +test -f docs/external-review-package.md +test -f docs/external-review-scope.md +test -f docs/external-review-findings-template.md +test -f docs/external-review-accepted-risks.md +test -f docs/provider-conformance.md +test -f docs/provider-conformance.manifest.json +test -f docs/provider-proof-manifest.json +test -f docs/providers/stripe.md +test -f docs/providers/github.md +test -f docs/providers/shopify.md +test -f docs/live-provider-proof/stripe.md +test -f docs/live-provider-proof/github.md +test -f docs/live-provider-proof/shopify.md +test -f docs/live-provider-proof/stripe-redaction-policy.md +test -f docs/live-provider-proof/samples/stripe-incident-report.redacted.md +test -f docs/live-provider-proof/samples/github-incident-report.redacted.md +test -f docs/live-provider-proof/samples/shopify-incident-report.redacted.md +test -f docs/evaluator-quickstart.md +test -f examples/webhook-evidence-demo/run.sh +test -f site/index.html +test -f docs/commercial-evaluation.md +test -f docs/production-readiness-review.md +test -f docs/support-packages.md +test -f docs/comparisons/build-vs-buy.md +test -f docs/articles/exactly-once-webhooks.md +test -f docs/articles/self-hosted-webhook-gateway-architecture.md +test -f docs/articles/webhook-security-review-checklist.md +test -f docs/launch-copy.md +test -f docs/launch-metrics.md +test -f docs/demo-media-checklist.md +test -f docs/customer-discovery-notes-template.md +test -f 
docs/pilot-feedback-template.md +test -f docs/roadmap-intake-policy.md +test -f docs/pilot-review-checklist.md +test -f .github/ISSUE_TEMPLATE/evaluator-feedback.yml test -f .dockerignore test -f .golangci.yml grep -q "AGPL-3.0-only" COMMERCIAL.md +grep -q "v0.1.0-rc1" CHANGELOG.md +grep -q "release candidate" docs/releases/v0.1.0-rc1.md +grep -q "make rc-check" docs/releases/v0.1.0-rc1.md +grep -q "fake/local providers" docs/releases/v0.1.0-rc1.md +grep -q "exactly-once delivery" docs/releases/v0.1.0-rc1.md +grep -q "provider-side event completeness" docs/releases/v0.1.0-rc1.md +grep -q "v0.2.0 Pilot Readiness Checklist" docs/releases/v0.2.0-pilot.md +grep -q "make provider-proof-check" docs/releases/v0.2.0-pilot.md +grep -q "raw payload bodies" .github/ISSUE_TEMPLATE/evaluator-feedback.yml +grep -q "roadmap-intake-policy.md" .github/ISSUE_TEMPLATE/evaluator-feedback.yml grep -q "AGPL-3.0-only" CONTRIBUTING.md grep -q "https://www.linkedin.com/in/aatu-harju" SECURITY.md grep -q "webhook secrets" SECURITY.md @@ -29,14 +82,55 @@ grep -q "no provider-side event completeness" RELEASE_EVIDENCE.md grep -q "compliance" RELEASE_EVIDENCE.md grep -q "not a certification" RELEASE_EVIDENCE.md grep -q "live third-party provider" docs/release-evidence-template.md +grep -q "Release Evidence Sample" docs/release-evidence-sample.md +grep -q "Production RC Checklist" docs/production-rc-checklist.md +grep -q "exactly-once delivery" docs/production-rc-checklist.md +grep -q "External Review" docs/release-evidence-template.md +grep -q "Branch Protection" docs/release-evidence-template.md +grep -q "External Review Package" docs/external-review-package.md +grep -q "accepted_risk" docs/external-review-accepted-risks.md +grep -q "External Review Scope Template" docs/external-review-scope.md +grep -q "External Review Findings Template" docs/external-review-findings-template.md +grep -q "Provider Conformance Matrix" docs/provider-conformance.md +grep -q "no provider-side completeness 
guarantee" docs/provider-conformance.md +grep -q "docs/live-provider-proof/stripe.md" docs/provider-conformance.md +grep -q "docs/live-provider-proof/github.md" docs/provider-conformance.md +grep -q "docs/live-provider-proof/shopify.md" docs/provider-conformance.md +grep -q "not provider certification" docs/live-provider-proof/stripe.md +grep -q "not provider certification" docs/live-provider-proof/github.md +grep -q "not provider certification" docs/live-provider-proof/shopify.md +grep -q "provider-proof-v1" docs/provider-proof-manifest.json +grep -q "Evaluator Quickstart" docs/evaluator-quickstart.md +grep -q "webhook evidence demo" examples/webhook-evidence-demo/README.md +grep -q "Self-hosted webhook evidence infrastructure" site/index.html +grep -q "EUR 490-1,000" docs/commercial-evaluation.md +grep -q "Production Readiness Review" docs/production-readiness-review.md +grep -q "Support Packages" docs/support-packages.md +grep -q "Build Vs Buy" docs/comparisons/build-vs-buy.md +grep -q "Exactly-Once Webhooks" docs/articles/exactly-once-webhooks.md +grep -q "Self-Hosted Webhook Gateway Architecture" docs/articles/self-hosted-webhook-gateway-architecture.md +grep -q "Webhook Security Review Checklist" docs/articles/webhook-security-review-checklist.md +grep -q "Launch Copy Templates" docs/launch-copy.md +grep -q "Launch Metrics Plan" docs/launch-metrics.md +grep -q "Customer Discovery Notes Template" docs/customer-discovery-notes-template.md +grep -q "Pilot Feedback Template" docs/pilot-feedback-template.md +grep -q "Roadmap Intake Policy" docs/roadmap-intake-policy.md +grep -q "Pilot Review Checklist" docs/pilot-review-checklist.md grep -q ".refs" .dockerignore grep -q "release-evidence" .dockerignore grep -q "backups" .dockerignore +grep -q "live-proof-private" .dockerignore +grep -q "launch-metrics-private" .dockerignore +grep -q "live-proof-private" .gitignore +grep -q "launch-metrics-private" .gitignore test -f Dockerfile test -f docker-compose.yml test -f 
deploy/kubernetes/kustomization.yaml +test -f deploy/kubernetes/networkpolicy.example.yaml test -f deploy/helm/webhookery/Chart.yaml +test -f deploy/helm/webhookery/values-production.example.yaml +test -f deploy/observability/prometheus-rules.example.yaml test -f deploy/terraform/webhookery-helm/main.tf grep -q "runAsNonRoot: true" deploy/kubernetes/api-deployment.yaml grep -q "runAsNonRoot: true" deploy/helm/webhookery/values.yaml @@ -46,11 +140,23 @@ test -f .api.env.example test -f collections/postman/webhookery.postman_collection.json test -f collections/bruno/Webhookery/bruno.json test -x scripts/backup_postgres.sh +test -x scripts/demo_media.sh +test -x scripts/failure_drills.sh +test -x scripts/integration_evidence.sh test -x scripts/restore_postgres.sh +test -x scripts/restore_drill.sh +test -f docs/failure-drills.md +grep -q "make demo-media" docs/demo-media-checklist.md grep -q "backup_postgres.sh" docs/operations.md grep -q "restore_postgres.sh" docs/operations.md +grep -q "make restore-drill" docs/operations.md +grep -q "Failure Drills" docs/failure-drills.md grep -q "Production Doctor" docs/operations.md grep -q "doctor production" README.md +grep -q "doctor pilot --no-network" README.md +grep -q "doctor pilot --no-network" docs/cli.md +grep -q "Pilot Doctor Runbook" docs/operations.md +grep -q "WEBHOOKERY_PILOT_ALLOW_RECEIVER_CHECK" docs/configuration.md grep -q "blocker" docs/operations.md grep -q "warning" docs/operations.md grep -q "WEBHOOKERY_SECRET_BOX_MODE=aws-kms" docs/operations.md @@ -61,9 +167,25 @@ grep -q "Incident Triage" docs/operations.md grep -q "Explicit Non-Goals" docs/operations.md grep -q "Production RC Readiness" README.md grep -q "make rc-check" README.md +grep -q "make live-postgres-check" README.md +grep -q "make live-postgres-check" docs/operations.md +grep -q "make live-postgres-check" docs/release-evidence-template.md +test -f docs/day-2-operations.md +test -f docs/observability.md +grep -q "Day-2 Operations Guide" 
docs/day-2-operations.md +grep -q "Observability Examples" docs/observability.md +grep -q "networkpolicy.example.yaml" docs/deployment.md +grep -q "prometheus-rules.example.yaml" docs/deployment.md +grep -q "make perf-smoke" .github/workflows/integration.yml +grep -q "make provider-conformance-check" .github/workflows/integration.yml +grep -q "make perf-smoke" .github/workflows/release.yml +grep -q "Branch protection status" .github/workflows/release.yml + +make provider-conformance-check +make provider-proof-check if [ -n "${WEBHOOKERY_TEST_DATABASE_URL:-}" ]; then - make postgres-integration-test + make live-postgres-check fi printf '%s\n' "release acceptance checks passed" diff --git a/scripts/release_assets.sh b/scripts/release_assets.sh new file mode 100755 index 0000000..10cdd95 --- /dev/null +++ b/scripts/release_assets.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +set -euo pipefail + +repo_root="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +cd "$repo_root" + +tag="${1:?usage: scripts/release_assets.sh [commit]}" +out_dir="${2:?usage: scripts/release_assets.sh [commit]}" +commit="${3:-$(git rev-parse HEAD)}" +platforms="${WEBHOOKERY_RELEASE_ASSET_PLATFORMS:-linux/amd64 linux/arm64 darwin/amd64 darwin/arm64 windows/amd64}" + +case "$tag" in + v[0-9]*) + ;; + *) + printf 'release tag must start with v and a digit: %s\n' "$tag" >&2 + exit 2 + ;; +esac + +mkdir -p "$out_dir" +out_dir="$(cd "$out_dir" && pwd)" +tmp_dir="$(mktemp -d)" +trap 'rm -rf "$tmp_dir"' EXIT + +build_archive() { + local goos="$1" + local goarch="$2" + local binary="whcp" + local ext="" + if [ "$goos" = "windows" ]; then + ext=".exe" + fi + + local name="webhookery_${tag}_${goos}_${goarch}" + local package_dir="$tmp_dir/$name" + mkdir -p "$package_dir" + + CGO_ENABLED=0 GOOS="$goos" GOARCH="$goarch" go build \ + -trimpath \ + -ldflags "-s -w" \ + -o "$package_dir/${binary}${ext}" \ + ./cmd/whcp + + cp LICENSE README.md openapi.yaml "$package_dir/" + mkdir -p "$package_dir/docs/releases" + 
if [ -f "docs/releases/${tag}.md" ]; then + cp "docs/releases/${tag}.md" "$package_dir/docs/releases/" + fi + + if [ "$goos" = "windows" ]; then + (cd "$tmp_dir" && zip -qr "$out_dir/${name}.zip" "$name") + else + tar -C "$tmp_dir" -czf "$out_dir/${name}.tar.gz" "$name" + fi +} + +for platform in $platforms; do + goos="${platform%/*}" + goarch="${platform#*/}" + build_archive "$goos" "$goarch" +done + +cp openapi.yaml "$out_dir/openapi.yaml" +sha256sum openapi.yaml > "$out_dir/openapi.sha256" +find migrations -type f -print0 | sort -z | xargs -0 sha256sum > "$out_dir/migrations.sha256" + +if [ -f "docs/releases/${tag}.md" ]; then + cp "docs/releases/${tag}.md" "$out_dir/release-notes.md" +else + { + printf '# Webhookery %s\n\n' "$tag" + printf 'Release notes were not found under `docs/releases/%s.md`.\n' "$tag" + } > "$out_dir/release-notes.md" +fi + +{ + printf '%s\n' "Webhookery release asset summary" + printf '%s\n' "tag=$tag" + printf '%s\n' "commit=$commit" + printf '%s\n' "checks=release-acceptance,provider-conformance,provider-proof,perf-smoke,rc-check" + printf '%s\n' "non_claims=no exactly-once delivery; no provider-side completeness; no compliance certification; no live-provider acceptance unless separately recorded" +} > "$out_dir/release-check-summary.txt" + +if [ -f source.spdx.json ]; then + cp source.spdx.json "$out_dir/source.spdx.json" +fi +if [ -f image.spdx.json ]; then + cp image.spdx.json "$out_dir/image.spdx.json" +fi +if [ -f coverage.out ]; then + cp coverage.out "$out_dir/coverage.out" +fi +if [ -f release-evidence/release-evidence.md ]; then + cp release-evidence/release-evidence.md "$out_dir/release-evidence.md" +fi +if [ -d tmp/perf-smoke ]; then + mkdir -p "$out_dir/perf-smoke" + cp tmp/perf-smoke/perf-smoke.* "$out_dir/perf-smoke/" 2>/dev/null || true +fi + +(cd "$out_dir" && find . -maxdepth 1 -type f ! 
-name SHA256SUMS -printf '%P\0' | sort -z | xargs -0 sha256sum) > "$out_dir/SHA256SUMS" + +python3 - "$out_dir" "$tag" "$commit" <<'PY' +import hashlib +import json +import pathlib +import sys + +out_dir = pathlib.Path(sys.argv[1]) +tag = sys.argv[2] +commit = sys.argv[3] + +artifacts = [] +for path in sorted(p for p in out_dir.iterdir() if p.is_file()): + if path.name in {"webhookery-release-manifest.json", "webhookery-release-provenance.json", "webhookery-release-provenance.intoto.jsonl"}: + continue + digest = hashlib.sha256(path.read_bytes()).hexdigest() + artifacts.append({ + "name": path.name, + "sha256": digest, + "size": path.stat().st_size, + }) + +manifest = { + "schema": "webhookery-release-manifest.v1", + "tag": tag, + "commit": commit, + "artifacts": artifacts, + "non_claims": [ + "not exactly-once delivery proof", + "not provider-side event completeness proof", + "not compliance certification", + "not legal evidentiary certification", + "not live-provider acceptance unless separately recorded", + ], +} +(out_dir / "webhookery-release-manifest.json").write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8") + +provenance = { + "schema": "webhookery-release-provenance.v1", + "tag": tag, + "commit": commit, + "builder": "scripts/release_assets.sh", + "materials": [ + "openapi.yaml", + "migrations/", + "cmd/whcp", + "go.mod", + ], + "limitations": [ + "This is project release metadata, not a SLSA level claim.", + "GitHub workflow identity and artifact digests must be verified from the published release run.", + ], +} +(out_dir / "webhookery-release-provenance.json").write_text(json.dumps(provenance, indent=2, sort_keys=True) + "\n", encoding="utf-8") + +statement = { + "_type": "https://in-toto.io/Statement/v1", + "subject": [{"name": item["name"], "digest": {"sha256": item["sha256"]}} for item in artifacts], + "predicateType": "https://webhookery.local/provenance/v1", + "predicate": provenance, +} +(out_dir / 
"webhookery-release-provenance.intoto.jsonl").write_text(json.dumps(statement, sort_keys=True) + "\n", encoding="utf-8") +PY + +(cd "$out_dir" && sha256sum webhookery-release-manifest.json webhookery-release-provenance.json webhookery-release-provenance.intoto.jsonl >> SHA256SUMS) + +printf 'release assets written to %s\n' "$out_dir" diff --git a/scripts/restore_drill.sh b/scripts/restore_drill.sh new file mode 100755 index 0000000..2cfa742 --- /dev/null +++ b/scripts/restore_drill.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env sh +set -eu + +repo_root="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)" +cd "$repo_root" + +usage() { + cat <<'USAGE' +usage: WEBHOOKERY_DATABASE_URL=postgres://source \ + WEBHOOKERY_RESTORE_DRILL_DATABASE_URL=postgres://disposable-restore \ + scripts/restore_drill.sh [--output DIR] + +The restore target is destructive. The script refuses to run unless source and +restore URLs are both set and different. +USAGE +} + +fail() { + printf '%s\n' "restore-drill: $*" >&2 + exit 2 +} + +validate_output_dir() { + dir="$1" + newline=' +' + case "$dir" in + ""|*"$newline"*) fail "output directory is invalid" ;; + -*) fail "output directory must not start with '-'" ;; + esac +} + +out_dir="tmp/restore-drill" +while [ "$#" -gt 0 ]; do + case "$1" in + --output) + [ "$#" -ge 2 ] || fail "--output requires a directory" + out_dir="$2" + shift 2 + ;; + --help|-h) + usage + exit 0 + ;; + *) + fail "unknown argument: $1" + ;; + esac +done + +source_url="${WEBHOOKERY_DATABASE_URL:-${DATABASE_URL:-}}" +restore_url="${WEBHOOKERY_RESTORE_DRILL_DATABASE_URL:-}" + +[ -n "$source_url" ] || fail "WEBHOOKERY_DATABASE_URL or DATABASE_URL is required" +[ -n "$restore_url" ] || fail "WEBHOOKERY_RESTORE_DRILL_DATABASE_URL is required" +[ "$source_url" != "$restore_url" ] || fail "restore target must be different from source database" + +command -v pg_dump >/dev/null 2>&1 || fail "pg_dump is required" +command -v pg_restore >/dev/null 2>&1 || fail "pg_restore is required" + 
+validate_output_dir "$out_dir" +umask 077 +mkdir -p -- "$out_dir/backups" + +started_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)" +printf '%s\n' "restore-drill: creating source backup" +backup_path="$(WEBHOOKERY_DATABASE_URL="$source_url" scripts/backup_postgres.sh "$out_dir/backups")" + +printf '%s\n' "restore-drill: restoring into disposable target" +WEBHOOKERY_DATABASE_URL="$restore_url" WEBHOOKERY_RESTORE_CONFIRM=restore scripts/restore_postgres.sh "$backup_path" + +printf '%s\n' "restore-drill: applying migrations to disposable target" +WEBHOOKERY_DATABASE_URL="$restore_url" go run ./cmd/whcp migrate up + +completed_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)" +evidence_file="$out_dir/restore-drill.json" +backup_name="${backup_path##*/}" +{ + printf '%s\n' "{" + printf '%s\n' " \"schema_version\": \"webhookery.restore_drill.v1\"," + printf '%s\n' " \"started_at\": \"$started_at\"," + printf '%s\n' " \"completed_at\": \"$completed_at\"," + printf '%s\n' " \"source_database_url_redacted\": true," + printf '%s\n' " \"restore_database_url_redacted\": true," + printf '%s\n' " \"backup_file\": \"$backup_name\"," + printf '%s\n' " \"restore_target_destructive\": true," + printf '%s\n' " \"migrations_applied\": true," + printf '%s\n' " \"object_storage_bodies_verified\": false," + printf '%s\n' " \"object_storage_note\": \"PostgreSQL restore drills do not verify S3 or MinIO object bodies.\"" + printf '%s\n' "}" +} > "$evidence_file" + +printf '%s\n' "restore-drill: evidence written to $evidence_file" diff --git a/sdk/README.md b/sdk/README.md index 594bec2..7e7b907 100644 --- a/sdk/README.md +++ b/sdk/README.md @@ -1,25 +1,183 @@ # Webhookery SDK Artifacts -`sdk/openapi.yaml` is the committed SDK-ready OpenAPI source copied from the -canonical root `openapi.yaml`. `pkg/client` contains a small Go client for -producer event ingestion and audit-chain verification over the REST API. -`sdk/python` contains a stdlib-only Python client for the same core control -plane calls. 
Client errors do not include API key material. -`sdk/typescript` contains a small fetch-based TypeScript client with the same -token-redaction behavior. +This directory contains committed SDK-facing artifacts for local use and +release evidence. -Operator request collections are committed under `collections/postman` and -`collections/bruno`. +| Artifact | Audience | Status | +|----------|----------|--------| +| `sdk/openapi.yaml` | SDK maintainers and API consumers | Derived copy of root `openapi.yaml`; keep aligned with `make sdk-generate` and `make sdk-check`. | +| `pkg/client` | Go producers and audit tooling | Small Go REST client for product event ingestion and audit-chain verification. | +| `sdk/python` | Python producers and audit tooling | Stdlib-only local Python client for the same core calls. | +| `sdk/typescript` | TypeScript producers and audit tooling | Fetch-based local TypeScript client for the same core calls. | + +These clients cover a deliberately small surface: + +- `POST /v1/events` +- `GET /v1/audit-chain/head` +- `POST /v1/audit-chain:verify` + +Use `openapi.yaml` for the full REST contract. + +Evidence workflow examples live in: + +- `sdk/examples/evidence-workflow-go/main.go` +- `sdk/typescript/examples/evidence-workflow.ts` + +They show how to combine the narrow committed clients with direct OpenAPI +calls for incident report and evidence export workflows. + +## Authentication + +All committed clients use bearer API keys: + +```text +Authorization: Bearer <api-key> +``` + +Use `WEBHOOKERY_API_KEY` or a secret manager in real environments. Do not put +real API keys, provider credentials, webhook secrets, raw payload bodies, raw +signatures, private keys, or customer data in docs, tests, issues, support +artifacts, or generated examples. 
+ +## Go + +Setup from this repository: + +```bash +go test ./pkg/client +``` + +Producer event ingestion: + +```go +package main + +import ( + "context" + "os" + + "webhookery/pkg/client" +) + +func run() error { + c, err := client.New("http://localhost:8080", os.Getenv("WEBHOOKERY_API_KEY")) + if err != nil { + return err + } + + _, err = c.CreateEvent(context.Background(), client.ProductEvent{ + ID: "evt_product_123", + Type: "demo.created", + SourceID: "src_internal", + Data: map[string]any{"ok": true}, + }, client.WithIdempotencyKey("demo-event-123")) + return err +} +``` + +Audit-chain verification: ```go -c, err := client.New("http://localhost:8080", os.Getenv("WEBHOOKERY_API_KEY")) +head, err := c.AuditChainHead(context.Background()) if err != nil { - return err + return err +} +_ = head + +verification, err := c.VerifyAuditChain(context.Background(), client.AuditChainVerifyRequest{}) +if err != nil { + return err +} +if !verification.Valid { + return errors.New("audit chain did not verify") } -_, err = c.CreateEvent(ctx, client.ProductEvent{ - ID: "evt_product_123", - Type: "invoice.paid", - SourceID: "src_internal", - Data: map[string]any{"invoice_id": "inv_123"}, -}) ``` + +## Python + +Setup from this repository: + +```bash +PYTHONPATH=sdk/python python3 -m unittest discover -s sdk/python/tests +``` + +Producer event ingestion: + +```python +import os +from webhookery import WebhookeryClient + +client = WebhookeryClient("http://localhost:8080", os.environ["WEBHOOKERY_API_KEY"]) + +client.create_event( + { + "id": "evt_product_123", + "type": "demo.created", + "source_id": "src_internal", + "data": {"ok": True}, + }, + idempotency_key="demo-event-123", +) +``` + +Audit-chain verification: + +```python +head = client.audit_chain_head() +verification = client.verify_audit_chain() +if not verification.get("valid"): + raise RuntimeError("audit chain did not verify") +``` + +## TypeScript + +Setup from this repository: + +```bash +tsc -p 
sdk/typescript/tsconfig.json +node --test sdk/typescript/test/client.test.mjs +``` + +Producer event ingestion: + +```ts +import { WebhookeryClient } from "@webhookery/client"; + +const client = new WebhookeryClient( + "http://localhost:8080", + process.env.WEBHOOKERY_API_KEY ?? "", +); + +await client.createEvent( + { + id: "evt_product_123", + type: "demo.created", + source_id: "src_internal", + data: { ok: true }, + }, + { idempotencyKey: "demo-event-123" }, +); +``` + +Audit-chain verification: + +```ts +const head = await client.auditChainHead(); +void head; + +const verification = await client.verifyAuditChain(); +if (verification.valid !== true) { + throw new Error("audit chain did not verify"); +} +``` + +## Error Handling And Redaction + +Client constructors validate base URLs and require API keys where the language +client can enforce it. HTTP errors include status codes and bounded response +bodies, but clients do not add API key material to error messages. + +The server is responsible for returning redacted problem details. SDK tests +include checks that API keys are not included in client error messages. Treat +raw event data, evidence bundles, and local payload files as sensitive even +when the client library does not log them. 
diff --git a/sdk/examples/evidence-workflow-go/main.go b/sdk/examples/evidence-workflow-go/main.go new file mode 100644 index 0000000..2f2de90 --- /dev/null +++ b/sdk/examples/evidence-workflow-go/main.go @@ -0,0 +1,232 @@ +package main + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "time" + + "webhookery/pkg/client" +) + +func main() { + if err := run(context.Background()); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } +} + +func run(ctx context.Context) error { + baseURL, err := requiredEnv("WEBHOOKERY_BASE_URL") + if err != nil { + return err + } + apiKey, err := requiredEnv("WEBHOOKERY_API_KEY") + if err != nil { + return err + } + sourceID, err := requiredEnv("WEBHOOKERY_SOURCE_ID") + if err != nil { + return err + } + output := strings.TrimSpace(os.Getenv("WEBHOOKERY_EVIDENCE_OUTPUT")) + if output == "" { + output = "evidence-workflow.tar.gz" + } + + sdkClient, err := client.New(baseURL, apiKey) + if err != nil { + return err + } + rest := restClient{ + baseURL: baseURL, + apiKey: apiKey, + client: &http.Client{Timeout: 30 * time.Second}, + } + + eventID := "evt_sdk_evidence_" + time.Now().UTC().Format("20060102T150405Z") + ingest, err := sdkClient.CreateEvent(ctx, client.ProductEvent{ + ID: eventID, + Type: "sdk.evidence.demo", + SourceID: sourceID, + Data: map[string]any{ + "sanitized": true, + }, + }, client.WithIdempotencyKey(eventID)) + if err != nil { + return err + } + if strings.TrimSpace(ingest.EventID) != "" { + eventID = ingest.EventID + } + + var incident struct { + ID string `json:"id"` + } + if err := rest.doJSON(ctx, http.MethodPost, "/v1/incidents", map[string]string{ + "title": "SDK evidence workflow", + "reason": "local SDK evidence example", + }, &incident); err != nil { + return err + } + if incident.ID == "" { + return errors.New("incident response did not include id") + } + + if err := rest.doJSON(ctx, 
http.MethodPost, "/v1/incidents/"+url.PathEscape(incident.ID)+"/events", map[string]string{ + "event_id": eventID, + "reason": "attach SDK-created event to evidence workflow", + }, nil); err != nil { + return err + } + if err := rest.doJSON(ctx, http.MethodPost, "/v1/incidents/"+url.PathEscape(incident.ID)+"/generate-report", map[string]string{ + "reason": "generate SDK example report", + }, nil); err != nil { + return err + } + + var export struct { + ID string `json:"id"` + } + if err := rest.doJSON(ctx, http.MethodPost, "/v1/incidents/"+url.PathEscape(incident.ID)+"/evidence-export", map[string]string{ + "reason": "create SDK example evidence export", + }, &export); err != nil { + return err + } + if export.ID == "" { + return errors.New("evidence export response did not include id") + } + if err := rest.download(ctx, "/v1/audit-exports/"+url.PathEscape(export.ID)+":download", output); err != nil { + return err + } + + verification, err := sdkClient.VerifyAuditChain(ctx, client.AuditChainVerifyRequest{}) + if err != nil { + return err + } + if !verification.Valid { + return errors.New("audit chain did not verify after evidence workflow") + } + + fmt.Printf("wrote evidence bundle to %s\n", output) + return nil +} + +type restClient struct { + baseURL string + apiKey string + client *http.Client +} + +func (c restClient) doJSON(ctx context.Context, method, path string, in, out any) error { + var body io.Reader + if in != nil { + raw, err := json.Marshal(in) + if err != nil { + return err + } + body = bytes.NewReader(raw) + } + req, err := http.NewRequestWithContext(ctx, method, endpoint(c.baseURL, path), body) + if err != nil { + return err + } + req.Header.Set("Accept", "application/json") + req.Header.Set("Authorization", "Bearer "+c.apiKey) + if in != nil { + req.Header.Set("Content-Type", "application/json") + } + resp, err := c.client.Do(req) + if err != nil { + return err + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode < 200 || 
resp.StatusCode > 299 { + return problemError(resp) + } + if out == nil { + return nil + } + return json.NewDecoder(resp.Body).Decode(out) +} + +func (c restClient) download(ctx context.Context, path, output string) error { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint(c.baseURL, path), nil) + if err != nil { + return err + } + req.Header.Set("Authorization", "Bearer "+c.apiKey) + resp, err := c.client.Do(req) + if err != nil { + return err + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode < 200 || resp.StatusCode > 299 { + return problemError(resp) + } + body, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + outputDir := filepath.Dir(output) + if outputDir != "." { + if err := os.MkdirAll(outputDir, 0o700); err != nil { // #nosec G703 -- SDK example writes to an operator-selected local evidence output directory. + return err + } + } + return os.WriteFile(output, body, 0o600) // #nosec G703 -- SDK example writes to an operator-selected local evidence output path. 
+} + +func endpoint(baseURL, path string) string { + parsed, err := url.Parse(strings.TrimSpace(baseURL)) + if err != nil { + panic(err) + } + if parsed.Scheme != "http" && parsed.Scheme != "https" { + panic("WEBHOOKERY_BASE_URL must use http or https") + } + if parsed.Host == "" || parsed.User != nil { + panic("WEBHOOKERY_BASE_URL must include a host and no credentials") + } + parsed.Path = strings.TrimRight(parsed.Path, "/") + path + parsed.RawQuery = "" + parsed.Fragment = "" + return parsed.String() +} + +func problemError(resp *http.Response) error { + raw, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + var p struct { + Code string `json:"code"` + StableCode string `json:"stable_code"` + RequestID string `json:"request_id"` + } + _ = json.Unmarshal(raw, &p) + code := strings.TrimSpace(p.StableCode) + if code == "" { + code = strings.TrimSpace(p.Code) + } + if code == "" { + code = "unknown_error" + } + if p.RequestID != "" { + return fmt.Errorf("webhookery API returned HTTP %d (%s, request_id=%s)", resp.StatusCode, code, p.RequestID) + } + return fmt.Errorf("webhookery API returned HTTP %d (%s)", resp.StatusCode, code) +} + +func requiredEnv(name string) (string, error) { + value := strings.TrimSpace(os.Getenv(name)) + if value == "" { + return "", fmt.Errorf("%s is required", name) + } + return value, nil +} diff --git a/sdk/openapi.yaml b/sdk/openapi.yaml index 48696cd..1e5e354 100644 --- a/sdk/openapi.yaml +++ b/sdk/openapi.yaml @@ -5,15 +5,50 @@ info: description: Self-hosted webhook evidence and delivery control plane. servers: - url: http://localhost:8080 +tags: + - name: System + description: Health, readiness, metrics, and the OpenAPI document. + - name: API Keys + description: Management API key lifecycle. + - name: Producer Trust + description: Product-event producer credentials, OAuth, and mTLS identities. + - name: Auth And Identity + description: OIDC sessions, SCIM, role bindings, access policies, and authorization explanation. 
+ - name: Sources And Providers + description: Webhook sources, provider connections, adapters, and provider ingress. + - name: Endpoints And Routing + description: Outbound endpoints, subscriptions, routes, and retry policies. + - name: Schemas And Transformations + description: Event schemas, compatibility checks, and deterministic transformations. + - name: Events And Ingestion + description: Product events, provider ingestion, event evidence, raw payloads, and timelines. + - name: Incidents + description: Webhook incident packets, report snapshots, and incident evidence exports. + - name: Delivery And Replay + description: Deliveries, attempts, replay jobs, dead letter, and quarantine. + - name: Reconciliation + description: Provider reconciliation jobs and gap evidence. + - name: Audit And Retention + description: Audit events, audit chain, evidence exports, and retention policies. + - name: Operations + description: Operational metrics, storage/config views, workers, queues, and alerts. + - name: Signal Egress + description: Notification channels, notification deliveries, SIEM sinks, and SIEM deliveries. paths: /healthz: get: + tags: + - System + operationId: getHealthz summary: Liveness check responses: "200": description: Process is alive. /readyz: get: + tags: + - System + operationId: getReadyz summary: Readiness check responses: "200": @@ -22,12 +57,18 @@ paths: description: A dependency is unavailable. /openapi.yaml: get: + tags: + - System + operationId: getOpenapiYaml summary: OpenAPI document responses: "200": description: YAML OpenAPI document. 
/metrics: get: + tags: + - System + operationId: getMetrics summary: Prometheus metrics responses: "200": @@ -38,6 +79,9 @@ paths: type: string /v1/api-keys: get: + tags: + - API Keys + operationId: getApiKeys summary: List API keys security: - bearerAuth: [] @@ -51,6 +95,9 @@ paths: schema: $ref: "#/components/schemas/APIKeyPage" post: + tags: + - API Keys + operationId: postApiKeys summary: Create API key security: - bearerAuth: [] @@ -71,6 +118,9 @@ paths: $ref: "#/components/responses/Problem" /v1/api-keys/{api_key_id}:revoke: post: + tags: + - API Keys + operationId: postApiKeysApiKeyIdRevoke summary: Revoke API key security: - bearerAuth: [] @@ -95,6 +145,9 @@ paths: $ref: "#/components/schemas/APIKey" /v1/oauth/token: post: + tags: + - Producer Trust + operationId: postOauthToken summary: Issue producer OAuth access token description: Issues an opaque short-lived bearer token for product-event producers using the client credentials grant. Only HTTP Basic client authentication is accepted; client secrets in the form body are rejected. security: @@ -105,6 +158,9 @@ paths: application/x-www-form-urlencoded: schema: $ref: "#/components/schemas/OAuthClientCredentialsRequest" + example: + grant_type: client_credentials + scope: events:write responses: "200": description: Non-cacheable producer access token response. @@ -121,12 +177,20 @@ paths: application/json: schema: $ref: "#/components/schemas/ProducerTokenResponse" + example: + access_token: "" + token_type: Bearer + expires_in: 900 + scope: events:write "400": $ref: "#/components/responses/Problem" "401": $ref: "#/components/responses/Problem" /v1/producer-clients: get: + tags: + - Producer Trust + operationId: getProducerClients summary: List producer OAuth clients description: Lists tenant-scoped producer client metadata. Client secret hashes and token values are never returned. 
security: @@ -141,6 +205,9 @@ paths: schema: $ref: "#/components/schemas/ProducerClientPage" post: + tags: + - Producer Trust + operationId: postProducerClients summary: Create producer OAuth client description: Creates a tenant/source-bound product-event producer credential. The generated client secret is returned once only. security: @@ -162,6 +229,9 @@ paths: $ref: "#/components/responses/Problem" /v1/producer-clients/{client_id}: get: + tags: + - Producer Trust + operationId: getProducerClientsClientId summary: Get producer OAuth client security: - bearerAuth: [] @@ -179,6 +249,9 @@ paths: schema: $ref: "#/components/schemas/ProducerClient" patch: + tags: + - Producer Trust + operationId: patchProducerClientsClientId summary: Update producer OAuth client security: - bearerAuth: [] @@ -202,6 +275,9 @@ paths: schema: $ref: "#/components/schemas/ProducerClient" delete: + tags: + - Producer Trust + operationId: deleteProducerClientsClientId summary: Disable producer OAuth client security: - bearerAuth: [] @@ -226,6 +302,9 @@ paths: $ref: "#/components/schemas/ProducerClient" /v1/producer-clients/{client_id}/secrets:rotate: post: + tags: + - Producer Trust + operationId: postProducerClientsClientIdSecretsRotate summary: Rotate producer client secret description: Revokes active producer client secrets and returns the new client secret once only. security: @@ -251,6 +330,9 @@ paths: $ref: "#/components/schemas/ProducerClientSecretRotated" /v1/producer-mtls-identities: get: + tags: + - Producer Trust + operationId: getProducerMtlsIdentities summary: List producer mTLS identities description: Lists tenant-scoped producer client-certificate identities. Private keys and raw certificate private material are never stored or returned. 
security: @@ -265,6 +347,9 @@ paths: schema: $ref: "#/components/schemas/ProducerMTLSIdentityPage" post: + tags: + - Producer Trust + operationId: postProducerMtlsIdentities summary: Create producer mTLS identity security: - bearerAuth: [] @@ -283,6 +368,9 @@ paths: $ref: "#/components/schemas/ProducerMTLSIdentity" /v1/producer-mtls-identities/{identity_id}: get: + tags: + - Producer Trust + operationId: getProducerMtlsIdentitiesIdentityId summary: Get producer mTLS identity security: - bearerAuth: [] @@ -300,6 +388,9 @@ paths: schema: $ref: "#/components/schemas/ProducerMTLSIdentity" patch: + tags: + - Producer Trust + operationId: patchProducerMtlsIdentitiesIdentityId summary: Update producer mTLS identity security: - bearerAuth: [] @@ -323,6 +414,9 @@ paths: schema: $ref: "#/components/schemas/ProducerMTLSIdentity" delete: + tags: + - Producer Trust + operationId: deleteProducerMtlsIdentitiesIdentityId summary: Disable producer mTLS identity security: - bearerAuth: [] @@ -347,6 +441,9 @@ paths: $ref: "#/components/schemas/ProducerMTLSIdentity" /v1/producer-mtls-identities/{identity_id}:verify: post: + tags: + - Producer Trust + operationId: postProducerMtlsIdentitiesIdentityIdVerify summary: Verify producer mTLS certificate against identity security: - bearerAuth: [] @@ -371,6 +468,9 @@ paths: $ref: "#/components/schemas/ProducerMTLSIdentityVerification" /v1/auth/oidc/login: get: + tags: + - Auth And Identity + operationId: getAuthOidcLogin summary: Start OIDC management login description: Starts Authorization Code + PKCE login for a tenant identity provider and redirects to the provider authorization endpoint. parameters: @@ -394,6 +494,9 @@ paths: description: Redirect to identity provider. /v1/auth/oidc/callback: get: + tags: + - Auth And Identity + operationId: getAuthOidcCallback summary: Complete OIDC management login description: Validates state, nonce, issuer, audience, expiry, and signed ID token before creating a hashed management session cookie. 
parameters: @@ -416,6 +519,9 @@ paths: $ref: "#/components/schemas/AuthSessionCreated" /v1/auth/logout: post: + tags: + - Auth And Identity + operationId: postAuthLogout summary: Revoke current management session security: - bearerAuth: [] @@ -424,6 +530,9 @@ paths: description: Session revoked. /v1/auth/session: get: + tags: + - Auth And Identity + operationId: getAuthSession summary: Get current management session security: - bearerAuth: [] @@ -434,8 +543,20 @@ paths: application/json: schema: $ref: "#/components/schemas/AuthSession" + example: + id: ses_example + tenant_id: ten_example + user_id: usr_example + external_identity_id: oidc_example + state: active + created_at: "2026-05-26T12:00:00Z" + last_seen_at: "2026-05-26T12:05:00Z" + expires_at: "2026-05-26T20:00:00Z" /v1/auth/sessions: get: + tags: + - Auth And Identity + operationId: getAuthSessions summary: List active management sessions security: - bearerAuth: [] @@ -450,6 +571,9 @@ paths: $ref: "#/components/schemas/AuthSessionPage" /v1/auth/sessions/{session_id}:revoke: post: + tags: + - Auth And Identity + operationId: postAuthSessionsSessionIdRevoke summary: Revoke management session security: - bearerAuth: [] @@ -474,6 +598,9 @@ paths: $ref: "#/components/schemas/AuthSession" /v1/identity-providers: get: + tags: + - Auth And Identity + operationId: getIdentityProviders summary: List identity providers description: Lists tenant-scoped OIDC identity provider metadata. Client secrets are never returned. 
security: @@ -488,6 +615,9 @@ paths: schema: $ref: "#/components/schemas/IdentityProviderPage" post: + tags: + - Auth And Identity + operationId: postIdentityProviders summary: Create OIDC identity provider security: - bearerAuth: [] @@ -506,6 +636,9 @@ paths: $ref: "#/components/schemas/IdentityProvider" /v1/identity-providers/{provider_id}: get: + tags: + - Auth And Identity + operationId: getIdentityProvidersProviderId summary: Get identity provider security: - bearerAuth: [] @@ -523,6 +656,9 @@ paths: schema: $ref: "#/components/schemas/IdentityProvider" patch: + tags: + - Auth And Identity + operationId: patchIdentityProvidersProviderId summary: Update identity provider security: - bearerAuth: [] @@ -546,6 +682,9 @@ paths: schema: $ref: "#/components/schemas/IdentityProvider" delete: + tags: + - Auth And Identity + operationId: deleteIdentityProvidersProviderId summary: Disable identity provider security: - bearerAuth: [] @@ -570,6 +709,9 @@ paths: $ref: "#/components/schemas/IdentityProvider" /v1/identity-providers/{provider_id}:test: post: + tags: + - Auth And Identity + operationId: postIdentityProvidersProviderIdTest summary: Test identity provider configuration security: - bearerAuth: [] @@ -594,6 +736,9 @@ paths: $ref: "#/components/schemas/IdentityProvider" /v1/scim-tokens: get: + tags: + - Auth And Identity + operationId: getScimTokens summary: List SCIM tokens description: Lists SCIM token metadata only. Raw tokens and hashes are never returned. security: @@ -606,6 +751,9 @@ paths: schema: $ref: "#/components/schemas/SCIMTokenPage" post: + tags: + - Auth And Identity + operationId: postScimTokens summary: Create SCIM token description: Returns the SCIM bearer token exactly once. 
security: @@ -625,6 +773,9 @@ paths: $ref: "#/components/schemas/SCIMTokenCreated" /v1/scim-tokens/{token_id}: delete: + tags: + - Auth And Identity + operationId: deleteScimTokensTokenId summary: Revoke SCIM token security: - bearerAuth: [] @@ -649,6 +800,9 @@ paths: $ref: "#/components/schemas/SCIMToken" /v1/scim/v2/Users: get: + tags: + - Auth And Identity + operationId: getScimV2Users summary: List SCIM users security: - bearerAuth: [] @@ -656,6 +810,9 @@ paths: "200": description: SCIM ListResponse. post: + tags: + - Auth And Identity + operationId: postScimV2Users summary: Provision SCIM user security: - bearerAuth: [] @@ -670,6 +827,9 @@ paths: description: User provisioned. /v1/scim/v2/Users/{user_id}: get: + tags: + - Auth And Identity + operationId: getScimV2UsersUserId summary: Get SCIM user security: - bearerAuth: [] @@ -683,6 +843,9 @@ paths: "200": description: SCIM user. put: + tags: + - Auth And Identity + operationId: putScimV2UsersUserId summary: Replace SCIM user security: - bearerAuth: [] @@ -702,6 +865,9 @@ paths: "200": description: User replaced. patch: + tags: + - Auth And Identity + operationId: patchScimV2UsersUserId summary: Patch SCIM user security: - bearerAuth: [] @@ -721,6 +887,9 @@ paths: "200": description: User patched. delete: + tags: + - Auth And Identity + operationId: deleteScimV2UsersUserId summary: Deactivate SCIM user security: - bearerAuth: [] @@ -735,6 +904,9 @@ paths: description: User deactivated. /v1/scim/v2/Groups: get: + tags: + - Auth And Identity + operationId: getScimV2Groups summary: List SCIM groups security: - bearerAuth: [] @@ -742,6 +914,9 @@ paths: "200": description: SCIM ListResponse. post: + tags: + - Auth And Identity + operationId: postScimV2Groups summary: Provision SCIM group security: - bearerAuth: [] @@ -756,6 +931,9 @@ paths: description: Group provisioned. 
/v1/scim/v2/Groups/{group_id}: get: + tags: + - Auth And Identity + operationId: getScimV2GroupsGroupId summary: Get SCIM group security: - bearerAuth: [] @@ -769,6 +947,9 @@ paths: "200": description: SCIM group. put: + tags: + - Auth And Identity + operationId: putScimV2GroupsGroupId summary: Replace SCIM group security: - bearerAuth: [] @@ -788,6 +969,9 @@ paths: "200": description: Group replaced. patch: + tags: + - Auth And Identity + operationId: patchScimV2GroupsGroupId summary: Patch SCIM group security: - bearerAuth: [] @@ -807,6 +991,9 @@ paths: "200": description: Group patched. delete: + tags: + - Auth And Identity + operationId: deleteScimV2GroupsGroupId summary: Deactivate SCIM group security: - bearerAuth: [] @@ -821,6 +1008,9 @@ paths: description: Group deactivated. /v1/role-bindings: get: + tags: + - Auth And Identity + operationId: getRoleBindings summary: List resource-aware role bindings security: - bearerAuth: [] @@ -832,6 +1022,9 @@ paths: schema: $ref: "#/components/schemas/RoleBindingPage" post: + tags: + - Auth And Identity + operationId: postRoleBindings summary: Create resource-aware role binding security: - bearerAuth: [] @@ -850,6 +1043,9 @@ paths: $ref: "#/components/schemas/RoleBinding" /v1/role-bindings/{binding_id}: patch: + tags: + - Auth And Identity + operationId: patchRoleBindingsBindingId summary: Update role binding security: - bearerAuth: [] @@ -869,6 +1065,9 @@ paths: "200": description: Role binding updated. delete: + tags: + - Auth And Identity + operationId: deleteRoleBindingsBindingId summary: Disable role binding security: - bearerAuth: [] @@ -889,6 +1088,9 @@ paths: description: Role binding disabled. 
/v1/access-policies: get: + tags: + - Auth And Identity + operationId: getAccessPolicies summary: List access policy rules security: - bearerAuth: [] @@ -900,6 +1102,9 @@ paths: schema: $ref: "#/components/schemas/AccessPolicyRulePage" post: + tags: + - Auth And Identity + operationId: postAccessPolicies summary: Create access policy rule security: - bearerAuth: [] @@ -918,6 +1123,9 @@ paths: $ref: "#/components/schemas/AccessPolicyRule" /v1/access-policies/{policy_id}: patch: + tags: + - Auth And Identity + operationId: patchAccessPoliciesPolicyId summary: Update access policy rule security: - bearerAuth: [] @@ -937,6 +1145,9 @@ paths: "200": description: Access policy rule updated. delete: + tags: + - Auth And Identity + operationId: deleteAccessPoliciesPolicyId summary: Disable access policy rule security: - bearerAuth: [] @@ -957,6 +1168,9 @@ paths: description: Access policy rule disabled. /v1/authz:explain: post: + tags: + - Auth And Identity + operationId: postAuthzExplain summary: Explain authorization decision description: Returns a redacted allow/deny explanation for security operators. security: @@ -976,6 +1190,9 @@ paths: $ref: "#/components/schemas/AuthzDecision" /v1/sources: get: + tags: + - Sources And Providers + operationId: getSources summary: List inbound sources security: - bearerAuth: [] @@ -987,6 +1204,9 @@ paths: schema: $ref: "#/components/schemas/SourcePage" post: + tags: + - Sources And Providers + operationId: postSources summary: Create inbound source security: - bearerAuth: [] @@ -996,6 +1216,10 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateSourceRequest" + example: + name: stripe-prod + provider: stripe + verification_secret: "" responses: "201": description: Source created. 
@@ -1005,6 +1229,9 @@ paths: $ref: "#/components/schemas/Source" /v1/sources/{source_id}: get: + tags: + - Sources And Providers + operationId: getSourcesSourceId summary: Get inbound source security: - bearerAuth: [] @@ -1024,6 +1251,9 @@ paths: "404": description: Source not found in this tenant. patch: + tags: + - Sources And Providers + operationId: patchSourcesSourceId summary: Update inbound source metadata or state description: Updates mutable source metadata. Verification secrets are rotated through the dedicated rotate endpoint and are never returned. security: @@ -1048,6 +1278,9 @@ paths: schema: $ref: "#/components/schemas/Source" delete: + tags: + - Sources And Providers + operationId: deleteSourcesSourceId summary: Disable inbound source description: Disables the source without deleting historical evidence. security: @@ -1073,6 +1306,9 @@ paths: $ref: "#/components/schemas/Source" /v1/sources/{source_id}/secrets:rotate: post: + tags: + - Sources And Providers + operationId: postSourcesSourceIdSecretsRotate summary: Rotate source verification secret description: Creates a new active source secret version and keeps the previous version during the requested grace period. Plaintext secrets are never returned. security: @@ -1098,6 +1334,9 @@ paths: $ref: "#/components/schemas/SourceSecretVersion" /v1/provider-connections: get: + tags: + - Sources And Providers + operationId: getProviderConnections summary: List provider API reconciliation connections description: Plaintext provider credentials are never returned. 
security: @@ -1112,6 +1351,9 @@ paths: schema: $ref: "#/components/schemas/ProviderConnectionPage" post: + tags: + - Sources And Providers + operationId: postProviderConnections summary: Create provider API reconciliation connection security: - bearerAuth: [] @@ -1121,6 +1363,13 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateProviderConnectionRequest" + example: + name: stripe-reconciliation + provider: stripe + credential_type: api_key + credential: "" + config: + source_id: src_stripe responses: "201": description: Provider connection created with encrypted credentials. @@ -1130,6 +1379,9 @@ paths: $ref: "#/components/schemas/ProviderConnection" /v1/provider-connections/{connection_id}: get: + tags: + - Sources And Providers + operationId: getProviderConnectionsConnectionId summary: Get provider API reconciliation connection security: - bearerAuth: [] @@ -1148,6 +1400,9 @@ paths: $ref: "#/components/schemas/ProviderConnection" /v1/provider-connections/{connection_id}:verify: post: + tags: + - Sources And Providers + operationId: postProviderConnectionsConnectionIdVerify summary: Verify provider API connection credentials security: - bearerAuth: [] @@ -1172,6 +1427,9 @@ paths: $ref: "#/components/schemas/ProviderConnection" /v1/provider-connections/{connection_id}:revoke: post: + tags: + - Sources And Providers + operationId: postProviderConnectionsConnectionIdRevoke summary: Revoke provider API connection credentials security: - bearerAuth: [] @@ -1196,6 +1454,9 @@ paths: $ref: "#/components/schemas/ProviderConnection" /v1/adapters: get: + tags: + - Sources And Providers + operationId: getAdapters summary: List built-in and tenant custom adapters description: Returns adapter governance metadata only. Plugin packages are not executed by this endpoint. 
security: @@ -1210,6 +1471,9 @@ paths: schema: $ref: "#/components/schemas/ProviderAdapterPage" post: + tags: + - Sources And Providers + operationId: postAdapters summary: Create tenant custom adapter security: - bearerAuth: [] @@ -1228,6 +1492,9 @@ paths: $ref: "#/components/schemas/ProviderAdapter" /v1/adapters/{adapter_id}: get: + tags: + - Sources And Providers + operationId: getAdaptersAdapterId summary: Get adapter metadata security: - bearerAuth: [] @@ -1246,6 +1513,9 @@ paths: $ref: "#/components/schemas/ProviderAdapter" /v1/adapters/{adapter_id}/versions: get: + tags: + - Sources And Providers + operationId: getAdaptersAdapterIdVersions summary: List adapter versions security: - bearerAuth: [] @@ -1264,6 +1534,9 @@ paths: schema: $ref: "#/components/schemas/AdapterVersionPage" post: + tags: + - Sources And Providers + operationId: postAdaptersAdapterIdVersions summary: Create adapter version description: Declarative versions store JSON definitions. Plugin versions store signed package metadata only and are not executed. security: @@ -1289,6 +1562,9 @@ paths: $ref: "#/components/schemas/AdapterVersion" /v1/adapters/{adapter_id}/versions/{version_id}/test-vectors: post: + tags: + - Sources And Providers + operationId: postAdaptersAdapterIdVersionsVersionIdTestVectors summary: Add adapter version test vector security: - bearerAuth: [] @@ -1318,6 +1594,9 @@ paths: $ref: "#/components/schemas/AdapterTestVector" /v1/adapters/{adapter_id}/versions/{version_id}:transition: post: + tags: + - Sources And Providers + operationId: postAdaptersAdapterIdVersionsVersionIdTransition summary: Transition adapter version through approval workflow description: Supported actions are submit_tests, request_review, approve_staging, activate, deprecate, and retire. 
security: @@ -1348,6 +1627,9 @@ paths: $ref: "#/components/schemas/AdapterVersion" /v1/endpoints: get: + tags: + - Endpoints And Routing + operationId: getEndpoints summary: List outbound endpoints security: - bearerAuth: [] @@ -1359,6 +1641,9 @@ paths: schema: $ref: "#/components/schemas/EndpointPage" post: + tags: + - Endpoints And Routing + operationId: postEndpoints summary: Create outbound endpoint security: - bearerAuth: [] @@ -1368,6 +1653,9 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateEndpointRequest" + example: + name: billing-receiver + url: https://receiver.example/webhook responses: "201": description: Endpoint created after SSRF validation. @@ -1377,6 +1665,9 @@ paths: $ref: "#/components/schemas/Endpoint" /v1/endpoints/{endpoint_id}: get: + tags: + - Endpoints And Routing + operationId: getEndpointsEndpointId summary: Get outbound endpoint security: - bearerAuth: [] @@ -1396,6 +1687,9 @@ paths: "404": description: Endpoint not found in this tenant. patch: + tags: + - Endpoints And Routing + operationId: patchEndpointsEndpointId summary: Update outbound endpoint metadata, URL, retry policy, or state description: URL updates rerun the SSRF policy before persistence. Signing secrets and mTLS key material are managed through dedicated endpoints. security: @@ -1422,6 +1716,9 @@ paths: "422": description: Endpoint URL blocked by SSRF policy. delete: + tags: + - Endpoints And Routing + operationId: deleteEndpointsEndpointId summary: Disable outbound endpoint description: Disables future delivery claims without deleting historical deliveries, attempts, or evidence. security: @@ -1447,6 +1744,9 @@ paths: $ref: "#/components/schemas/Endpoint" /v1/endpoints:validate-url: post: + tags: + - Endpoints And Routing + operationId: postEndpointsValidateUrl summary: Validate endpoint URL against SSRF policy security: - bearerAuth: [] @@ -1455,6 +1755,9 @@ paths: description: URL validation result. 
/v1/endpoints/{endpoint_id}:test: post: + tags: + - Endpoints And Routing + operationId: postEndpointsEndpointIdTest summary: Schedule a signed endpoint test delivery security: - bearerAuth: [] @@ -1475,6 +1778,9 @@ paths: description: Endpoint test delivery scheduled. /v1/endpoints/{endpoint_id}/secrets:rotate: post: + tags: + - Endpoints And Routing + operationId: postEndpointsEndpointIdSecretsRotate summary: Rotate endpoint signing secret description: Creates a new active endpoint signing secret version. Outbound deliveries include signing key metadata headers. security: @@ -1500,6 +1806,9 @@ paths: $ref: "#/components/schemas/EndpointSecretVersion" /v1/subscriptions: get: + tags: + - Endpoints And Routing + operationId: getSubscriptions summary: List subscriptions security: - bearerAuth: [] @@ -1511,6 +1820,9 @@ paths: schema: $ref: "#/components/schemas/SubscriptionPage" post: + tags: + - Endpoints And Routing + operationId: postSubscriptions summary: Create subscription security: - bearerAuth: [] @@ -1529,6 +1841,9 @@ paths: $ref: "#/components/schemas/Subscription" /v1/subscriptions/{subscription_id}: get: + tags: + - Endpoints And Routing + operationId: getSubscriptionsSubscriptionId summary: Get subscription security: - bearerAuth: [] @@ -1548,6 +1863,9 @@ paths: "404": description: Subscription not found in this tenant. patch: + tags: + - Endpoints And Routing + operationId: patchSubscriptionsSubscriptionId summary: Update subscription description: Updates fanout configuration and records a new immutable subscription version. security: @@ -1572,6 +1890,9 @@ paths: schema: $ref: "#/components/schemas/Subscription" delete: + tags: + - Endpoints And Routing + operationId: deleteSubscriptionsSubscriptionId summary: Disable subscription description: Disables future fanout without deleting historical deliveries or configuration evidence. 
security: @@ -1597,6 +1918,9 @@ paths: $ref: "#/components/schemas/Subscription" /v1/transformations: get: + tags: + - Schemas And Transformations + operationId: getTransformations summary: List deterministic transformations security: - bearerAuth: [] @@ -1610,6 +1934,9 @@ paths: schema: $ref: "#/components/schemas/TransformationPage" post: + tags: + - Schemas And Transformations + operationId: postTransformations summary: Create deterministic transformation security: - bearerAuth: [] @@ -1628,6 +1955,9 @@ paths: $ref: "#/components/schemas/Transformation" /v1/transformations/{transformation_id}: get: + tags: + - Schemas And Transformations + operationId: getTransformationsTransformationId summary: Get deterministic transformation security: - bearerAuth: [] @@ -1646,6 +1976,9 @@ paths: $ref: "#/components/schemas/Transformation" /v1/transformations/{transformation_id}/versions: get: + tags: + - Schemas And Transformations + operationId: getTransformationsTransformationIdVersions summary: List immutable transformation versions security: - bearerAuth: [] @@ -1664,6 +1997,9 @@ paths: schema: $ref: "#/components/schemas/TransformationVersionPage" post: + tags: + - Schemas And Transformations + operationId: postTransformationsTransformationIdVersions summary: Create immutable transformation version security: - bearerAuth: [] @@ -1688,6 +2024,9 @@ paths: $ref: "#/components/schemas/TransformationVersion" /v1/transformations/{transformation_id}/versions/{version_id}:activate: post: + tags: + - Schemas And Transformations + operationId: postTransformationsTransformationIdVersionsVersionIdActivate summary: Activate transformation version security: - bearerAuth: [] @@ -1717,6 +2056,9 @@ paths: $ref: "#/components/schemas/TransformationVersion" /v1/retry-policies: get: + tags: + - Endpoints And Routing + operationId: getRetryPolicies summary: List retry policies security: - bearerAuth: [] @@ -1728,6 +2070,9 @@ paths: schema: $ref: "#/components/schemas/RetryPolicyPage" post: 
+ tags: + - Endpoints And Routing + operationId: postRetryPolicies summary: Create retry policy version security: - bearerAuth: [] @@ -1746,6 +2091,9 @@ paths: $ref: "#/components/schemas/RetryPolicy" /v1/retry-policies/{retry_policy_id}: get: + tags: + - Endpoints And Routing + operationId: getRetryPoliciesRetryPolicyId summary: Get retry policy security: - bearerAuth: [] @@ -1765,6 +2113,9 @@ paths: "404": description: Retry policy not found in this tenant. patch: + tags: + - Endpoints And Routing + operationId: patchRetryPoliciesRetryPolicyId summary: Create a new retry policy version from an existing policy description: Leaves the existing policy row intact and returns the new version row. security: @@ -1789,6 +2140,9 @@ paths: schema: $ref: "#/components/schemas/RetryPolicy" delete: + tags: + - Endpoints And Routing + operationId: deleteRetryPoliciesRetryPolicyId summary: Disable retry policy description: Disables future use of the referenced retry policy row without deleting historical delivery evidence. security: @@ -1814,6 +2168,9 @@ paths: $ref: "#/components/schemas/RetryPolicy" /v1/routes: get: + tags: + - Endpoints And Routing + operationId: getRoutes summary: List routes security: - bearerAuth: [] @@ -1825,6 +2182,9 @@ paths: schema: $ref: "#/components/schemas/RoutePage" post: + tags: + - Endpoints And Routing + operationId: postRoutes summary: Create route security: - bearerAuth: [] @@ -1834,6 +2194,15 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateRouteRequest" + example: + source_id: src_stripe + name: invoice-events + endpoint_id: end_billing + event_types: + - invoice.paid + - invoice.updated + priority: 100 + state: active responses: "201": description: Route created. 
@@ -1843,6 +2212,9 @@ paths: $ref: "#/components/schemas/Route" /v1/routes/{route_id}: get: + tags: + - Endpoints And Routing + operationId: getRoutesRouteId summary: Get route security: - bearerAuth: [] @@ -1862,6 +2234,9 @@ paths: "404": description: Route not found in this tenant. patch: + tags: + - Endpoints And Routing + operationId: patchRoutesRouteId summary: Update route description: Updates route matching/destination metadata and records a new immutable route version. security: @@ -1886,6 +2261,9 @@ paths: schema: $ref: "#/components/schemas/Route" delete: + tags: + - Endpoints And Routing + operationId: deleteRoutesRouteId summary: Inactivate route description: Moves the route to inactive without deleting historical decisions, deliveries, or version evidence. security: @@ -1911,6 +2289,9 @@ paths: $ref: "#/components/schemas/Route" /v1/routes/{route_id}/versions: get: + tags: + - Endpoints And Routing + operationId: getRoutesRouteIdVersions summary: List immutable route versions security: - bearerAuth: [] @@ -1930,6 +2311,9 @@ paths: $ref: "#/components/schemas/RouteVersionPage" /v1/routes/{route_id}:activate: post: + tags: + - Endpoints And Routing + operationId: postRoutesRouteIdActivate summary: Activate route security: - bearerAuth: [] @@ -1944,6 +2328,9 @@ paths: description: Route activated. /v1/routes/{route_id}:dry-run: post: + tags: + - Endpoints And Routing + operationId: postRoutesRouteIdDryRun summary: Dry-run route against event security: - bearerAuth: [] @@ -1958,6 +2345,9 @@ paths: description: Route match explanation. 
/v1/event-types: get: + tags: + - Schemas And Transformations + operationId: getEventTypes summary: List event types security: - bearerAuth: [] @@ -1969,6 +2359,9 @@ paths: schema: $ref: "#/components/schemas/EventTypePage" post: + tags: + - Schemas And Transformations + operationId: postEventTypes summary: Create event type security: - bearerAuth: [] @@ -1987,6 +2380,9 @@ paths: $ref: "#/components/schemas/EventType" /v1/event-types/{event_type}: get: + tags: + - Schemas And Transformations + operationId: getEventTypesEventType summary: Get event type security: - bearerAuth: [] @@ -2006,6 +2402,9 @@ paths: "404": description: Event type not found in this tenant. patch: + tags: + - Schemas And Transformations + operationId: patchEventTypesEventType summary: Update event type metadata or state description: Event type names remain immutable; delete disables the event type. security: @@ -2030,6 +2429,9 @@ paths: schema: $ref: "#/components/schemas/EventType" delete: + tags: + - Schemas And Transformations + operationId: deleteEventTypesEventType summary: Disable event type description: Event type delete is a state transition to disabled; historical schemas and evidence remain. 
security: @@ -2055,6 +2457,9 @@ paths: $ref: "#/components/schemas/EventType" /v1/event-types/{event_type}/schemas: get: + tags: + - Schemas And Transformations + operationId: getEventTypesEventTypeSchemas summary: List schemas for event type security: - bearerAuth: [] @@ -2072,6 +2477,9 @@ paths: schema: $ref: "#/components/schemas/EventSchemaPage" post: + tags: + - Schemas And Transformations + operationId: postEventTypesEventTypeSchemas summary: Create schema for event type security: - bearerAuth: [] @@ -2096,6 +2504,9 @@ paths: $ref: "#/components/schemas/EventSchema" /v1/event-types/{event_type}/schemas/{schema_version}: get: + tags: + - Schemas And Transformations + operationId: getEventTypesEventTypeSchemasSchemaVersion summary: Get schema for event type security: - bearerAuth: [] @@ -2120,6 +2531,9 @@ paths: "404": description: Schema not found in this tenant. patch: + tags: + - Schemas And Transformations + operationId: patchEventTypesEventTypeSchemasSchemaVersion summary: Update schema lifecycle state description: Schema body and version remain immutable; lifecycle state changes are audited and config-versioned. security: @@ -2149,6 +2563,9 @@ paths: schema: $ref: "#/components/schemas/EventSchema" delete: + tags: + - Schemas And Transformations + operationId: deleteEventTypesEventTypeSchemasSchemaVersion summary: Retire schema description: Schema delete is a state transition to retired; historical evidence remains. 
security: @@ -2179,6 +2596,9 @@ paths: $ref: "#/components/schemas/EventSchema" /v1/event-types/{event_type}/schemas/{schema_version}:validate: post: + tags: + - Schemas And Transformations + operationId: postEventTypesEventTypeSchemasSchemaVersionValidate summary: Validate payload against schema security: - bearerAuth: [] @@ -2208,6 +2628,9 @@ paths: $ref: "#/components/schemas/SchemaValidationResult" /v1/event-types/{event_type}/schemas/{schema_version}:check-compatibility: post: + tags: + - Schemas And Transformations + operationId: postEventTypesEventTypeSchemasSchemaVersionCheckCompatibility summary: Check schema compatibility description: Performs conservative JSON-object compatibility checks for required fields and property type changes. security: @@ -2238,13 +2661,70 @@ paths: $ref: "#/components/schemas/SchemaCompatibilityResult" /v1/events: get: + tags: + - Events And Ingestion + operationId: getEvents summary: Search events security: - bearerAuth: [] + parameters: + - name: limit + in: query + schema: + type: integer + minimum: 1 + maximum: 100 + - name: provider + in: query + schema: + type: string + description: Provider name, such as `stripe`, `github`, or `shopify`. + - name: external_id + in: query + schema: + type: string + description: Provider event ID, such as a Stripe event ID. + - name: delivery_id + in: query + schema: + type: string + description: Webhookery delivery ID linked to the event. + - name: status + in: query + schema: + type: string + enum: [dlq, dead_lettered] + description: Forensic status preset. `dlq` and `dead_lettered` return events with open DLQ evidence. + - name: verification + in: query + schema: + type: string + enum: [valid, invalid] + description: Provider signature verification result. + - name: received_after + in: query + schema: + type: string + format: date-time + description: Lower bound for event receipt time. 
+ - name: route_id + in: query + schema: + type: string + description: Route ID with delivery evidence linked to the event. responses: "200": description: Paginated event list. + content: + application/json: + schema: + $ref: "#/components/schemas/EventPage" + "400": + $ref: "#/components/responses/ValidationProblem" post: + tags: + - Events And Ingestion + operationId: postEvents summary: Ingest product event description: Accepts product events from management API keys, producer OAuth bearer tokens, or verified producer mTLS identities. Source-bound credentials must match the submitted `source_id`. security: @@ -2256,15 +2736,36 @@ paths: application/json: schema: $ref: "#/components/schemas/ProductEventIngestRequest" + example: + source_id: src_internal + id: evt_demo_001 + type: demo.created + data: + ok: true responses: "202": description: Product event accepted after durable capture. + content: + application/json: + example: + Accepted: true + EventID: evt_example + ReceiptID: rcp_example + RawPayloadID: raw_example + TraceID: req_example + VerifyReason: verified + DedupeStatus: unique + "400": + $ref: "#/components/responses/ValidationProblem" "401": - $ref: "#/components/responses/Problem" + $ref: "#/components/responses/UnauthorizedProblem" "403": - $ref: "#/components/responses/Problem" + $ref: "#/components/responses/ForbiddenProblem" /v1/events/{event_id}: get: + tags: + - Events And Ingestion + operationId: getEventsEventId summary: Get event security: - bearerAuth: [] @@ -2277,10 +2778,22 @@ paths: responses: "200": description: Event detail. + content: + application/json: + example: + id: evt_example + source_id: src_internal + event_type: demo.created + verification_status: verified + dedupe_status: unique + created_at: "2026-05-26T12:00:00Z" "404": - description: Event not found or not visible. 
+ $ref: "#/components/responses/NotFoundProblem" /v1/events/{event_id}/raw: get: + tags: + - Events And Ingestion + operationId: getEventsEventIdRaw summary: Get raw payload evidence security: - bearerAuth: [] @@ -2290,15 +2803,38 @@ paths: required: true schema: type: string + - name: reason + in: query + required: true + description: Operator reason for elevated raw payload access. The reason is recorded on the raw_payload.read audit event. + schema: + type: string + minLength: 1 + maxLength: 500 responses: "200": description: Raw body as base64 plus hash and storage metadata. + content: + application/json: + example: + event_id: evt_example + raw_payload_hash: sha256:0f343b0931126a20f133d67c2b018a3b + content_type: application/json + size_bytes: 42 + storage_backend: postgres + storage_status: stored + body_base64: eyJpZCI6ImV2dF9leGFtcGxlIn0= + "400": + $ref: "#/components/responses/ValidationProblem" "403": - description: Actor lacks raw payload permission. + $ref: "#/components/responses/ForbiddenProblem" "410": description: Raw payload body has expired or was deleted by retention; metadata remains on related records. /v1/events/{event_id}/normalized: get: + tags: + - Events And Ingestion + operationId: getEventsEventIdNormalized summary: Get normalized event evidence description: Metadata and hashes require events:read. Including normalized data requires events:raw and writes an audit event. security: @@ -2323,11 +2859,14 @@ paths: schema: $ref: "#/components/schemas/NormalizedEnvelope" "403": - description: Actor lacks events:raw for include_data=true. + $ref: "#/components/responses/ForbiddenProblem" "410": description: Normalized data was deleted by retention; metadata and hashes remain. 
/v1/events/{event_id}/timeline: get: + tags: + - Events And Ingestion + operationId: getEventsEventIdTimeline summary: Get event timeline security: - bearerAuth: [] @@ -2341,8 +2880,237 @@ paths: responses: "200": description: Timeline entries for event, receipts, deliveries, attempts, and audit records. + content: + application/json: + schema: + $ref: "#/components/schemas/EventTimelinePage" + /v1/incidents: + get: + tags: + - Incidents + operationId: getIncidents + summary: List webhook incidents + security: + - bearerAuth: [] + parameters: + - $ref: "#/components/parameters/Limit" + responses: + "200": + description: Paginated tenant-scoped incidents. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentPage" + post: + tags: + - Incidents + operationId: postIncidents + summary: Create webhook incident + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateIncidentRequest" + example: + title: "Stripe payment webhook failed" + reason: "customer support investigation" + responses: + "201": + description: Incident created. + content: + application/json: + schema: + $ref: "#/components/schemas/Incident" + "403": + $ref: "#/components/responses/ForbiddenProblem" + /v1/incidents/{incident_id}: + get: + tags: + - Incidents + operationId: getIncidentsIncidentId + summary: Get webhook incident + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + responses: + "200": + description: Incident metadata. + content: + application/json: + schema: + $ref: "#/components/schemas/Incident" + "404": + description: Incident not found or not visible. 
+ /v1/incidents/{incident_id}/events: + post: + tags: + - Incidents + operationId: postIncidentsIncidentIdEvents + summary: Attach event to incident + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/AddIncidentEventRequest" + example: + event_id: evt_example + reason: "failed downstream delivery" + responses: + "201": + description: Event attached to incident. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentEvent" + "404": + description: Incident or event not found in the actor tenant. + /v1/incidents/{incident_id}/events/{event_id}: + delete: + tags: + - Incidents + operationId: deleteIncidentsIncidentIdEventsEventId + summary: Remove event from incident + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + - name: event_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/StateChangeRequest" + example: + reason: "event not related to this incident" + responses: + "200": + description: Event removed from incident. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentEvent" + /v1/incidents/{incident_id}/generate-report: + post: + tags: + - Incidents + operationId: postIncidentsIncidentIdGenerateReport + summary: Generate incident report snapshot + description: Generates a tenant-scoped JSON and Markdown report from attached event timelines. Raw payload bodies are omitted by default. 
+ security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentReportRequest" + example: + reason: "support handoff" + responses: + "201": + description: Incident report snapshot generated. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentReportSnapshot" + /v1/incidents/{incident_id}/report: + get: + tags: + - Incidents + operationId: getIncidentsIncidentIdReport + summary: Get latest incident report snapshot + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + - name: format + in: query + required: false + schema: + type: string + enum: [json, markdown] + default: json + responses: + "200": + description: Latest incident report snapshot. + content: + application/json: + schema: + $ref: "#/components/schemas/IncidentReportSnapshot" + text/markdown: + schema: + type: string + /v1/incidents/{incident_id}/evidence-export: + post: + tags: + - Incidents + operationId: postIncidentsIncidentIdEvidenceExport + summary: Create incident evidence export + description: Creates a tenant-scoped evidence bundle that includes incident_report.json and incident_report.md. Raw payload bodies are excluded. + security: + - bearerAuth: [] + parameters: + - name: incident_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateIncidentEvidenceExportRequest" + example: + reason: "customer evidence package" + responses: + "202": + description: Incident evidence export created. 
+ content: + application/json: + schema: + $ref: "#/components/schemas/EvidenceExport" /v1/ingest/{tenant_id}/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestTenantIdSourceId summary: Generic provider webhook ingestion parameters: - name: tenant_id @@ -2355,70 +3123,113 @@ paths: required: true schema: type: string + requestBody: + required: true + content: + application/json: + schema: + type: object + additionalProperties: true + example: + id: evt_provider_123 + type: invoice.paid + data: + object: invoice responses: "200": description: Accepted after durable capture. + content: + application/json: + example: + Accepted: true + EventID: evt_example + ReceiptID: rcp_example + RawPayloadID: raw_example + TraceID: "" + VerifyReason: verified + DedupeStatus: unique "401": - description: Invalid signature. + $ref: "#/components/responses/UnauthorizedProblem" "413": - description: Payload too large. + $ref: "#/components/responses/PayloadTooLargeProblem" "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" "503": - description: Durable storage unavailable before acknowledgement. + $ref: "#/components/responses/StorageUnavailableProblem" /v1/ingest/stripe/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestStripeSourceId summary: Stripe webhook ingestion responses: "200": description: Accepted after durable capture. "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/github/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestGithubSourceId summary: GitHub webhook ingestion responses: "200": description: Accepted after durable capture. "431": - description: Request headers too large. 
+ $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/shopify/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestShopifySourceId summary: Shopify webhook ingestion responses: "200": description: Accepted after durable capture. "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/slack/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestSlackSourceId summary: Slack webhook ingestion responses: "200": description: Accepted after durable capture. "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/cloudevents/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestCloudeventsSourceId summary: CloudEvents webhook ingestion responses: "200": description: Accepted after durable capture for binary or structured CloudEvents envelopes. "431": - description: Request headers too large. + $ref: "#/components/responses/HeadersTooLargeProblem" /v1/ingest/generic-jwt/{source_id}: post: + tags: + - Events And Ingestion + operationId: postIngestGenericJwtSourceId summary: Generic JWT/JWS webhook ingestion - description: Accepts compact JWT/JWS signatures from `Authorization: Bearer ...` or `Webhook-JWT`; only HS256 is supported and the token must include `exp` plus a `body_sha256` claim for the exact raw body. + description: "Accepts compact JWT/JWS signatures from `Authorization: Bearer ...` or `Webhook-JWT`; only HS256 is supported and the token must include `exp` plus a `body_sha256` claim for the exact raw body." responses: "200": description: Accepted after durable capture. "401": description: Invalid or unsupported JWT signature. "431": - description: Request headers too large. 
+ $ref: "#/components/responses/HeadersTooLargeProblem" /v1/deliveries: get: + tags: + - Delivery And Replay + operationId: getDeliveries summary: List deliveries security: - bearerAuth: [] @@ -2427,6 +3238,9 @@ paths: description: Paginated delivery list, including retry seed evidence for reproducible scheduling. /v1/deliveries/{delivery_id}/attempts: get: + tags: + - Delivery And Replay + operationId: getDeliveriesDeliveryIdAttempts summary: List delivery attempts security: - bearerAuth: [] @@ -2441,6 +3255,9 @@ paths: description: Paginated delivery attempts, including deterministic retry delay and next retry timestamp when retryable. /v1/deliveries/{delivery_id}:retry: post: + tags: + - Delivery And Replay + operationId: postDeliveriesDeliveryIdRetry summary: Manually retry delivery security: - bearerAuth: [] @@ -2461,6 +3278,9 @@ paths: description: Delivery retry scheduled. /v1/deliveries/{delivery_id}:cancel: post: + tags: + - Delivery And Replay + operationId: postDeliveriesDeliveryIdCancel summary: Cancel scheduled delivery security: - bearerAuth: [] @@ -2481,6 +3301,9 @@ paths: description: Delivery canceled. /v1/delivery-attempts/{attempt_id}: get: + tags: + - Delivery And Replay + operationId: getDeliveryAttemptsAttemptId summary: Get delivery attempt security: - bearerAuth: [] @@ -2495,6 +3318,9 @@ paths: description: Delivery attempt detail, including deterministic retry delay evidence when retryable. /v1/replay-jobs:dry-run: post: + tags: + - Delivery And Replay + operationId: postReplayJobsDryRun summary: Dry-run replay security: - bearerAuth: [] @@ -2507,8 +3333,29 @@ paths: responses: "200": description: Replay dry-run result. + /v1/replay-jobs/preview: + post: + tags: + - Delivery And Replay + operationId: postReplayJobsPreview + summary: Preview replay + description: Additive alias for replay dry-run. It validates the same request and does not create replay jobs or delivery work. 
+ security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ReplayRequest" + responses: + "200": + description: Replay preview result. /v1/replay-jobs: get: + tags: + - Delivery And Replay + operationId: getReplayJobs summary: List replay jobs security: - bearerAuth: [] @@ -2516,6 +3363,9 @@ paths: "200": description: Paginated replay job list. post: + tags: + - Delivery And Replay + operationId: postReplayJobs summary: Create replay job security: - bearerAuth: [] @@ -2525,13 +3375,40 @@ paths: application/json: schema: $ref: "#/components/schemas/ReplayRequest" + example: + event_id: evt_example + endpoint_id: end_example + reason_code: receiver_fixed + reason: "customer requested replay after downstream fix" + config_mode: current + rate_limit_per_minute: 60 + require_approval: true + approval_expires_at: "2026-06-05T12:00:00Z" responses: "202": description: Replay job scheduled or pending approval when require_approval is true. + content: + application/json: + example: + id: rpl_example + state: pending_approval + scope_hash: sha256:2c26b46b68ffc68ff99b453c1d304134 + reason_code: receiver_fixed + reason: "customer requested replay after downstream fix" + config_mode: current + rate_limit_per_minute: 60 + total_items: 1 + processed_items: 0 + failed_items: 0 + approval_required: true + approval_expires_at: "2026-06-05T12:00:00Z" /v1/replay-jobs/{replay_job_id}:approve: post: + tags: + - Delivery And Replay + operationId: postReplayJobsReplayJobIdApprove summary: Approve pending replay job - description: Moves a tenant-scoped pending replay job into scheduled state and enqueues durable replay work. + description: Moves a tenant-scoped pending replay job into scheduled state and enqueues durable replay work. The approving actor must be different from the creator and the approval window must not be expired. 
security: - bearerAuth: [] parameters: @@ -2551,6 +3428,9 @@ paths: description: Replay job approved and scheduled. /v1/replay-jobs/{replay_job_id}:pause: post: + tags: + - Delivery And Replay + operationId: postReplayJobsReplayJobIdPause summary: Pause replay job security: - bearerAuth: [] @@ -2571,6 +3451,9 @@ paths: description: Replay job paused. /v1/replay-jobs/{replay_job_id}:resume: post: + tags: + - Delivery And Replay + operationId: postReplayJobsReplayJobIdResume summary: Resume replay job security: - bearerAuth: [] @@ -2591,6 +3474,9 @@ paths: description: Replay job scheduled again. /v1/replay-jobs/{replay_job_id}:cancel: post: + tags: + - Delivery And Replay + operationId: postReplayJobsReplayJobIdCancel summary: Cancel replay job security: - bearerAuth: [] @@ -2609,8 +3495,81 @@ paths: responses: "200": description: Replay job canceled. + /v1/replay-approval-policies: + get: + tags: + - Delivery And Replay + operationId: getReplayApprovalPolicies + summary: List replay approval policies + description: Lists tenant-scoped policies that automatically require replay approval for tenant, source, or route scopes. + security: + - bearerAuth: [] + responses: + "200": + description: Paginated replay approval policy list. + content: + application/json: + schema: + $ref: "#/components/schemas/ReplayApprovalPolicyPage" + post: + tags: + - Delivery And Replay + operationId: postReplayApprovalPolicies + summary: Create or reactivate replay approval policy + description: Creates or reactivates an active policy that makes matching replay jobs pending approval before delivery work is enqueued. 
+ security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateReplayApprovalPolicyRequest" + example: + scope_type: source + scope_id: src_stripe + default_expiry_seconds: 86400 + reason: "payments source requires maker-checker replay" + responses: + "201": + description: Replay approval policy created or reactivated. + content: + application/json: + schema: + $ref: "#/components/schemas/ReplayApprovalPolicy" + /v1/replay-approval-policies/{policy_id}: + delete: + tags: + - Delivery And Replay + operationId: deleteReplayApprovalPoliciesPolicyId + summary: Disable replay approval policy + description: Disables a tenant-scoped replay approval policy. Historical pending jobs keep their approval requirement. + security: + - bearerAuth: [] + parameters: + - name: policy_id + in: path + required: true + schema: + type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ReasonRequest" + responses: + "200": + description: Replay approval policy disabled. + content: + application/json: + schema: + $ref: "#/components/schemas/ReplayApprovalPolicy" /v1/reconciliation-jobs:dry-run: post: + tags: + - Reconciliation + operationId: postReconciliationJobsDryRun summary: Dry-run provider reconciliation description: Reports expected matched, missing, recoverable, redelivery, or unrecoverable outcomes without capturing recovered events or requesting redelivery. 
security: @@ -2630,6 +3589,9 @@ paths: $ref: "#/components/schemas/ReconciliationJob" /v1/reconciliation-jobs: get: + tags: + - Reconciliation + operationId: getReconciliationJobs summary: List provider reconciliation jobs security: - bearerAuth: [] @@ -2643,6 +3605,9 @@ paths: schema: $ref: "#/components/schemas/ReconciliationJobPage" post: + tags: + - Reconciliation + operationId: postReconciliationJobs summary: Create provider reconciliation job description: Recovered provider API events are not marked as signed webhooks and route only when `route_recovered=true`. security: @@ -2676,6 +3641,9 @@ paths: $ref: "#/components/schemas/ReconciliationJob" /v1/reconciliation-jobs/{job_id}: get: + tags: + - Reconciliation + operationId: getReconciliationJobsJobId summary: Get provider reconciliation job security: - bearerAuth: [] @@ -2694,6 +3662,9 @@ paths: $ref: "#/components/schemas/ReconciliationJob" /v1/reconciliation-jobs/{job_id}/items: get: + tags: + - Reconciliation + operationId: getReconciliationJobsJobIdItems summary: List provider reconciliation gap items security: - bearerAuth: [] @@ -2713,6 +3684,9 @@ paths: $ref: "#/components/schemas/ReconciliationItemPage" /v1/reconciliation-jobs/{job_id}:cancel: post: + tags: + - Reconciliation + operationId: postReconciliationJobsJobIdCancel summary: Cancel provider reconciliation job security: - bearerAuth: [] @@ -2737,6 +3711,9 @@ paths: $ref: "#/components/schemas/ReconciliationJob" /v1/dead-letter: get: + tags: + - Delivery And Replay + operationId: getDeadLetter summary: List dead-letter entries security: - bearerAuth: [] @@ -2745,6 +3722,9 @@ paths: description: Paginated dead-letter list. 
/v1/dead-letter/{entry_id}:release: post: + tags: + - Delivery And Replay + operationId: postDeadLetterEntryIdRelease summary: Release dead-letter entry into replay work security: - bearerAuth: [] @@ -2754,11 +3734,20 @@ paths: required: true schema: type: string + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DeadLetterReleaseRequest" responses: "202": description: Replay job scheduled for dead-letter entry. /v1/dead-letter:bulk-release: post: + tags: + - Delivery And Replay + operationId: postDeadLetterBulkRelease summary: Bulk release dead-letter entries security: - bearerAuth: [] @@ -2773,6 +3762,9 @@ paths: description: Replay jobs scheduled for matching dead-letter entries. /v1/quarantine: get: + tags: + - Delivery And Replay + operationId: getQuarantine summary: List quarantine entries security: - bearerAuth: [] @@ -2781,6 +3773,9 @@ paths: description: Paginated quarantine list. /v1/quarantine/{entry_id}:approve: post: + tags: + - Delivery And Replay + operationId: postQuarantineEntryIdApprove summary: Approve quarantine entry security: - bearerAuth: [] @@ -2795,6 +3790,9 @@ paths: description: Quarantine entry approved. /v1/quarantine/{entry_id}:reject: post: + tags: + - Delivery And Replay + operationId: postQuarantineEntryIdReject summary: Reject quarantine entry security: - bearerAuth: [] @@ -2809,6 +3807,9 @@ paths: description: Quarantine entry rejected. /v1/audit-events: get: + tags: + - Audit And Retention + operationId: getAuditEvents summary: List audit events security: - bearerAuth: [] @@ -2817,6 +3818,9 @@ paths: description: Paginated audit event list. 
/v1/audit-chain/head: get: + tags: + - Audit And Retention + operationId: getAuditChainHead summary: Get audit chain head security: - bearerAuth: [] @@ -2829,6 +3833,9 @@ paths: $ref: "#/components/schemas/AuditChainHead" /v1/audit-chain:verify: post: + tags: + - Audit And Retention + operationId: postAuditChainVerify summary: Verify audit chain continuity security: - bearerAuth: [] @@ -2847,6 +3854,9 @@ paths: $ref: "#/components/schemas/AuditChainVerification" /v1/audit-chain:anchor: post: + tags: + - Audit And Retention + operationId: postAuditChainAnchor summary: Anchor a verified audit chain range security: - bearerAuth: [] @@ -2867,6 +3877,9 @@ paths: description: Actor lacks security:write. /v1/audit-chain/anchors: get: + tags: + - Audit And Retention + operationId: getAuditChainAnchors summary: List audit chain anchors security: - bearerAuth: [] @@ -2881,6 +3894,9 @@ paths: $ref: "#/components/schemas/AuditChainAnchorPage" /v1/audit-chain/anchors/{anchor_id}: get: + tags: + - Audit And Retention + operationId: getAuditChainAnchorsAnchorId summary: Get audit chain anchor security: - bearerAuth: [] @@ -2901,6 +3917,9 @@ paths: description: Anchor not found. /v1/audit-events:export: post: + tags: + - Audit And Retention + operationId: postAuditEventsExport summary: Create audit evidence export description: Creates a tenant-scoped tar.gz bundle containing manifest.json, audit_events.jsonl, and optional timeline or raw payload JSONL files. Raw payload bodies require events:raw in addition to audit:read. security: @@ -2911,6 +3930,13 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateAuditExportRequest" + example: + from: "2026-05-26T00:00:00Z" + to: "2026-05-26T23:59:59Z" + include_raw_payloads: true + include_payload_bodies: false + include_timelines: true + reason: "support evidence package" responses: "202": description: Export created and ready for download. 
@@ -2918,10 +3944,27 @@ paths: application/json: schema: $ref: "#/components/schemas/EvidenceExport" + example: + id: exp_example + tenant_id: ten_example + state: ready + include_raw_payloads: true + include_timelines: true + include_payload_bodies: false + format: tar+gzip+jsonl + storage_backend: postgres + sha256: sha256:2c26b46b68ffc68ff99b453c1d304134 + manifest_sha256: sha256:fcde2b2edba56bf408601fb721fe9b5c + size_bytes: 4096 + created_by: usr_example + created_at: "2026-05-26T12:00:00Z" "403": description: Actor lacks audit:read or requested raw payload bodies without events:raw. /v1/audit-exports: get: + tags: + - Audit And Retention + operationId: getAuditExports summary: List audit evidence exports security: - bearerAuth: [] @@ -2936,6 +3979,9 @@ paths: $ref: "#/components/schemas/EvidenceExportPage" /v1/audit-exports/{export_id}: get: + tags: + - Audit And Retention + operationId: getAuditExportsExportId summary: Get audit evidence export status security: - bearerAuth: [] @@ -2956,6 +4002,9 @@ paths: description: Export not found or not visible. /v1/audit-exports/{export_id}:download: get: + tags: + - Audit And Retention + operationId: getAuditExportsExportIdDownload summary: Download audit evidence export bundle security: - bearerAuth: [] @@ -2983,6 +4032,9 @@ paths: description: Export bundle is unavailable. 
/v1/admin/retention-policies: get: + tags: + - Audit And Retention + operationId: getAdminRetentionPolicies summary: List retention policies security: - bearerAuth: [] @@ -2996,6 +4048,9 @@ paths: schema: $ref: "#/components/schemas/RetentionPolicyPage" post: + tags: + - Audit And Retention + operationId: postAdminRetentionPolicies summary: Create or update retention policy security: - bearerAuth: [] @@ -3014,6 +4069,9 @@ paths: $ref: "#/components/schemas/RetentionPolicy" /v1/admin/retention-policies/{policy_id}: patch: + tags: + - Audit And Retention + operationId: patchAdminRetentionPoliciesPolicyId summary: Update retention policy security: - bearerAuth: [] @@ -3038,6 +4096,9 @@ paths: $ref: "#/components/schemas/RetentionPolicy" /v1/endpoint-health: get: + tags: + - Operations + operationId: getEndpointHealth summary: List endpoint health security: - bearerAuth: [] @@ -3050,6 +4111,9 @@ paths: $ref: "#/components/schemas/EndpointHealthPage" /v1/ops/metrics: get: + tags: + - Operations + operationId: getOpsMetrics summary: Get tenant ops metrics security: - bearerAuth: [] @@ -3062,6 +4126,9 @@ paths: $ref: "#/components/schemas/OpsMetrics" /v1/ops/metrics/rollups: get: + tags: + - Operations + operationId: getOpsMetricsRollups summary: List tenant metrics rollups description: Returns tenant-scoped derived operational rollups. Rollups summarize state for dashboards and alerts and are not evidence source-of-truth. security: @@ -3085,6 +4152,9 @@ paths: description: Invalid metric filter. /v1/ops/storage: get: + tags: + - Operations + operationId: getOpsStorage summary: Get tenant storage status description: Returns redacted storage metadata and tenant-scoped counts without object-store credentials or raw payload bodies. 
security: @@ -3098,6 +4168,9 @@ paths: $ref: "#/components/schemas/OpsStorageStatus" /v1/ops/config: get: + tags: + - Operations + operationId: getOpsConfig summary: Get redacted runtime configuration description: Returns safe runtime configuration metadata only; secret values and connection strings are never included. security: @@ -3111,6 +4184,9 @@ paths: $ref: "#/components/schemas/OpsConfig" /v1/ops/workers: get: + tags: + - Operations + operationId: getOpsWorkers summary: List worker leases description: Lists runtime worker lease metadata only; no tenant payload data is exposed. security: @@ -3126,6 +4202,9 @@ paths: $ref: "#/components/schemas/WorkerStatusPage" /v1/ops/workers/{worker_id}: get: + tags: + - Operations + operationId: getOpsWorkersWorkerId summary: Get worker lease security: - bearerAuth: [] @@ -3146,6 +4225,9 @@ paths: description: Worker lease not found. /v1/ops/queues: get: + tags: + - Operations + operationId: getOpsQueues summary: List tenant queue stats description: Returns tenant-scoped durable outbox and delivery queue counts. security: @@ -3159,6 +4241,9 @@ paths: $ref: "#/components/schemas/QueueStatsPage" /v1/alerts: get: + tags: + - Operations + operationId: getAlerts summary: List alert rules security: - bearerAuth: [] @@ -3172,6 +4257,9 @@ paths: schema: $ref: "#/components/schemas/AlertRulePage" post: + tags: + - Operations + operationId: postAlerts summary: Create alert rule security: - bearerAuth: [] @@ -3181,6 +4269,14 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateAlertRuleRequest" + example: + name: "DLQ backlog" + rule_type: dead_letter_open + metric_name: dead_letter.open + threshold: 0 + comparator: ">" + window_seconds: 300 + state: active responses: "201": description: Alert rule created. 
@@ -3188,10 +4284,26 @@ paths: application/json: schema: $ref: "#/components/schemas/AlertRule" + example: + id: alr_example + tenant_id: ten_example + name: "DLQ backlog" + rule_type: dead_letter_open + metric_name: dead_letter.open + threshold: 0 + comparator: ">" + window_seconds: 300 + state: active + created_by: usr_example + created_at: "2026-05-26T12:00:00Z" + updated_at: "2026-05-26T12:00:00Z" "403": description: Actor lacks ops:write. /v1/alerts/{alert_id}: get: + tags: + - Operations + operationId: getAlertsAlertId summary: Get alert rule security: - bearerAuth: [] @@ -3209,6 +4321,9 @@ paths: schema: $ref: "#/components/schemas/AlertRule" patch: + tags: + - Operations + operationId: patchAlertsAlertId summary: Update alert rule security: - bearerAuth: [] @@ -3232,6 +4347,9 @@ paths: schema: $ref: "#/components/schemas/AlertRule" delete: + tags: + - Operations + operationId: deleteAlertsAlertId summary: Disable alert rule description: Disables the rule and preserves historical firings. security: @@ -3257,6 +4375,9 @@ paths: $ref: "#/components/schemas/AlertRule" /v1/alert-firings: get: + tags: + - Operations + operationId: getAlertFirings summary: List alert firings security: - bearerAuth: [] @@ -3277,6 +4398,9 @@ paths: $ref: "#/components/schemas/AlertFiringPage" /v1/alert-firings/{firing_id}: get: + tags: + - Operations + operationId: getAlertFiringsFiringId summary: Get alert firing security: - bearerAuth: [] @@ -3295,6 +4419,9 @@ paths: $ref: "#/components/schemas/AlertFiring" /v1/alert-firings/{firing_id}:acknowledge: post: + tags: + - Operations + operationId: postAlertFiringsFiringIdAcknowledge summary: Acknowledge alert firing security: - bearerAuth: [] @@ -3319,6 +4446,9 @@ paths: $ref: "#/components/schemas/AlertFiring" /v1/notification-channels: get: + tags: + - Signal Egress + operationId: getNotificationChannels summary: List notification channels description: Lists tenant-scoped generic HTTPS webhook alert notification channels. 
Signing secrets are never returned. security: @@ -3333,6 +4463,9 @@ paths: schema: $ref: "#/components/schemas/NotificationChannelPage" post: + tags: + - Signal Egress + operationId: postNotificationChannels summary: Create notification channel security: - bearerAuth: [] @@ -3342,6 +4475,11 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateNotificationChannelRequest" + example: + name: "Ops webhook" + channel_type: webhook + url: "https://alerts.example.com/webhook" + signing_secret: "" responses: "201": description: Notification channel created. @@ -3349,8 +4487,22 @@ paths: application/json: schema: $ref: "#/components/schemas/NotificationChannel" + example: + id: nch_example + tenant_id: ten_example + name: "Ops webhook" + channel_type: webhook + url: "https://alerts.example.com/webhook" + state: active + secret_hint: configured + created_by: usr_example + created_at: "2026-05-26T12:00:00Z" + updated_at: "2026-05-26T12:00:00Z" /v1/notification-channels/{channel_id}: get: + tags: + - Signal Egress + operationId: getNotificationChannelsChannelId summary: Get notification channel security: - bearerAuth: [] @@ -3368,6 +4520,9 @@ paths: schema: $ref: "#/components/schemas/NotificationChannel" patch: + tags: + - Signal Egress + operationId: patchNotificationChannelsChannelId summary: Update notification channel security: - bearerAuth: [] @@ -3391,6 +4546,9 @@ paths: schema: $ref: "#/components/schemas/NotificationChannel" delete: + tags: + - Signal Egress + operationId: deleteNotificationChannelsChannelId summary: Disable notification channel security: - bearerAuth: [] @@ -3415,6 +4573,9 @@ paths: $ref: "#/components/schemas/NotificationChannel" /v1/notification-channels/{channel_id}:test: post: + tags: + - Signal Egress + operationId: postNotificationChannelsChannelIdTest summary: Queue a test notification delivery security: - bearerAuth: [] @@ -3439,6 +4600,9 @@ paths: $ref: "#/components/schemas/NotificationDelivery" /v1/notification-deliveries: 
get: + tags: + - Signal Egress + operationId: getNotificationDeliveries summary: List notification deliveries security: - bearerAuth: [] @@ -3459,6 +4623,9 @@ paths: $ref: "#/components/schemas/NotificationDeliveryPage" /v1/notification-deliveries/{delivery_id}/attempts: get: + tags: + - Signal Egress + operationId: getNotificationDeliveriesDeliveryIdAttempts summary: List notification delivery attempts security: - bearerAuth: [] @@ -3478,6 +4645,9 @@ paths: $ref: "#/components/schemas/NotificationDeliveryAttemptPage" /v1/notification-deliveries/{delivery_id}:retry: post: + tags: + - Signal Egress + operationId: postNotificationDeliveriesDeliveryIdRetry summary: Retry notification delivery security: - bearerAuth: [] @@ -3502,6 +4672,9 @@ paths: $ref: "#/components/schemas/NotificationDelivery" /v1/siem-sinks: get: + tags: + - Signal Egress + operationId: getSiemSinks summary: List SIEM sinks description: Lists tenant-scoped generic HTTPS audit-chain stream sinks. Signing secrets are never returned. security: @@ -3516,6 +4689,9 @@ paths: schema: $ref: "#/components/schemas/SIEMSinkPage" post: + tags: + - Signal Egress + operationId: postSiemSinks summary: Create SIEM sink security: - bearerAuth: [] @@ -3525,6 +4701,11 @@ paths: application/json: schema: $ref: "#/components/schemas/CreateSIEMSinkRequest" + example: + name: "Security log stream" + sink_type: webhook + url: "https://siem.example.com/ingest" + signing_secret: "" responses: "201": description: SIEM sink created. 
@@ -3532,8 +4713,23 @@ paths: application/json: schema: $ref: "#/components/schemas/SIEMSink" + example: + id: siem_example + tenant_id: ten_example + name: "Security log stream" + sink_type: webhook + url: "https://siem.example.com/ingest" + state: active + secret_hint: configured + cursor_sequence: 0 + created_by: usr_example + created_at: "2026-05-26T12:00:00Z" + updated_at: "2026-05-26T12:00:00Z" /v1/siem-sinks/{sink_id}: get: + tags: + - Signal Egress + operationId: getSiemSinksSinkId summary: Get SIEM sink security: - bearerAuth: [] @@ -3551,6 +4747,9 @@ paths: schema: $ref: "#/components/schemas/SIEMSink" patch: + tags: + - Signal Egress + operationId: patchSiemSinksSinkId summary: Update SIEM sink security: - bearerAuth: [] @@ -3574,6 +4773,9 @@ paths: schema: $ref: "#/components/schemas/SIEMSink" delete: + tags: + - Signal Egress + operationId: deleteSiemSinksSinkId summary: Disable SIEM sink security: - bearerAuth: [] @@ -3598,6 +4800,9 @@ paths: $ref: "#/components/schemas/SIEMSink" /v1/siem-sinks/{sink_id}:test: post: + tags: + - Signal Egress + operationId: postSiemSinksSinkIdTest summary: Queue a test SIEM delivery security: - bearerAuth: [] @@ -3622,6 +4827,9 @@ paths: $ref: "#/components/schemas/SIEMDelivery" /v1/siem-deliveries: get: + tags: + - Signal Egress + operationId: getSiemDeliveries summary: List SIEM deliveries security: - bearerAuth: [] @@ -3642,6 +4850,9 @@ paths: $ref: "#/components/schemas/SIEMDeliveryPage" /v1/siem-deliveries/{delivery_id}/attempts: get: + tags: + - Signal Egress + operationId: getSiemDeliveriesDeliveryIdAttempts summary: List SIEM delivery attempts security: - bearerAuth: [] @@ -3661,6 +4872,9 @@ paths: $ref: "#/components/schemas/SIEMDeliveryAttemptPage" /v1/siem-deliveries/{delivery_id}:retry: post: + tags: + - Signal Egress + operationId: postSiemDeliveriesDeliveryIdRetry summary: Retry SIEM delivery security: - bearerAuth: [] @@ -3709,10 +4923,117 @@ components: application/problem+json: schema: $ref: 
"#/components/schemas/Problem" + example: + type: about:blank + title: Forbidden + status: 403 + code: authorization_error + stable_code: WEBHOOKERY_TENANT_ACCESS_DENIED + request_id: req_example + retryable: false + ValidationProblem: + description: The request is malformed or violates validation rules. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Bad request + status: 400 + code: validation_error + stable_code: WEBHOOKERY_VALIDATION_FAILED + detail: Invalid JSON body. + request_id: req_example + retryable: false + UnauthorizedProblem: + description: Authentication is missing, invalid, or expired. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Unauthorized + status: 401 + code: authentication_error + stable_code: WEBHOOKERY_AUTHENTICATION_REQUIRED + request_id: req_example + retryable: false + ForbiddenProblem: + description: The authenticated actor lacks the required role, scope, tenant membership, or raw-payload permission. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Forbidden + status: 403 + code: authorization_error + stable_code: WEBHOOKERY_TENANT_ACCESS_DENIED + request_id: req_example + retryable: false + NotFoundProblem: + description: The resource does not exist or is not visible to the actor. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Not found + status: 404 + code: not_found + stable_code: WEBHOOKERY_RESOURCE_NOT_FOUND + request_id: req_example + retryable: false + PayloadTooLargeProblem: + description: The request body exceeds the configured capture limit. 
+ content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Payload too large + status: 413 + code: payload_too_large + stable_code: WEBHOOKERY_PAYLOAD_TOO_LARGE + request_id: req_example + retryable: false + HeadersTooLargeProblem: + description: Header count or header bytes exceed the configured ingress limits. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Headers too large + status: 431 + code: headers_too_large + stable_code: WEBHOOKERY_HEADERS_TOO_LARGE + request_id: req_example + retryable: false + StorageUnavailableProblem: + description: Durable storage was unavailable before acknowledgement. + content: + application/problem+json: + schema: + $ref: "#/components/schemas/Problem" + example: + type: about:blank + title: Internal server error + status: 503 + code: storage_unavailable + stable_code: WEBHOOKERY_DURABLE_CAPTURE_UNAVAILABLE + request_id: req_example + retryable: true schemas: Problem: type: object - required: [type, title, status, code, request_id] + required: [type, title, status, code, stable_code, request_id] properties: type: type: string @@ -3724,6 +5045,12 @@ components: type: string code: type: string + description: Legacy short problem code retained for compatibility. + stable_code: + type: string + description: Namespaced stable code for SDK, CLI, support, and incident handling. 
+ examples: + - WEBHOOKERY_PROVIDER_SIGNATURE_INVALID request_id: type: string retryable: @@ -5049,6 +6376,212 @@ components: nullable: true has_more: type: boolean + CreateIncidentRequest: + type: object + required: [title, reason] + additionalProperties: false + properties: + title: + type: string + minLength: 1 + maxLength: 200 + reason: + type: string + minLength: 1 + maxLength: 500 + AddIncidentEventRequest: + type: object + required: [event_id, reason] + additionalProperties: false + properties: + event_id: + type: string + minLength: 1 + reason: + type: string + minLength: 1 + maxLength: 500 + IncidentReportRequest: + type: object + required: [reason] + additionalProperties: false + properties: + reason: + type: string + minLength: 1 + maxLength: 500 + CreateIncidentEvidenceExportRequest: + type: object + required: [reason] + additionalProperties: false + properties: + reason: + type: string + minLength: 1 + maxLength: 500 + Incident: + type: object + required: [id, tenant_id, title, reason, state, created_by, created_at] + properties: + id: + type: string + tenant_id: + type: string + title: + type: string + reason: + type: string + state: + type: string + enum: [active, disabled] + created_by: + type: string + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + IncidentPage: + type: object + required: [data, has_more] + properties: + data: + type: array + items: + $ref: "#/components/schemas/Incident" + next_cursor: + type: string + nullable: true + has_more: + type: boolean + IncidentEvent: + type: object + required: [id, tenant_id, incident_id, event_id, added_by, reason, created_at] + properties: + id: + type: string + tenant_id: + type: string + incident_id: + type: string + event_id: + type: string + added_by: + type: string + reason: + type: string + created_at: + type: string + format: date-time + IncidentReportSnapshot: + type: object + required: [id, tenant_id, incident_id, schema_version, report, 
markdown, generated_by, generated_at] + properties: + id: + type: string + tenant_id: + type: string + incident_id: + type: string + schema_version: + type: string + enum: [webhookery.incident_report.v1] + report: + type: object + additionalProperties: true + description: Machine-readable incident report. Raw payload bodies, secrets, and signatures are omitted by default. + markdown: + type: string + description: Human-readable Markdown incident report. + generated_by: + type: string + generated_at: + type: string + format: date-time + EventTimelineEntry: + type: object + required: [schema_version, sequence, kind, ref_id, state, detail, occurred_at] + additionalProperties: false + properties: + schema_version: + type: string + enum: [webhookery.event_timeline.v1] + sequence: + type: integer + minimum: 1 + kind: + type: string + enum: [event, receipt, raw_payload, normalized, delivery, delivery_payload, attempt, reconciliation, replay, audit] + ref_id: + type: string + state: + type: string + detail: + type: string + description: Redacted timeline detail; raw bodies and secrets are not included. 
+ occurred_at: + type: string + format: date-time + EventTimelinePage: + type: object + required: [data, next_cursor, has_more] + properties: + data: + type: array + items: + $ref: "#/components/schemas/EventTimelineEntry" + next_cursor: + type: string + nullable: true + has_more: + type: boolean + Event: + type: object + required: [id, tenant_id, source_id, provider, type, raw_payload_id, raw_payload_hash, signature_verified, verification_reason, deduplication_key, dedupe_status, received_at, trace_id] + properties: + id: + type: string + tenant_id: + type: string + source_id: + type: string + provider: + type: string + type: + type: string + provider_event_id: + type: string + raw_payload_id: + type: string + raw_payload_hash: + type: string + signature_verified: + type: boolean + verification_reason: + type: string + deduplication_key: + type: string + dedupe_status: + type: string + received_at: + type: string + format: date-time + trace_id: + type: string + EventPage: + type: object + required: [data, next_cursor, has_more] + properties: + data: + type: array + items: + $ref: "#/components/schemas/Event" + next_cursor: + type: string + nullable: true + has_more: + type: boolean NormalizedEnvelope: type: object required: [id, tenant_id, event_id, provider, type, source, envelope_sha256, data_sha256, metadata_sha256, storage_status, created_at] @@ -5658,10 +7191,6 @@ components: type: integer minimum: 0 maximum: 60000 - require_approval: - type: boolean - default: false - description: Create the replay job in pending_approval state. Delivery work is not enqueued until the job is approved. 
state: type: string enum: [active, disabled] @@ -5824,9 +7353,20 @@ components: type: array items: type: string + DeadLetterReleaseRequest: + type: object + required: [reason_code, reason] + additionalProperties: false + properties: + reason_code: + type: string + enum: [receiver_fixed, provider_reconciliation, operator_requested, support_investigation, customer_dispute, test_drill, incident_recovery] + reason: + type: string + minLength: 1 DeadLetterBulkReleaseRequest: type: object - required: [reason] + required: [reason_code, reason] additionalProperties: false properties: entry_ids: @@ -5834,11 +7374,15 @@ components: description: Empty or omitted means up to 100 currently open entries. items: type: string + reason_code: + type: string + enum: [receiver_fixed, provider_reconciliation, operator_requested, support_investigation, customer_dispute, test_drill, incident_recovery] reason: type: string minLength: 1 ReplayRequest: type: object + required: [reason_code, reason] additionalProperties: false properties: event_id: @@ -5847,8 +7391,12 @@ components: type: string endpoint_id: type: string + reason_code: + type: string + enum: [receiver_fixed, provider_reconciliation, operator_requested, support_investigation, customer_dispute, test_drill, incident_recovery] reason: type: string + minLength: 1 dry_run: type: boolean config_mode: @@ -5859,6 +7407,79 @@ components: type: integer minimum: 0 maximum: 60000 + require_approval: + type: boolean + default: false + description: Create the replay job in pending_approval state. Delivery work is not enqueued until the job is approved. + approval_expires_at: + type: string + format: date-time + description: Optional approval expiry for pending replay jobs. When omitted with require_approval=true, the server defaults to 24 hours from creation. The field is rejected unless require_approval is true. 
+ CreateReplayApprovalPolicyRequest: + type: object + required: [scope_type, reason] + additionalProperties: false + properties: + scope_type: + type: string + enum: [tenant, source, route] + scope_id: + type: string + description: Required for source and route policies. Omit for tenant-wide policies. + require_approval: + type: boolean + default: true + description: Active v1 policies require approval. Disable the policy to stop automatic approval gating. + default_expiry_seconds: + type: integer + minimum: 300 + maximum: 604800 + default: 86400 + reason: + type: string + minLength: 1 + maxLength: 500 + ReplayApprovalPolicy: + type: object + required: [id, tenant_id, scope_type, require_approval, default_expiry_seconds, state, created_by, created_at, updated_at] + properties: + id: + type: string + tenant_id: + type: string + scope_type: + type: string + enum: [tenant, source, route] + scope_id: + type: string + require_approval: + type: boolean + default_expiry_seconds: + type: integer + state: + type: string + reason: + type: string + created_by: + type: string + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + ReplayApprovalPolicyPage: + type: object + properties: + data: + type: array + items: + $ref: "#/components/schemas/ReplayApprovalPolicy" + next_cursor: + type: string + nullable: true + has_more: + type: boolean CreateAuditExportRequest: type: object additionalProperties: false diff --git a/sdk/typescript/examples/evidence-workflow.ts b/sdk/typescript/examples/evidence-workflow.ts new file mode 100644 index 0000000..ddfa1b9 --- /dev/null +++ b/sdk/typescript/examples/evidence-workflow.ts @@ -0,0 +1,91 @@ +import { writeFile } from "node:fs/promises"; +import { WebhookeryClient } from "../src/index"; + +const baseUrl = requiredEnv("WEBHOOKERY_BASE_URL").replace(/\/+$/, ""); +const apiKey = requiredEnv("WEBHOOKERY_API_KEY"); +const sourceId = requiredEnv("WEBHOOKERY_SOURCE_ID"); +const output = 
process.env.WEBHOOKERY_EVIDENCE_OUTPUT ?? "evidence-workflow.tar.gz"; + +const client = new WebhookeryClient(baseUrl, apiKey); + +const eventId = `evt_ts_sdk_${new Date().toISOString().replace(/[-:.]/g, "")}`; +const created = await client.createEvent( + { + id: eventId, + type: "sdk.evidence.demo", + source_id: sourceId, + data: { sanitized: true }, + }, + { idempotencyKey: eventId }, +); + +const canonicalEventId = String(created.EventID ?? created.event_id ?? eventId); +const incident = await apiJson<{ id: string }>("/v1/incidents", { + title: "TypeScript SDK evidence workflow", + reason: "local SDK evidence example", +}); + +await apiJson(`/v1/incidents/${encodeURIComponent(incident.id)}/events`, { + event_id: canonicalEventId, + reason: "attach SDK-created event to evidence workflow", +}); +await apiJson(`/v1/incidents/${encodeURIComponent(incident.id)}/generate-report`, { + reason: "generate TypeScript SDK example report", +}); +const evidenceExport = await apiJson<{ id: string }>( + `/v1/incidents/${encodeURIComponent(incident.id)}/evidence-export`, + { reason: "create TypeScript SDK example evidence export" }, +); + +const bundle = await fetch(`${baseUrl}/v1/audit-exports/${encodeURIComponent(evidenceExport.id)}:download`, { + headers: { Authorization: `Bearer ${apiKey}` }, +}); +if (!bundle.ok) { + throw await problemError(bundle); +} +await writeFile(output, Buffer.from(await bundle.arrayBuffer()), { mode: 0o600 }); + +const verification = await client.verifyAuditChain(); +if (verification.valid !== true) { + throw new Error("audit chain did not verify after evidence workflow"); +} + +console.log(`wrote evidence bundle to ${output}`); + +async function apiJson<T extends Record<string, unknown>>(path: string, body: unknown): Promise<T> { + const response = await fetch(`${baseUrl}${path}`, { + method: "POST", + headers: { + Accept: "application/json", + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(body), + }); + if (!response.ok) { +
throw await problemError(response); + } + return (await response.json()) as T; +} + +async function problemError(response: Response): Promise<Error> { + let code = "unknown_error"; + let requestId = ""; + try { + const body = (await response.json()) as { code?: string; stable_code?: string; request_id?: string }; + code = body.stable_code ?? body.code ?? code; + requestId = body.request_id ?? ""; + } catch { + // Leave the sanitized fallback code in place. + } + const suffix = requestId ? ` (${code}, request_id=${requestId})` : ` (${code})`; + return new Error(`webhookery API returned HTTP ${response.status}${suffix}`); +} + +function requiredEnv(name: string): string { + const value = process.env[name]?.trim(); + if (!value) { + throw new Error(`${name} is required`); + } + return value; +} diff --git a/site/index.html b/site/index.html new file mode 100644 index 0000000..dec5f55 --- /dev/null +++ b/site/index.html @@ -0,0 +1,145 @@ + + + + + + Webhookery - Self-hosted webhook evidence infrastructure + + + + +
+ + + Webhookery + + +
+ +
+
+
+

Release-candidate self-hosted control plane

+

Self-hosted webhook evidence infrastructure

+

Durable capture, replay, delivery evidence, and audit trails for teams that need to prove what happened to webhook events.

+ +
+
+
+ Provider event + verified raw bytes + Durable receipt +
+
+ Route version + payload snapshot + Signed delivery +
+
+ Retry / replay + reason captured + Audit proof +
+
Evidence survives delivery failure, replay, retention, and review.
+
+
+ +
+
+

What Webhookery proves

+

Designed around inspectable webhook history

+
+
+
+

Durable capture

+

Inbound success means raw request evidence and verification metadata were durably recorded.

+
+
+

Delivery control

+

Routes, retries, payload snapshots, delivery attempts, DLQ, and replay keep downstream outcomes visible.

+
+
+

Audit evidence

+

Audit-chain verification, retention metadata, and export packages help reviewers reconstruct decisions later.

+
+
+
+ +
+
+

How it works

+

Receive, verify, store, route, deliver, replay, audit

+
+
    +
  1. <strong>Receive</strong><span>Preserve raw bytes and headers before trusting content.</span>
  2. +
  3. <strong>Verify</strong><span>Apply provider-specific signatures and timestamp rules.</span>
  4. +
  5. <strong>Store</strong><span>Commit receipts, payload hashes, dedupe records, and audit events.</span>
  6. +
  7. <strong>Deliver</strong><span>Send signed payload snapshots with retry and DLQ evidence.</span>
  8. +
  9. <strong>Recover</strong><span>Replay or reconcile where provider APIs make recovery possible.</span>
  10. +
+
+ +
+
+

Evaluator path

+

Run the evidence loop locally

+

Use the deterministic local demo to see valid ingest, invalid signature quarantine, retry, DLQ release, replay, retention, and audit-chain verification.

+
+
+ docker compose up -d postgres + export WEBHOOKERY_TEST_DATABASE_URL=... + examples/webhook-evidence-demo/run.sh + make rc-check +
+
+ +
+
+

Commercial path

+

AGPL public project, commercial exceptions available

+
+
+
+

Self-host under AGPL

+

Use the public repository when AGPL obligations fit your deployment and distribution model.

+ Read the license +
+
+

Use commercially

+

Commercial evaluation, release evidence packages, production-readiness reviews, and support are available by written agreement.

+ Review commercial options +
+
+
+ +
+
+

What this is not

+

Honest boundaries are part of the product

+
+
    +
  • No exactly-once delivery claim.
  • +
  • No provider-side event completeness guarantee.
  • +
  • No claim that downstream business processing succeeded.
  • +
  • No compliance certification or legal evidence certification.
  • +
  • No live-provider calls in local release acceptance.
  • +
+
+
+ + + + diff --git a/site/styles.css b/site/styles.css new file mode 100644 index 0000000..0959d20 --- /dev/null +++ b/site/styles.css @@ -0,0 +1,358 @@ +:root { + color-scheme: light; + --ink: #17202a; + --muted: #536170; + --line: #d8dee6; + --paper: #fbfcfd; + --band: #eef3f6; + --blue: #1d5f91; + --green: #287a5a; + --red: #a43f3b; + --gold: #9c6b15; +} + +* { + box-sizing: border-box; +} + +body { + margin: 0; + font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; + color: var(--ink); + background: var(--paper); + letter-spacing: 0; +} + +a { + color: var(--blue); + font-weight: 650; + text-decoration-thickness: 1px; + text-underline-offset: 3px; +} + +.topbar { + display: flex; + align-items: center; + justify-content: space-between; + gap: 24px; + padding: 18px clamp(20px, 5vw, 72px); + border-bottom: 1px solid var(--line); + background: rgba(251, 252, 253, 0.94); + position: sticky; + top: 0; + z-index: 10; +} + +.brand, +nav { + display: flex; + align-items: center; + gap: 14px; +} + +.brand { + color: var(--ink); + text-decoration: none; + font-weight: 760; +} + +.brand-mark { + display: inline-grid; + width: 32px; + height: 32px; + place-items: center; + border: 1px solid var(--line); + border-radius: 6px; + background: #ffffff; + color: var(--blue); +} + +nav a { + color: var(--muted); + font-size: 0.94rem; + text-decoration: none; +} + +.hero { + min-height: calc(100vh - 70px); + display: grid; + grid-template-columns: minmax(0, 1.02fr) minmax(360px, 0.98fr); + gap: clamp(36px, 6vw, 88px); + align-items: center; + padding: clamp(56px, 8vw, 110px) clamp(20px, 5vw, 72px); + border-bottom: 1px solid var(--line); + background: #f4f8fa; +} + +.eyebrow { + margin: 0 0 12px; + color: var(--green); + font-size: 0.82rem; + font-weight: 760; + text-transform: uppercase; +} + +h1, +h2, +h3, +p { + margin-top: 0; +} + +h1 { + max-width: 820px; + margin-bottom: 24px; + font-size: clamp(3rem, 7vw, 6.8rem); + 
line-height: 0.96; +} + +h2 { + max-width: 840px; + margin-bottom: 18px; + font-size: clamp(2rem, 4vw, 4rem); + line-height: 1.02; +} + +h3 { + margin-bottom: 10px; + font-size: 1.1rem; +} + +.lede { + max-width: 720px; + color: var(--muted); + font-size: clamp(1.13rem, 2vw, 1.45rem); + line-height: 1.55; +} + +.actions { + display: flex; + flex-wrap: wrap; + gap: 12px; + margin-top: 34px; +} + +.button { + min-height: 46px; + display: inline-flex; + align-items: center; + justify-content: center; + padding: 12px 18px; + border-radius: 6px; + border: 1px solid var(--line); + text-decoration: none; +} + +.button.primary { + color: #ffffff; + background: var(--blue); + border-color: var(--blue); +} + +.button.secondary { + color: var(--ink); + background: #ffffff; +} + +.evidence-visual { + margin: 0; + padding: clamp(18px, 3vw, 30px); + border: 1px solid #cbd5df; + border-radius: 8px; + background: rgba(255, 255, 255, 0.92); + box-shadow: 0 22px 50px rgba(23, 32, 42, 0.12); +} + +.flow-row { + display: grid; + grid-template-columns: 1fr auto 1fr; + gap: 12px; + align-items: center; + padding: 14px 0; + border-bottom: 1px solid var(--line); +} + +.flow-row:last-of-type { + border-bottom: 0; +} + +.node { + min-height: 44px; + display: grid; + place-items: center; + padding: 10px; + border-radius: 6px; + border: 1px solid var(--line); + background: #ffffff; + text-align: center; + font-weight: 700; +} + +.arrow { + color: var(--muted); + font-size: 0.86rem; + text-align: center; +} + +.verified .node:first-child { + border-color: rgba(29, 95, 145, 0.44); +} + +.routed .node:first-child { + border-color: rgba(156, 107, 21, 0.45); +} + +.audited .node:first-child { + border-color: rgba(164, 63, 59, 0.42); +} + +figcaption { + margin-top: 18px; + color: var(--muted); + font-size: 0.95rem; +} + +.band { + padding: clamp(54px, 8vw, 96px) clamp(20px, 5vw, 72px); + border-bottom: 1px solid var(--line); +} + +.band.muted { + background: var(--band); +} + +.section-head { + 
margin-bottom: 30px; +} + +.grid { + display: grid; + gap: 16px; +} + +.grid.three { + grid-template-columns: repeat(3, minmax(0, 1fr)); +} + +.grid.two { + grid-template-columns: repeat(2, minmax(0, 1fr)); +} + +article { + min-height: 168px; + padding: 22px; + border: 1px solid var(--line); + border-radius: 8px; + background: #ffffff; +} + +article p, +.band p, +.steps span, +.boundaries { + color: var(--muted); + line-height: 1.55; +} + +.steps { + display: grid; + grid-template-columns: repeat(5, minmax(0, 1fr)); + gap: 12px; + padding: 0; + margin: 0; + list-style: none; +} + +.steps li { + min-height: 150px; + padding: 18px; + border-left: 4px solid var(--blue); + background: #ffffff; +} + +.steps strong, +.steps span { + display: block; +} + +.steps strong { + margin-bottom: 10px; +} + +.split { + display: grid; + grid-template-columns: minmax(0, 1fr) minmax(300px, 0.8fr); + gap: clamp(24px, 5vw, 72px); + align-items: center; +} + +.command-box { + display: grid; + gap: 8px; + padding: 18px; + border: 1px solid var(--line); + border-radius: 8px; + background: #111827; +} + +code { + display: block; + overflow-wrap: anywhere; + color: #e6f2ff; + font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; + font-size: 0.9rem; +} + +.boundaries { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 10px 28px; + margin: 0; + padding-left: 20px; +} + +footer { + display: flex; + flex-wrap: wrap; + gap: 16px; + align-items: center; + justify-content: space-between; + padding: 26px clamp(20px, 5vw, 72px); + color: var(--muted); +} + +footer span { + color: var(--ink); + font-weight: 760; +} + +@media (max-width: 980px) { + .hero, + .split, + .grid.three, + .grid.two, + .steps, + .boundaries { + grid-template-columns: 1fr; + } + + .hero { + min-height: auto; + } +} + +@media (max-width: 680px) { + .topbar, + nav { + align-items: flex-start; + flex-direction: column; + } + + nav { + gap: 8px; + } + + .flow-row { + 
grid-template-columns: 1fr; + } +}