From 4f573fcfe6a8725d52837bd5ff6591d8a1e23dc9 Mon Sep 17 00:00:00 2001 From: Otto Jongerius Date: Sun, 17 May 2026 09:59:05 +1200 Subject: [PATCH 1/9] docs(blog): stub for hooks native tool audit post --- site/astro.config.mjs | 4 +++ .../docs/blog/hooks-native-tool-audit.mdx | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 site/src/content/docs/blog/hooks-native-tool-audit.mdx diff --git a/site/astro.config.mjs b/site/astro.config.mjs index b508dfd2..43343e97 100644 --- a/site/astro.config.mjs +++ b/site/astro.config.mjs @@ -168,6 +168,10 @@ export default defineConfig({ label: "The audit boundary belongs outside the agent", slug: "blog/daemon-process-separation", }, + { + label: "Native Tool Calls in the Audit Trail", + slug: "blog/hooks-native-tool-audit", + }, ], }, ], diff --git a/site/src/content/docs/blog/hooks-native-tool-audit.mdx b/site/src/content/docs/blog/hooks-native-tool-audit.mdx new file mode 100644 index 00000000..7b49f7d6 --- /dev/null +++ b/site/src/content/docs/blog/hooks-native-tool-audit.mdx @@ -0,0 +1,29 @@ +--- +title: "Native Tool Calls in the Audit Trail" +description: "How the Claude Code hook captures Bash, Read, Write, Edit and other native tools — and why this matters for a complete agent audit." 
+--- + +import { Aside } from '@astrojs/starlight/components'; + + + + From 7fd58643648536c7fbcde13aba024680361da410 Mon Sep 17 00:00:00 2001 From: Otto Jongerius Date: Sun, 17 May 2026 10:01:31 +1200 Subject: [PATCH 2/9] docs(blog): flesh out hooks native tool audit post --- .../docs/blog/hooks-native-tool-audit.mdx | 199 ++++++++++++++++-- 1 file changed, 176 insertions(+), 23 deletions(-) diff --git a/site/src/content/docs/blog/hooks-native-tool-audit.mdx b/site/src/content/docs/blog/hooks-native-tool-audit.mdx index 7b49f7d6..434458cf 100644 --- a/site/src/content/docs/blog/hooks-native-tool-audit.mdx +++ b/site/src/content/docs/blog/hooks-native-tool-audit.mdx @@ -1,29 +1,182 @@ --- title: "Native Tool Calls in the Audit Trail" -description: "How the Claude Code hook captures Bash, Read, Write, Edit and other native tools — and why this matters for a complete agent audit." +description: "How the Claude Code hook captures Bash, Read, Write, Edit and other native tools — closing the gap the MCP proxy leaves open." --- import { Aside } from '@astrojs/starlight/components'; - - - +The MCP proxy covers MCP tool calls. Everything that flows through an MCP server — GitHub API calls, database queries, Atlassian writes — is intercepted, receipted, and hash-chained. But Claude Code has another class of tools that never touch an MCP server at all. + +`Bash`. `Read`. `Write`. `Edit`. `WebFetch`. `WebSearch`. + +These are native tools — built into Claude Code itself. They run locally, directly, with no proxy in between. Without additional instrumentation, they're invisible to the audit trail. A receipt chain that captures every GitHub API call but misses every file write isn't a complete audit. 
+ +--- + +## The gap + +Here's what a typical Claude Code session looks like without the hook: + +```mermaid +sequenceDiagram + participant CC as Claude Code + participant MP as mcp-proxy + participant MS as MCP Server + participant D as agent-receipts-daemon + + CC->>MP: mcp tool call (stdio) + MP->>MS: forward + MS-->>MP: response + MP-->>CC: response + MP--)D: receipt ✓ + + CC->>CC: Bash / Read / Write / Edit + Note over D: no receipt ✗ +``` + +The daemon chain has gaps wherever native tools ran. An auditor sees the MCP calls but not the file system activity, shell commands, or web fetches that happened between them. + +--- + +## The hook + +Claude Code exposes a `PostToolUse` hook — a shell command it calls after every tool completes, passing a JSON payload describing the call. The `agent-receipts-hook` binary reads that payload and forwards it to the daemon over the same Unix socket the MCP proxy uses. + +```mermaid +sequenceDiagram + participant CC as Claude Code + participant MP as mcp-proxy + participant MS as MCP Server + participant Hook as agent-receipts-hook + participant D as agent-receipts-daemon + + CC->>MP: mcp tool call (stdio) + MP->>MS: forward + MS-->>MP: response + MP-->>CC: response + MP--)D: emit · channel: mcp ✓ + CC--)Hook: PostToolUse event + Hook--)D: emit · channel: claude-code ✓ + + CC->>CC: Bash / Read / Write + CC--)Hook: PostToolUse event + Hook--)D: emit · channel: claude-code ✓ +``` + +All emitters — the proxy and the hook — fire to the same daemon socket. The daemon assigns consecutive sequence numbers. One chain, all channels. + +--- + +## Installation + +One entry in `~/.claude/settings.json`: + +```json +{ + "hooks": { + "PostToolUse": [ + { + "matcher": "", + "hooks": [ + { + "type": "command", + "command": "agent-receipts-hook" + } + ] + } + ] + } +} +``` + +The empty `matcher` means the hook fires for every tool. 
The `agent-receipts-hook` binary needs to be on `$PATH` — install it via Homebrew: + +```sh +brew install agent-receipts/tap/agent-receipts-hook +``` + +That's it. No MCP server to wrap, no proxy config to write. The next tool call you make will land a receipt. + +--- + +## What a native tool receipt looks like + +Here's a real `Bash` receipt from a Claude Code session, captured while running a shell command that included a fake API key: + +```json +{ + "action": { + "type": "claude-code.Bash", + "tool_name": "Bash", + "risk_level": "medium", + "parameters_hash": "sha256:dc143db4ad2fd5df10685749c688e018610dfb254bd75bbf753ed3ddd84d97a2", + "parameters_disclosure": { + "input": "{\"command\":\"echo \\\"Connecting with api_key=[REDACTED] to service\\\"\"}", + "output": "{\"stdout\":\"Connecting with api_key=[REDACTED] to service\",\"stderr\":\"\"}", + "peer.platform": "darwin", + "peer.uid": "501", + "peer.pid": "74389" + } + }, + "outcome": { "status": "success" }, + "chain": { + "sequence": 2051, + "chain_id": "default" + } +} +``` + +A few things worth noting: + +**`action.type` prefix is `claude-code.`** — every hook-sourced receipt carries this prefix, so you can filter by channel at query time. + +**Secrets are redacted** — the `api_key=sk-...` value was caught by the daemon's built-in redaction patterns before the receipt was stored. The `parameters_hash` still commits to the original unredacted input, so the call is verifiable without the secret being retained. + +**Peer credentials** — the daemon captured the hook process's uid, pid, and platform at socket connect time, independent of anything the hook claims about itself. 
+ +--- + +## Side by side with an MCP call + +When the hook and the MCP proxy both fire for the same underlying call (an MCP tool call triggers both a proxy intercept and a PostToolUse event), you get two consecutive receipts in the chain: + +| seq | channel | action.type | parameters_hash | +|-----|---------|-------------|-----------------| +| 2046 | mcp-proxy | `mcp.github.pull_request_read` | `sha256:2f9911...` | +| 2047 | hook | `claude-code.mcp__github-audited__pull_request_read` | `sha256:2f9911...` | + +The identical `parameters_hash` correlates the two receipts to the same underlying call. An auditor can verify from either receipt that the same input was processed. + + + +--- + +## Fail-hard, not silent + +When the daemon is unreachable, the hook exits non-zero. Claude Code surfaces this as a visible non-blocking error in the session — the tool call still completes, but the gap is visible rather than silent. + +This is a deliberate design choice. Silent drops mean invisible audit gaps. A visible error means an operator knows to investigate whether the daemon is down. + +--- + +## Current scope and roadmap + +The hook today handles `PostToolUse` only — it fires after a tool has already run. This makes it audit-only: it records what happened but cannot influence what runs. + +`PreToolUse` hooks (which fire before the tool runs and can block it by exiting non-zero) are on the roadmap. That opens the door to policy enforcement at the native tool level — the same pattern the MCP proxy uses today for MCP calls. Once both `Pre` and `PostToolUse` are covered, the hook becomes a full policy + audit layer for native tools. + +Until then: install the hook, get the audit trail, know what your agent ran. 
+ +--- + +## The complete picture + +With both the MCP proxy and the hook in place, a Claude Code session produces one chain covering everything: + +- Every MCP tool call — intercepted and receipted by the proxy +- Every native tool call — captured and receipted by the hook +- All in one hash-chained sequence, signed by the daemon + +An agent that could previously obscure its activity by choosing native tools over MCP calls no longer has that option. The chain is complete. From ba39c4a5e0cdb2ecf41de2efe9a453e4ede63b59 Mon Sep 17 00:00:00 2001 From: Otto Jongerius Date: Sun, 17 May 2026 10:03:20 +1200 Subject: [PATCH 3/9] docs(blog): add PreToolUse FAQ section explaining the design decision --- .../docs/blog/hooks-native-tool-audit.mdx | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/site/src/content/docs/blog/hooks-native-tool-audit.mdx b/site/src/content/docs/blog/hooks-native-tool-audit.mdx index 434458cf..b7bffb2e 100644 --- a/site/src/content/docs/blog/hooks-native-tool-audit.mdx +++ b/site/src/content/docs/blog/hooks-native-tool-audit.mdx @@ -161,13 +161,21 @@ This is a deliberate design choice. Silent drops mean invisible audit gaps. A vi --- -## Current scope and roadmap +## Why PostToolUse and not PreToolUse? -The hook today handles `PostToolUse` only — it fires after a tool has already run. This makes it audit-only: it records what happened but cannot influence what runs. +This is the first question people ask. The short answer: audit first, policy second — and for good reason. -`PreToolUse` hooks (which fire before the tool runs and can block it by exiting non-zero) are on the roadmap. That opens the door to policy enforcement at the native tool level — the same pattern the MCP proxy uses today for MCP calls. Once both `Pre` and `PostToolUse` are covered, the hook becomes a full policy + audit layer for native tools. +`PreToolUse` hooks fire *before* the tool runs. Exiting non-zero blocks the call. 
That sounds powerful, but it changes the contract: you're no longer just observing, you're in the critical path. If the hook is slow, the agent is slow. If the hook crashes, the tool call fails — even for calls that should succeed. If the hook blocks something it shouldn't, the agent is broken in a way that's hard to debug. -Until then: install the hook, get the audit trail, know what your agent ran. +`PostToolUse` has none of these failure modes. The tool has already run. The hook fires after the fact. If it crashes or times out, the agent doesn't notice — the call completed, Claude Code continues. The audit trail has a gap, not a breakage. + +There's also an information argument. A `PostToolUse` hook sees the *output* — what the tool actually returned. A `PreToolUse` hook only sees the *intent* — what the agent asked for. For forensics and breach investigation, the output is often the more interesting half. + +The current approach is deliberate: ship the audit foundation with the lowest possible blast radius, get it running in production, then add policy enforcement once the system is trusted. The MCP proxy already does `PreToolUse`-equivalent blocking for MCP calls. The hook will follow the same path once the audit baseline is solid. + + --- From 1677e7dff980d5ec3b3a03ef792c93f1089d61a1 Mon Sep 17 00:00:00 2001 From: Otto Jongerius Date: Sun, 17 May 2026 10:05:08 +1200 Subject: [PATCH 4/9] docs(blog): expand hook events FAQ with full Claude Code hooks surface area --- .../docs/blog/hooks-native-tool-audit.mdx | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/site/src/content/docs/blog/hooks-native-tool-audit.mdx b/site/src/content/docs/blog/hooks-native-tool-audit.mdx index b7bffb2e..082c85a2 100644 --- a/site/src/content/docs/blog/hooks-native-tool-audit.mdx +++ b/site/src/content/docs/blog/hooks-native-tool-audit.mdx @@ -161,20 +161,22 @@ This is a deliberate design choice. Silent drops mean invisible audit gaps. 
A vi --- -## Why PostToolUse and not PreToolUse? +## Why PostToolUse and not the other ~20 hook events? -This is the first question people ask. The short answer: audit first, policy second — and for good reason. +Claude Code's [hooks system](https://code.claude.com/docs/en/hooks) has grown to around 20 lifecycle events: `PreToolUse`, `PostToolUse`, `PostToolUseFailure`, `UserPromptSubmit`, `SessionStart`, `SessionEnd`, `Stop`, `SubagentStop`, `PreCompact`, `Notification`, `WorktreeCreate`, `WorktreeRemove`, and more. Each represents a different moment in the agent's lifecycle. -`PreToolUse` hooks fire *before* the tool runs. Exiting non-zero blocks the call. That sounds powerful, but it changes the contract: you're no longer just observing, you're in the critical path. If the hook is slow, the agent is slow. If the hook crashes, the tool call fails — even for calls that should succeed. If the hook blocks something it shouldn't, the agent is broken in a way that's hard to debug. +Agent Receipts currently handles `PostToolUse` only. That's deliberate. -`PostToolUse` has none of these failure modes. The tool has already run. The hook fires after the fact. If it crashes or times out, the agent doesn't notice — the call completed, Claude Code continues. The audit trail has a gap, not a breakage. +**Audit first, policy second.** `PreToolUse` can block a tool call by exiting non-zero — which puts the hook in the critical path. If it's slow, the agent is slow. If it crashes, the tool call fails. If it incorrectly blocks something, the agent breaks in ways that are hard to debug. `PostToolUse` has none of these failure modes: the tool has already run, the hook fires after the fact, and if it fails the agent doesn't notice. The audit trail has a gap, not a breakage. -There's also an information argument. A `PostToolUse` hook sees the *output* — what the tool actually returned. A `PreToolUse` hook only sees the *intent* — what the agent asked for. 
For forensics and breach investigation, the output is often the more interesting half. +**The output is more interesting than the intent.** A `PostToolUse` hook sees what the tool *returned*. A `PreToolUse` hook only sees what the agent *asked for*. For forensics and breach investigation, the output — what actually came back — is often the more useful half of the record. -The current approach is deliberate: ship the audit foundation with the lowest possible blast radius, get it running in production, then add policy enforcement once the system is trusted. The MCP proxy already does `PreToolUse`-equivalent blocking for MCP calls. The hook will follow the same path once the audit baseline is solid. +**Expanding deliberately.** `SessionStart`, `UserPromptSubmit`, and `Stop` are natural next candidates: they'd let the daemon chain record session boundaries and the prompts that triggered tool use, not just the tool calls themselves. But each new event type adds surface area — and there's a technical complication specific to short-lived hook processes (each hook invocation is a fresh process, so the ADR-0010 drop-counter mechanism needs adapting before expanding coverage). + +The MCP proxy already does `PreToolUse`-equivalent blocking for MCP calls. The hook will follow the same path — audit baseline first, then policy enforcement once it's proven in production. 
--- From 58056f3e4608b0718865c87e90b3dd38da748718 Mon Sep 17 00:00:00 2001 From: Otto Jongerius Date: Sun, 17 May 2026 10:05:51 +1200 Subject: [PATCH 5/9] docs(blog): add Why Go and not Rust section to hooks post --- .../content/docs/blog/hooks-native-tool-audit.mdx | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/site/src/content/docs/blog/hooks-native-tool-audit.mdx b/site/src/content/docs/blog/hooks-native-tool-audit.mdx index 082c85a2..e7bc03c0 100644 --- a/site/src/content/docs/blog/hooks-native-tool-audit.mdx +++ b/site/src/content/docs/blog/hooks-native-tool-audit.mdx @@ -181,6 +181,18 @@ The drop-counter mechanism that makes backpressure visible in the chain doesn't --- +## Why Go and not Rust? + +Rust would produce a smaller, faster binary with compile-time memory safety guarantees. It's a reasonable question. + +The honest answer is that the emitter is designed to be so thin that language choice barely matters. Its entire job is: read stdin, open a Unix socket, write a length-prefixed frame, close. The 25ms dial timeout and 100ms write timeout are the latency budget — not the language runtime. A Rust binary would hit those same wall-clock limits. The bottleneck is IPC, not CPU or memory. + +The safety argument for Rust is real but narrow here. There's no complex memory management in the emitter, no pointer arithmetic, no unsafe operations the Go runtime can't handle safely. The Go emitter has been running in production without memory-safety incidents because it genuinely doesn't do anything that requires Rust's guarantees. + +There's also a practical argument: the hook binary uses the Go emitter package directly, sharing code with the daemon, the SDK, and the proxy. Rust would mean a separate implementation of the IPC framing, the frame schema, and the timeout logic — more surface area for drift between the hook and every other emitter in the system. 
+ +If the emitter ever needs to do heavy local processing before forwarding — structured field extraction, client-side pattern matching, local buffering for offline operation — that calculus changes. For fire-and-forget over a Unix socket, Go is more than adequate. + ## The complete picture With both the MCP proxy and the hook in place, a Claude Code session produces one chain covering everything: From c30d9b26275f0a0abb8c7d97536696148064f63e Mon Sep 17 00:00:00 2001 From: Otto Jongerius Date: Sun, 17 May 2026 13:20:05 +1200 Subject: [PATCH 6/9] docs(blog): polish hooks-native-tool-audit post --- .../docs/blog/hooks-native-tool-audit.mdx | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/site/src/content/docs/blog/hooks-native-tool-audit.mdx b/site/src/content/docs/blog/hooks-native-tool-audit.mdx index e7bc03c0..f0b2ab16 100644 --- a/site/src/content/docs/blog/hooks-native-tool-audit.mdx +++ b/site/src/content/docs/blog/hooks-native-tool-audit.mdx @@ -140,7 +140,7 @@ A few things worth noting: When the hook and the MCP proxy both fire for the same underlying call (an MCP tool call triggers both a proxy intercept and a PostToolUse event), you get two consecutive receipts in the chain: -| seq | channel | action.type | parameters_hash | +| seq | emitter | action.type | parameters_hash | |-----|---------|-------------|-----------------| | 2046 | mcp-proxy | `mcp.github.pull_request_read` | `sha256:2f9911...` | | 2047 | hook | `claude-code.mcp__github-audited__pull_request_read` | `sha256:2f9911...` | @@ -155,28 +155,28 @@ The two receipts carry different `session_id` values — the hook forwards Claud ## Fail-hard, not silent -When the daemon is unreachable, the hook exits non-zero. Claude Code surfaces this as a visible non-blocking error in the session — the tool call still completes, but the gap is visible rather than silent. +When the daemon is unreachable, the hook exits non-zero. 
Claude Code surfaces this as a non-blocking error in the session — the tool call still completes, but the missing receipt is reported rather than swallowed. This is a deliberate design choice. Silent drops mean invisible audit gaps. A visible error means an operator knows to investigate whether the daemon is down. --- -## Why PostToolUse and not the other ~20 hook events? +## Why PostToolUse and not the other hook events? -Claude Code's [hooks system](https://code.claude.com/docs/en/hooks) has grown to around 20 lifecycle events: `PreToolUse`, `PostToolUse`, `PostToolUseFailure`, `UserPromptSubmit`, `SessionStart`, `SessionEnd`, `Stop`, `SubagentStop`, `PreCompact`, `Notification`, `WorktreeCreate`, `WorktreeRemove`, and more. Each represents a different moment in the agent's lifecycle. +Claude Code's [hooks system](https://code.claude.com/docs/en/hooks) now exposes close to 30 lifecycle events: `PreToolUse`, `PostToolUse`, `PostToolUseFailure`, `UserPromptSubmit`, `SessionStart`, `SessionEnd`, `Stop`, `SubagentStop`, `PreCompact`, `Notification`, `WorktreeCreate`, `WorktreeRemove`, and many more. Each represents a different moment in the agent's lifecycle. -Agent Receipts currently handles `PostToolUse` only. That's deliberate. +The recommended Agent Receipts configuration wires up `PostToolUse` only. That's deliberate. -**Audit first, policy second.** `PreToolUse` can block a tool call by exiting non-zero — which puts the hook in the critical path. If it's slow, the agent is slow. If it crashes, the tool call fails. If it incorrectly blocks something, the agent breaks in ways that are hard to debug. `PostToolUse` has none of these failure modes: the tool has already run, the hook fires after the fact, and if it fails the agent doesn't notice. The audit trail has a gap, not a breakage. +**Audit first, policy second.** `PreToolUse` can block a tool call by exiting non-zero — which puts the hook in the critical path. If it's slow, the agent is slow. 
If it crashes, the tool call fails. If it incorrectly blocks something, the agent breaks in ways that are hard to debug. `PostToolUse` has none of these failure modes: the tool has already run, the hook fires after the fact, and a failure to record surfaces as a non-blocking error rather than a broken tool call. The audit trail has a gap, not a breakage. **The output is more interesting than the intent.** A `PostToolUse` hook sees what the tool *returned*. A `PreToolUse` hook only sees what the agent *asked for*. For forensics and breach investigation, the output — what actually came back — is often the more useful half of the record. -**Expanding deliberately.** `SessionStart`, `UserPromptSubmit`, and `Stop` are natural next candidates: they'd let the daemon chain record session boundaries and the prompts that triggered tool use, not just the tool calls themselves. But each new event type adds surface area — and there's a technical complication specific to short-lived hook processes (each hook invocation is a fresh process, so the ADR-0010 drop-counter mechanism needs adapting before expanding coverage). +**Expanding deliberately.** `SessionStart`, `UserPromptSubmit`, and `Stop` are natural next candidates: they would let the chain record session boundaries and the prompts that triggered tool use, not just the tool calls themselves. Each new event type adds surface area, and there's a wire-format question to resolve first — see the note below. The MCP proxy already does `PreToolUse`-equivalent blocking for MCP calls. The hook will follow the same path — audit baseline first, then policy enforcement once it's proven in production. --- @@ -187,12 +187,14 @@ Rust would produce a smaller, faster binary with compile-time memory safety guar The honest answer is that the emitter is designed to be so thin that language choice barely matters. Its entire job is: read stdin, open a Unix socket, write a length-prefixed frame, close. 
The 25ms dial timeout and 100ms write timeout are the latency budget — not the language runtime. A Rust binary would hit those same wall-clock limits. The bottleneck is IPC, not CPU or memory. -The safety argument for Rust is real but narrow here. There's no complex memory management in the emitter, no pointer arithmetic, no unsafe operations the Go runtime can't handle safely. The Go emitter has been running in production without memory-safety incidents because it genuinely doesn't do anything that requires Rust's guarantees. +The safety argument for Rust is real but narrow here. The emitter does no manual memory management, no pointer arithmetic, no unsafe operations — nothing that exercises the failure modes Rust's borrow checker is designed to prevent. Go's runtime covers what little surface there is. There's also a practical argument: the hook binary uses the Go emitter package directly, sharing code with the daemon, the SDK, and the proxy. Rust would mean a separate implementation of the IPC framing, the frame schema, and the timeout logic — more surface area for drift between the hook and every other emitter in the system. If the emitter ever needs to do heavy local processing before forwarding — structured field extraction, client-side pattern matching, local buffering for offline operation — that calculus changes. For fire-and-forget over a Unix socket, Go is more than adequate. 
+--- + ## The complete picture With both the MCP proxy and the hook in place, a Claude Code session produces one chain covering everything: From 9d78b891614493d00d30bd9d29583195dce8611f Mon Sep 17 00:00:00 2001 From: Otto Jongerius Date: Sun, 17 May 2026 13:24:13 +1200 Subject: [PATCH 7/9] docs(blog): add series framing to hooks post --- site/src/content/docs/blog/hooks-native-tool-audit.mdx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/site/src/content/docs/blog/hooks-native-tool-audit.mdx b/site/src/content/docs/blog/hooks-native-tool-audit.mdx index f0b2ab16..a3f66cdf 100644 --- a/site/src/content/docs/blog/hooks-native-tool-audit.mdx +++ b/site/src/content/docs/blog/hooks-native-tool-audit.mdx @@ -5,6 +5,10 @@ description: "How the Claude Code hook captures Bash, Read, Write, Edit and othe import { Aside } from '@astrojs/starlight/components'; +**Series: Auditing AI Agents** · Part 3 of 3 · [← One Chain, Two Channels, Zero Secrets](/blog/unified-chain-redaction-demo/) + +--- + The MCP proxy covers MCP tool calls. Everything that flows through an MCP server — GitHub API calls, database queries, Atlassian writes — is intercepted, receipted, and hash-chained. But Claude Code has another class of tools that never touch an MCP server at all. `Bash`. `Read`. `Write`. `Edit`. `WebFetch`. `WebSearch`. From 31e4b46a1b3e55e5878c23683a2c9920eaa3887e Mon Sep 17 00:00:00 2001 From: Otto Jongerius Date: Sun, 17 May 2026 20:17:51 +1200 Subject: [PATCH 8/9] docs(blog): tone pass on hooks-native-tool-audit post MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two edits to align with the series register: - Add publish date (2026-05-28, one week after post 2 per the weekly cadence). - Soften the closing — "an agent that could previously obscure its activity" framed the agent as adversarial in the same shape as the "compromised or misbehaving agent" line we flattened in post 1. 
Replace with a structural-completeness framing: what's missing without the hook (file writes, shell commands, web fetches) and what is present with both. Things I considered but think work as-is: - The `Bash`. `Read`. `Write`. ... staccato is name-the-things presentation, not abstract negation. - The "That's it. No MCP server to wrap, no proxy config to write." in the install section caps a contrast against real prior setup pain. - The triple "If it..." in the PreToolUse rationale enumerates real failure modes rather than rhetorical buildup. --- site/src/content/docs/blog/hooks-native-tool-audit.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/src/content/docs/blog/hooks-native-tool-audit.mdx b/site/src/content/docs/blog/hooks-native-tool-audit.mdx index a3f66cdf..f794b818 100644 --- a/site/src/content/docs/blog/hooks-native-tool-audit.mdx +++ b/site/src/content/docs/blog/hooks-native-tool-audit.mdx @@ -5,7 +5,7 @@ description: "How the Claude Code hook captures Bash, Read, Write, Edit and othe import { Aside } from '@astrojs/starlight/components'; -**Series: Auditing AI Agents** · Part 3 of 3 · [← One Chain, Two Channels, Zero Secrets](/blog/unified-chain-redaction-demo/) +_Published 2026-05-28_ · **Series: Auditing AI Agents** · Part 3 of 3 · [← One Chain, Two Channels, Zero Secrets](/blog/unified-chain-redaction-demo/) --- @@ -207,4 +207,4 @@ With both the MCP proxy and the hook in place, a Claude Code session produces on - Every native tool call — captured and receipted by the hook - All in one hash-chained sequence, signed by the daemon -An agent that could previously obscure its activity by choosing native tools over MCP calls no longer has that option. The chain is complete. +Without the hook, a Claude Code session's audit trail skips over whatever happened through native tools — file writes, shell commands, web fetches. With both in place, every tool call from the session lands in one signed, hash-chained sequence. 
From ee66ef19548372012d3e75d5f1c2b60769d49fd4 Mon Sep 17 00:00:00 2001 From: Otto Jongerius Date: Sun, 17 May 2026 20:43:16 +1200 Subject: [PATCH 9/9] docs: address Copilot review on PR #444 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related fixes that came out of the same review: - **Receipt JSON example** (`hooks-native-tool-audit.mdx`): the example was missing the `credentialSubject` envelope and the `proof` block, flattening fields that the schema nests. Match post 1's level of detail by wrapping `outcome` and `chain` in `credentialSubject` and adding an abbreviated `proof` block. Also abbreviate the `parameters_hash` value to match post 1's convention. Reader no longer sees a partial-shape receipt presented as the canonical example. - **Hook exit-behaviour docs** (`reference/cli-commands.mdx`): the page claimed the hook "Always exits 0" and that emit failures "are dropped silently." That's not what the code does — `agent-receipts-hook` runs with `emitter.WithStrictErrors()` and exits 1 with a stderr message on emit failure. ADR-0010's "events truly drop silently" is about the in-chain ledger (no daemon to record the gap), not about the hook process itself. Reword to describe both: stdin/format issues exit 0 silently, emit failures exit 1 with stderr; the in-chain gap is still silent by design. 
--- .../content/docs/blog/hooks-native-tool-audit.mdx | 14 +++++++++----- site/src/content/docs/reference/cli-commands.mdx | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/site/src/content/docs/blog/hooks-native-tool-audit.mdx b/site/src/content/docs/blog/hooks-native-tool-audit.mdx index f794b818..9687e1d2 100644 --- a/site/src/content/docs/blog/hooks-native-tool-audit.mdx +++ b/site/src/content/docs/blog/hooks-native-tool-audit.mdx @@ -113,7 +113,7 @@ Here's a real `Bash` receipt from a Claude Code session, captured while running "type": "claude-code.Bash", "tool_name": "Bash", "risk_level": "medium", - "parameters_hash": "sha256:dc143db4ad2fd5df10685749c688e018610dfb254bd75bbf753ed3ddd84d97a2", + "parameters_hash": "sha256:dc143db4ad...", "parameters_disclosure": { "input": "{\"command\":\"echo \\\"Connecting with api_key=[REDACTED] to service\\\"\"}", "output": "{\"stdout\":\"Connecting with api_key=[REDACTED] to service\",\"stderr\":\"\"}", @@ -122,10 +122,14 @@ Here's a real `Bash` receipt from a Claude Code session, captured while running "peer.pid": "74389" } }, - "outcome": { "status": "success" }, - "chain": { - "sequence": 2051, - "chain_id": "default" + "credentialSubject": { + "outcome": { "status": "success" }, + "chain": { "sequence": 2051, "chain_id": "default" } + }, + "proof": { + "type": "Ed25519Signature2020", + "verificationMethod": "did:agent-receipts-daemon:local#k1", + "proofValue": "uZ8bqK5Zgr..." } } ``` diff --git a/site/src/content/docs/reference/cli-commands.mdx b/site/src/content/docs/reference/cli-commands.mdx index ad5fb2e1..898af5f2 100644 --- a/site/src/content/docs/reference/cli-commands.mdx +++ b/site/src/content/docs/reference/cli-commands.mdx @@ -247,7 +247,7 @@ agent-receipts-hook [--format=] |---|---| | `--format` | Force a specific input format (default: auto-detected from environment variables). Currently supported: `claude-code`. | -**Exit behaviour:** Always exits 0. 
Emit failures (daemon not running, socket missing, malformed frame) are dropped silently — the hook never blocks the agent. +**Exit behaviour:** The hook never blocks the agent — the tool call has already completed by the time `PostToolUse` fires. On stdin or format problems (unreadable stdin, unrecognised runtime) the hook exits 0 silently. On emit failures (daemon unreachable, parse error, socket write timeout) it exits 1 with a message on stderr, so the operator can surface the missing receipt out-of-band. The in-chain audit gap from a daemon-down event is still silent by design — see [ADR-0010](https://github.com/agent-receipts/ar/blob/main/docs/adr/0010-daemon-process-separation.md) — but the hook itself is loud about its own failures. **Auto-detection:** When `--format` is omitted, the binary inspects environment variables to identify the calling runtime. `CLAUDE_SESSION_ID` set → `claude-code` format.