Skip to content

Commit b6a181b

Browse files
committed
fix(otel): gate codemode script body on capture, sanitize fetch URL attrs
- `pkg/tools/codemode/exec.go`: emit `cagent.tool.codemode.script_hash` (SHA-256) + `script_length` unconditionally so dashboards can correlate identical scripts and spot oversize submissions, but gate the full `cagent.tool.codemode.script` body behind `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`. Codemode scripts are kilobyte-scale arbitrary JS that routinely embed auth tokens / pasted user data / inline secrets, so the bundle decision (Option B, ship body unconditionally) was the wrong call for this attribute specifically - `pkg/tools/builtin/fetch.go`: strip query strings, fragments, and userinfo from `cagent.tool.fetch.urls` so the attribute can ship by default without leaking signed-URL tokens, OAuth codes, or inline credentials. Path stays intact so dashboards still answer "which sites/endpoints did the agent hit?". Unparseable URLs are emitted as `<unparseable>` rather than passed through verbatim Both span attributes were flagged on the upstream PR review for the same root cause — emitting unbounded user-controlled content as a default-on telemetry attribute creates a PII/secret-exfiltration surface. The other Option B attributes (`shell.cmd`, `filesystem.path`, `script_shell.cmd`) stay unconditional: they are short, do not carry the same query-token / arbitrary-content risk, and remain decision-relevant for incident response
1 parent 4356a83 commit b6a181b

2 files changed

Lines changed: 44 additions & 14 deletions

File tree

pkg/tools/builtin/fetch.go

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,44 @@ type FetchToolArgs struct {
5050
Format string `json:"format,omitempty"`
5151
}
5252

53+
// sanitizeFetchURLs strips query strings and userinfo from each URL so
54+
// the resulting span attribute can ship by default without leaking
55+
// signed-URL tokens, OAuth codes, or inline credentials. URLs that fail
56+
// to parse are emitted as a sentinel rather than the raw string, since
57+
// an unparseable URL could also carry sensitive material.
58+
func sanitizeFetchURLs(urls []string) []string {
59+
out := make([]string, len(urls))
60+
for i, raw := range urls {
61+
u, err := url.Parse(raw)
62+
if err != nil {
63+
out[i] = "<unparseable>"
64+
continue
65+
}
66+
u.RawQuery = ""
67+
u.Fragment = ""
68+
u.User = nil
69+
out[i] = u.String()
70+
}
71+
return out
72+
}
73+
5374
func (h *fetchHandler) CallTool(ctx context.Context, params FetchToolArgs) (*tools.ToolCallResult, error) {
5475
if len(params.URLs) == 0 {
5576
return nil, errors.New("at least one URL is required")
5677
}
5778

58-
// Decorate the active runtime.tool.handler span with the URL list
59-
// and request shape. Each fetched URL still produces its own HTTP
60-
// CLIENT child span via `httpclient.WrapWithOTel` below, so the
61-
// per-request status / latency / target host all show up there;
62-
// the parent span gets the requested URLs so a quick glance answers
63-
// "which sites did the agent hit on this turn?" without expanding
64-
// the children.
79+
// Decorate the active runtime.tool.handler span with the requested
80+
// URLs. Strip query params and userinfo first: query strings often
81+
// carry signed-URL tokens, OAuth codes, or session IDs, and userinfo
82+
// carries credentials inline. The path stays intact so dashboards
83+
// can still answer "which sites/endpoints did the agent hit?" — the
84+
// HTTP CLIENT child span emitted by `httpclient.WrapWithOTel` below
85+
// retains the full URL under `http.url` for callers that opt into
86+
// that backend's full-URL capture.
6587
if span := trace.SpanFromContext(ctx); span.IsRecording() {
6688
attrs := []attribute.KeyValue{
6789
attribute.Int("cagent.tool.fetch.url_count", len(params.URLs)),
68-
attribute.StringSlice("cagent.tool.fetch.urls", params.URLs),
90+
attribute.StringSlice("cagent.tool.fetch.urls", sanitizeFetchURLs(params.URLs)),
6991
}
7092
if params.Format != "" {
7193
attrs = append(attrs, attribute.String("cagent.tool.fetch.format", params.Format))

pkg/tools/codemode/exec.go

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package codemode
33
import (
44
"bytes"
55
"context"
6+
"crypto/sha256"
7+
"encoding/hex"
68
"encoding/json"
79
"fmt"
810
"slices"
@@ -11,6 +13,7 @@ import (
1113
"go.opentelemetry.io/otel/attribute"
1214
"go.opentelemetry.io/otel/trace"
1315

16+
"github.com/docker/docker-agent/pkg/telemetry/genai"
1417
"github.com/docker/docker-agent/pkg/tools"
1518
)
1619

@@ -42,17 +45,22 @@ func (c *codeModeTool) runJavascript(ctx context.Context, script string) (Script
4245
vm := goja.New()
4346
tracker := &toolCallTracker{}
4447

45-
// Stamp the script body and length onto the active span; the
46-
// post-run defer adds the tool-call count. Script ships
47-
// unconditionally — it's the main signal of what a code-mode turn
48-
// did. Drop or hash `cagent.tool.codemode.script` at the OTel
49-
// collector if scripts routinely carry secrets.
48+
// Always stamp a hash + length so dashboards can correlate
49+
// identical scripts ("model ran the same script 200 times this
50+
// hour") without ever shipping the body. Codemode scripts are
51+
// kilobyte-scale arbitrary JS — embedded auth tokens, pasted
52+
// user data, and inline secrets are common — so the body itself
53+
// is gated behind the GenAI content-capture opt-in.
5054
span := trace.SpanFromContext(ctx)
5155
if span.IsRecording() {
56+
sum := sha256.Sum256([]byte(script))
5257
span.SetAttributes(
53-
attribute.String("cagent.tool.codemode.script", script),
58+
attribute.String("cagent.tool.codemode.script_hash", hex.EncodeToString(sum[:])),
5459
attribute.Int("cagent.tool.codemode.script_length", len(script)),
5560
)
61+
if genai.IsContentCaptureEnabled() {
62+
span.SetAttributes(attribute.String("cagent.tool.codemode.script", script))
63+
}
5664
}
5765
defer func() {
5866
if span.IsRecording() {

0 commit comments

Comments
 (0)