diff --git a/review-enrichment/src/analyzers/secret-scan.ts b/review-enrichment/src/analyzers/secret-scan.ts new file mode 100644 index 000000000..bcb504ae9 --- /dev/null +++ b/review-enrichment/src/analyzers/secret-scan.ts @@ -0,0 +1,90 @@ +// Secret-scan analyzer (#1476). Scans the ADDED lines of the PR diff for credential patterns and high-entropy +// assignments, citing file:line and the KIND only — the matched secret VALUE is never returned (so the brief is +// safe to splice into a public review). Higher-recall than the engine's in-process regex pass, and line-cited via +// the hunk headers so the reviewer can point at the exact line. +import type { EnrichRequest, SecretFinding } from "../types.js"; + +interface Rule { + kind: string; + re: RegExp; + confidence: "high" | "medium"; +} + +// Ordered specific → generic. The generic assignment rule is medium-confidence (it catches real keys but also the +// occasional long opaque non-secret), so the reviewer treats it as "verify" rather than "block". +const RULES: Rule[] = [ + { kind: "aws_access_key_id", re: /\bAKIA[0-9A-Z]{16}\b/, confidence: "high" }, + { + kind: "github_token", + re: /\bgh[pousr]_[A-Za-z0-9]{36,}\b/, + confidence: "high", + }, + { + kind: "slack_token", + re: /\bxox[baprs]-[A-Za-z0-9-]{10,}\b/, + confidence: "high", + }, + { + kind: "google_api_key", + re: /\bAIza[0-9A-Za-z_-]{35}\b/, + confidence: "high", + }, + { + kind: "private_key", + re: /-----BEGIN (?:RSA |EC |OPENSSH |DSA |PGP )?PRIVATE KEY-----/, + confidence: "high", + }, + { + kind: "jwt", + re: /\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/, + confidence: "medium", + }, + { + kind: "generic_secret_assignment", + re: /(?:api[_-]?key|secret|token|password|passwd|access[_-]?key|client[_-]?secret)["']?\s*[:=]\s*["'][A-Za-z0-9+/=_-]{16,}["']/i, + confidence: "medium", + }, +]; + +/** Scan one file's unified-diff patch, tracking new-file line numbers via hunk headers. Pure. Value never captured. */ +export function scanPatch(path: string, patch: string): SecretFinding[] { + const findings: SecretFinding[] = []; + let newLine = 0; + for (const line of patch.split("\n")) { + if (line.startsWith("+++") || line.startsWith("---")) continue; + const hunk = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/.exec(line); + if (hunk) { + newLine = Number(hunk[1]); + continue; + } + if (line.startsWith("+")) { + const content = line.slice(1); + for (const rule of RULES) { + if (rule.re.test(content)) { + findings.push({ + file: path, + line: newLine, + kind: rule.kind, + confidence: rule.confidence, + }); + break; // one finding per line — first (most specific) rule wins + } + } + newLine++; + } else if (!line.startsWith("-")) { + newLine++; // context line advances the new-file counter; removed lines do not + } + } + return findings; +} + +/** Analyzer entrypoint: scan every changed file's patch for leaked credentials. */ +export async function scanSecrets( + req: EnrichRequest, +): Promise { + const findings: SecretFinding[] = []; + for (const file of req.files ?? []) { + if (file.patch) findings.push(...scanPatch(file.path, file.patch)); + } + return findings; +} diff --git a/review-enrichment/src/brief.ts b/review-enrichment/src/brief.ts index e4ceb6657..5b5c4f16e 100644 --- a/review-enrichment/src/brief.ts +++ b/review-enrichment/src/brief.ts @@ -8,6 +8,7 @@ import type { AnalyzerStatus, } from "./types.js"; import { scanDependencies } from "./analyzers/dependency-scan.js"; +import { scanSecrets } from "./analyzers/secret-scan.js"; import { renderBrief } from "./render.js"; type AnalyzerFn = (req: EnrichRequest) => Promise; @@ -15,6 +16,7 @@ type AnalyzerFn = (req: EnrichRequest) => Promise; // The analyzer registry. More land behind this same shape: license (#1475), secret (#1476), static (#1477), history (#1478). const ANALYZERS: Record = { dependency: (req) => scanDependencies(req), + secret: (req) => scanSecrets(req), }; function withTimeout(promise: Promise, ms: number): Promise { diff --git a/review-enrichment/src/render.ts b/review-enrichment/src/render.ts index 9ac293964..5614dae7a 100644 --- a/review-enrichment/src/render.ts +++ b/review-enrichment/src/render.ts @@ -35,6 +35,18 @@ export function renderBrief( } } + const secrets = findings.secret ?? []; + if (secrets.length) { + lines.push( + "### Potential leaked secrets (value-redacted — verify + rotate)", + ); + for (const secret of secrets) { + lines.push( + `- \`${secret.file}:${secret.line}\` — ${secret.kind} (${secret.confidence} confidence)`, + ); + } + } + if (!lines.length) return { promptSection: "", systemSuffix: "" }; const header = diff --git a/review-enrichment/src/types.ts b/review-enrichment/src/types.ts index 1a0e7d35e..0fedfdd2d 100644 --- a/review-enrichment/src/types.ts +++ b/review-enrichment/src/types.ts @@ -42,9 +42,18 @@ export interface DependencyFinding { cves: Cve[]; } -/** Structured analyzer output. Each analyzer fills its own key; more land as analyzers ship (#1475–#1478). */ +/** A potential leaked credential. Value-redacted by construction — only the location + kind are ever reported. */ +export interface SecretFinding { + file: string; + line: number; + kind: string; + confidence: "high" | "medium"; +} + +/** Structured analyzer output. Each analyzer fills its own key; more land as analyzers ship (#1475/#1477/#1478). */ export interface BriefFindings { dependency?: DependencyFinding[]; + secret?: SecretFinding[]; } export type AnalyzerStatus = "ok" | "degraded" | "skipped"; diff --git a/review-enrichment/test/enrichment.test.ts b/review-enrichment/test/enrichment.test.ts index a47482228..d5364e55c 100644 --- a/review-enrichment/test/enrichment.test.ts +++ b/review-enrichment/test/enrichment.test.ts @@ -7,6 +7,7 @@ import { } from "../dist/analyzers/dependency-scan.js"; import { renderBrief } from "../dist/render.js"; import { buildBrief } from "../dist/brief.js"; +import { scanPatch, scanSecrets } from "../dist/analyzers/secret-scan.js"; const okFetch = (vulns) => async () => ({ ok: true, @@ -170,3 +171,86 @@ test("buildBrief: analyzer throw → degraded + partial, still returns a brief", globalThis.fetch = realFetch; } }); + +test("scanPatch: detects credentials, cites new-file line via hunk header, never returns the value", () => { + const patch = [ + "@@ -1,1 +1,4 @@", + " const config = {", + '+ awsKey: "AKIAIOSFODNN7EXAMPLE",', + '+ token: "ghp_0123456789012345678901234567890123456",', + "+ safe: true,", + ].join("\n"); + const findings = scanPatch("src/config.ts", patch); + const kinds = findings.map((f) => f.kind); + assert.ok(kinds.includes("aws_access_key_id")); + assert.ok(kinds.includes("github_token")); + const aws = findings.find((f) => f.kind === "aws_access_key_id"); + assert.equal(aws.file, "src/config.ts"); + assert.equal(aws.line, 2); // line 1 = context, line 2 = the AWS key + assert.ok( + !JSON.stringify(findings).includes("AKIAIOSFODNN7EXAMPLE"), + "value never captured", + ); +}); + +test("scanPatch: private key (high) + generic assignment line; removed lines don't advance new counter", () => { + const pk = scanPatch( + "k.pem", + "@@ -0,0 +1,1 @@\n+-----BEGIN RSA PRIVATE KEY-----", + ); + assert.equal(pk[0].kind, "private_key"); + assert.equal(pk[0].confidence, "high"); + const gen = scanPatch( + "a.ts", + '@@ -5,0 +5,1 @@\n-old\n+const password = "s3cr3t_value_long_enough_x"', + ); + assert.equal(gen[0].kind, "generic_secret_assignment"); + assert.equal(gen[0].line, 5); +}); + +test("scanSecrets: scans across files, ignores files without patches", async () => { + const findings = await scanSecrets({ + repoFullName: "o/r", + prNumber: 1, + files: [ + { path: "a.ts", patch: '@@ -1,0 +1,1 @@\n+key = "AKIAIOSFODNN7EXAMPLE"' }, + { path: "b.ts" }, + ], + }); + assert.equal(findings.length, 1); + assert.equal(findings[0].file, "a.ts"); +}); + +test("renderBrief: renders the value-redacted secret block", () => { + const r = renderBrief({ + secret: [ + { file: "x.ts", line: 3, kind: "github_token", confidence: "high" }, + ], + }); + assert.match(r.promptSection, /leaked secrets/); + assert.match(r.promptSection, /`x\.ts:3` — github_token \(high/); +}); + +test("buildBrief: dependency + secret analyzers both run", async () => { + const realFetch = globalThis.fetch; + globalThis.fetch = okFetch([]); + try { + const brief = await buildBrief({ + repoFullName: "o/r", + prNumber: 9, + files: [ + { + path: "app.ts", + patch: + '@@ -1,0 +1,1 @@\n+const t = "ghp_0123456789012345678901234567890123456"', + }, + ], + }); + assert.equal(brief.analyzerStatus.dependency, "ok"); + assert.equal(brief.analyzerStatus.secret, "ok"); + assert.equal(brief.findings.secret.length, 1); + assert.match(brief.promptSection, /github_token/); + } finally { + globalThis.fetch = realFetch; + } +});