flipt-io · markphelps · Jun 18, 2026 · Jun 10, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/.gitignore b/.gitignore
@@ -8,4 +8,5 @@ dist/
 !.env.example
 .dev.vars
 
-.claude
+.claude
+docs
diff --git a/lib/review-timeout.ts b/lib/review-timeout.ts
@@ -0,0 +1,84 @@
+const DEFAULT_REVIEW_TIMEOUT_MS = 8 * 60_000;
+
+export type ReviewStats = {
+  changedFiles: number;
+  additions: number;
+  deletions: number;
+};
+
+export type ReviewBudget = {
+  tier: 'tiny' | 'small' | 'medium' | 'large';
+  timeoutMs: number;
+  maxToolCalls: number;
+  allowSubagents: boolean;
+  scope: string;
+  stats: ReviewStats;
+};
+
+export function resolveReviewTimeoutMs(env: Record<string, string | undefined>, fallback = DEFAULT_REVIEW_TIMEOUT_MS) {
+  const value = Number(env.REVIEW_TIMEOUT_MS ?? fallback);
+  return Number.isFinite(value) && value > 0 ? value : fallback;
+}
+
+function shellQuote(value: string): string {
+  return `'${value.replaceAll("'", "'\\''")}'`;
+}
+
+export function reviewStatsCommand(prNumber: number, repo: string) {
+  return `gh pr view ${prNumber} --repo ${shellQuote(repo)} --json additions,deletions,changedFiles`;
+}
+
+export function parseReviewStats(stdout: string): ReviewStats {
+  const parsed = JSON.parse(stdout) as Partial<ReviewStats>;
+  return {
+    changedFiles: Number(parsed.changedFiles) || 0,
+    additions: Number(parsed.additions) || 0,
+    deletions: Number(parsed.deletions) || 0,
+  };
+}
+
+export function resolveReviewBudget(stats: ReviewStats, env: Record<string, string | undefined>): ReviewBudget {
+  const lines = stats.additions + stats.deletions;
+  const base =
+    stats.changedFiles <= 3 && lines <= 100
+      ? {
+          tier: 'tiny' as const,
+          timeoutMs: 3 * 60_000,
+          maxToolCalls: 6,
+          allowSubagents: false,
+          scope: 'Review the diff directly. Do not delegate. Return as soon as concrete findings are clear.',
+        }
+      : stats.changedFiles <= 10 && lines <= 400
+        ? {
+            tier: 'small' as const,
+            timeoutMs: 5 * 60_000,
+            maxToolCalls: 10,
+            allowSubagents: false,
+            scope: 'Review the whole diff, with brief repo-context lookups only when needed. Do not delegate.',
+          }
+        : stats.changedFiles <= 25 && lines <= 1_000
+          ? {
+              tier: 'medium' as const,
+              timeoutMs: 8 * 60_000,
+              maxToolCalls: 16,
+              allowSubagents: true,
+              scope: 'Review high-risk files first, then the rest of the diff if budget remains. Delegate at most once for clear security or correctness risk.',
+            }
+          : {
+              tier: 'large' as const,
+              timeoutMs: 12 * 60_000,
+              maxToolCalls: 20,
+              allowSubagents: true,
+              scope: 'Run a scoped review: prioritize auth, data access, migrations, concurrency, public APIs, and untrusted input. Do not attempt exhaustive coverage.',
+            };
+
+  return { ...base, timeoutMs: resolveReviewTimeoutMs(env, base.timeoutMs), stats };
+}
+
+export function timeoutReviewResult(timeoutMs: number) {
+  return {
+    verdict: 'comment' as const,
+    summary: `Automated review timed out after ${Math.round(timeoutMs / 1000)}s. No reliable findings were produced.`,
+    findings: [],
+  };
+}
diff --git a/skills/code-review/SKILL.md b/skills/code-review/SKILL.md
@@ -18,6 +18,10 @@ these inputs:
 - `localConfigDir` — absolute path to the under-review repo's `.agents/`
   overrides, or empty if it ships none.
 - `overrideMode` — `merge` (defaults + local) or `replace` (local only).
+- `reviewBudget` — the workflow-computed effort budget for this PR. It includes
+  `tier`, `timeoutMs`, `maxToolCalls`, `allowSubagents`, `scope`, and diff
+  `stats` (`changedFiles`, `additions`, `deletions`). The workflow enforces the
+  timeout; you must shape your review to fit the rest.
 
 Throughout, refer to the PR with `gh`'s `--repo "$repo"` and the number
 `prNumber`. The `gh` CLI is authenticated via `GH_TOKEN`.
@@ -49,6 +53,10 @@ apply, regardless of repo or override:
   whole PR, or keep hunting for marginal nits once you have the real issues. A
   fast, confident review of the things that matter is the goal — extra
   deliberation rarely changes the verdict and burns time and tokens.
+- **Respect `reviewBudget`.** Treat `maxToolCalls` as a hard cap for shell/docs
+  calls you initiate, and never delegate when `allowSubagents` is false. Use
+  `scope` to decide how exhaustive to be. For `large` PRs, explicitly run a
+  scoped high-risk review rather than trying to inspect everything.
 
 ## Step 1 — Load guidance and repo context
 
@@ -98,10 +106,10 @@ gh api "repos/$repo/pulls/$prNumber/comments" \
   --jq '.[] | {path, line, user: .user.login, body}' 2>/dev/null || true
 ```
 
-Read the description to understand intent, then read the full diff. If the diff
-is large, focus your attention budget on the highest-risk files first
-(auth, payments, data access, migrations, concurrency, anything touching
-untrusted input).
+Read the description to understand intent, then read the diff according to
+`reviewBudget.scope`. If the diff is large, focus your attention budget on the
+highest-risk files first (auth, payments, data access, migrations, concurrency,
+anything touching untrusted input).
 
 Skim the existing discussion. Your own prior reviews are the ones ending with
 the `🤖 Automated review` footer; everything else is humans. You'll use this in

diff --git a/test/review-timeout.test.mjs b/test/review-timeout.test.mjs
@@ -0,0 +1,56 @@
+import assert from 'node:assert/strict';
+import test from 'node:test';
+import {
+  parseReviewStats,
+  resolveReviewBudget,
+  reviewStatsCommand,
+  resolveReviewTimeoutMs,
+  timeoutReviewResult,
+} from '../lib/review-timeout.ts';
+
+test('resolveReviewTimeoutMs defaults to eight minutes', () => {
+  assert.equal(resolveReviewTimeoutMs({}), 8 * 60_000);
+});
+
+test('resolveReviewTimeoutMs accepts REVIEW_TIMEOUT_MS', () => {
+  assert.equal(resolveReviewTimeoutMs({ REVIEW_TIMEOUT_MS: '1000' }), 1000);
+});
+
+test('resolveReviewTimeoutMs ignores invalid values', () => {
+  assert.equal(resolveReviewTimeoutMs({ REVIEW_TIMEOUT_MS: 'nope' }), 8 * 60_000);
+  assert.equal(resolveReviewTimeoutMs({ REVIEW_TIMEOUT_MS: '0' }), 8 * 60_000);
+});
+
+test('reviewStatsCommand quotes repo names', () => {
+  assert.equal(
+    reviewStatsCommand(12, "owner/re'po"),
+    "gh pr view 12 --repo 'owner/re'\\''po' --json additions,deletions,changedFiles",
+  );
+});
+
+test('parseReviewStats reads gh pr view output', () => {
+  assert.deepEqual(parseReviewStats('{"changedFiles":2,"additions":10,"deletions":5}'), {
+    changedFiles: 2,
+    additions: 10,
+    deletions: 5,
+  });
+});
+
+test('resolveReviewBudget chooses tiers from PR size', () => {
+  assert.equal(resolveReviewBudget({ changedFiles: 2, additions: 50, deletions: 25 }, {}).tier, 'tiny');
+  assert.equal(resolveReviewBudget({ changedFiles: 8, additions: 200, deletions: 100 }, {}).tier, 'small');
+  assert.equal(resolveReviewBudget({ changedFiles: 20, additions: 700, deletions: 200 }, {}).tier, 'medium');
+  assert.equal(resolveReviewBudget({ changedFiles: 40, additions: 1200, deletions: 100 }, {}).tier, 'large');
+});
+
+test('resolveReviewBudget lets REVIEW_TIMEOUT_MS override tier timeout', () => {
+  assert.equal(resolveReviewBudget({ changedFiles: 2, additions: 1, deletions: 1 }, { REVIEW_TIMEOUT_MS: '1234' }).timeoutMs, 1234);
+});
+
+test('timeoutReviewResult returns a non-blocking comment result', () => {
+  assert.deepEqual(timeoutReviewResult(1500), {
+    verdict: 'comment',
+    summary: 'Automated review timed out after 2s. No reliable findings were produced.',
+    findings: [],
+  });
+});
diff --git a/workflows/pr-review.ts b/workflows/pr-review.ts
@@ -8,6 +8,13 @@ import {
 import { local } from '@flue/runtime/node';
 import * as v from 'valibot';
 import { postReview, renderReview } from '../lib/review-comments.ts';
+import {
+  parseReviewStats,
+  resolveReviewBudget,
+  reviewStatsCommand,
+  timeoutReviewResult,
+  type ReviewStats,
+} from '../lib/review-timeout.ts';
 
 // ---------------------------------------------------------------------------
 // Global skills
@@ -121,21 +128,49 @@ export async function run({ init, payload, env }: FlueContext) {
     const harness = await init(agent, { tools: docs?.tools ?? [] });
     const session = await harness.session();
 
+    const stats: ReviewStats = { changedFiles: 999, additions: 1_000, deletions: 0 };
+    if (targetRepo) {
+      try {
+        const result = await harness.shell(reviewStatsCommand(prNumber, targetRepo), {
+          signal: AbortSignal.timeout(30_000),
+        });
+        if (result.exitCode === 0) {
+          Object.assign(stats, parseReviewStats(result.stdout));
+        } else {
+          console.error(`Failed to fetch PR stats for ${targetRepo}#${prNumber}; using conservative review budget.`);
+        }
+      } catch {
+        console.error(`Timed out fetching PR stats for ${targetRepo}#${prNumber}; using conservative review budget.`);
+      }
+    }
+    const reviewBudget = resolveReviewBudget(stats, env);
+
     // Activate the global review skill. It layers the central skills/prompts in
     // `agentDir` with any per-repo overrides in `localConfigDir` (per
     // `overrideMode`), fetches the diff with `gh`, and returns its findings.
-    const { data } = await session.skill('code-review', {
-      model: cfg.model,
-      args: {
-        prNumber,
-        repo: targetRepo,
-        agentDir: cfg.agentDir,
-        targetDir: cfg.targetDir,
-        localConfigDir: cfg.localConfigDir,
-        overrideMode: cfg.overrideMode,
-      },
-      result: SkillResult,
-    });
+    // REVIEW_TIMEOUT_MS overrides the tier-derived hard wall-clock kill switch.
+    const reviewSignal = AbortSignal.timeout(reviewBudget.timeoutMs);
+    let data: SkillResult;
+
+    try {
+      ({ data } = await session.skill('code-review', {
+        model: cfg.model,
+        signal: reviewSignal,
+        args: {
+          prNumber,
+          repo: targetRepo,
+          agentDir: cfg.agentDir,
+          targetDir: cfg.targetDir,
+          localConfigDir: cfg.localConfigDir,
+          overrideMode: cfg.overrideMode,
+          reviewBudget,
+        },
+        result: SkillResult,
+      }));
+    } catch (error) {
+      if (!reviewSignal.aborted) throw error;
+      data = timeoutReviewResult(reviewBudget.timeoutMs);
+    }
 
     // Post deterministically from the structured result — the model doesn't post.
     const posted = targetRepo ? await postReview(session, prNumber, targetRepo, renderReview(data)) : false;
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,4 +8,5 @@ dist/ @@
     !.env.example
     .dev.vars
-    .claude
+    .claude
+    docs