Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ dist/
!.env.example
.dev.vars

.claude
.claude
docs
84 changes: 84 additions & 0 deletions lib/review-timeout.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
const DEFAULT_REVIEW_TIMEOUT_MS = 8 * 60_000;

export type ReviewStats = {
changedFiles: number;
additions: number;
deletions: number;
};

export type ReviewBudget = {
tier: 'tiny' | 'small' | 'medium' | 'large';
timeoutMs: number;
maxToolCalls: number;
allowSubagents: boolean;
scope: string;
stats: ReviewStats;
};

export function resolveReviewTimeoutMs(env: Record<string, string | undefined>, fallback = DEFAULT_REVIEW_TIMEOUT_MS) {
const value = Number(env.REVIEW_TIMEOUT_MS ?? fallback);
return Number.isFinite(value) && value > 0 ? value : fallback;
}

function shellQuote(value: string): string {
return `'${value.replaceAll("'", "'\\''")}'`;
}

export function reviewStatsCommand(prNumber: number, repo: string) {
return `gh pr view ${prNumber} --repo ${shellQuote(repo)} --json additions,deletions,changedFiles`;
}

export function parseReviewStats(stdout: string): ReviewStats {
const parsed = JSON.parse(stdout) as Partial<ReviewStats>;
return {
changedFiles: Number(parsed.changedFiles) || 0,
additions: Number(parsed.additions) || 0,
deletions: Number(parsed.deletions) || 0,
};
}

export function resolveReviewBudget(stats: ReviewStats, env: Record<string, string | undefined>): ReviewBudget {
const lines = stats.additions + stats.deletions;
const base =
stats.changedFiles <= 3 && lines <= 100
? {
tier: 'tiny' as const,
timeoutMs: 3 * 60_000,
maxToolCalls: 6,
allowSubagents: false,
scope: 'Review the diff directly. Do not delegate. Return as soon as concrete findings are clear.',
}
: stats.changedFiles <= 10 && lines <= 400
? {
tier: 'small' as const,
timeoutMs: 5 * 60_000,
maxToolCalls: 10,
allowSubagents: false,
scope: 'Review the whole diff, with brief repo-context lookups only when needed. Do not delegate.',
}
: stats.changedFiles <= 25 && lines <= 1_000
? {
tier: 'medium' as const,
timeoutMs: 8 * 60_000,
maxToolCalls: 16,
allowSubagents: true,
scope: 'Review high-risk files first, then the rest of the diff if budget remains. Delegate at most once for clear security or correctness risk.',
}
: {
tier: 'large' as const,
timeoutMs: 12 * 60_000,
maxToolCalls: 20,
allowSubagents: true,
scope: 'Run a scoped review: prioritize auth, data access, migrations, concurrency, public APIs, and untrusted input. Do not attempt exhaustive coverage.',
};

return { ...base, timeoutMs: resolveReviewTimeoutMs(env, base.timeoutMs), stats };
}

export function timeoutReviewResult(timeoutMs: number) {
return {
verdict: 'comment' as const,
summary: `Automated review timed out after ${Math.round(timeoutMs / 1000)}s. No reliable findings were produced.`,
findings: [],
};
}
16 changes: 12 additions & 4 deletions skills/code-review/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ these inputs:
- `localConfigDir` — absolute path to the under-review repo's `.agents/`
overrides, or empty if it ships none.
- `overrideMode` — `merge` (defaults + local) or `replace` (local only).
- `reviewBudget` — the workflow-computed effort budget for this PR. It includes
`tier`, `timeoutMs`, `maxToolCalls`, `allowSubagents`, `scope`, and diff
`stats` (`changedFiles`, `additions`, `deletions`). The workflow enforces the
timeout; you must shape your review to fit the rest.

Throughout, refer to the PR with `gh`'s `--repo "$repo"` and the number
`prNumber`. The `gh` CLI is authenticated via `GH_TOKEN`.
Expand Down Expand Up @@ -49,6 +53,10 @@ apply, regardless of repo or override:
whole PR, or keep hunting for marginal nits once you have the real issues. A
fast, confident review of the things that matter is the goal — extra
deliberation rarely changes the verdict and burns time and tokens.
- **Respect `reviewBudget`.** Treat `maxToolCalls` as a hard cap for shell/docs
calls you initiate, and never delegate when `allowSubagents` is false. Use
`scope` to decide how exhaustive to be. For `large` PRs, explicitly run a
scoped high-risk review rather than trying to inspect everything.

## Step 1 — Load guidance and repo context

Expand Down Expand Up @@ -98,10 +106,10 @@ gh api "repos/$repo/pulls/$prNumber/comments" \
--jq '.[] | {path, line, user: .user.login, body}' 2>/dev/null || true
```

Read the description to understand intent, then read the full diff. If the diff
is large, focus your attention budget on the highest-risk files first
(auth, payments, data access, migrations, concurrency, anything touching
untrusted input).
Read the description to understand intent, then read the diff according to
`reviewBudget.scope`. If the diff is large, focus your attention budget on the
highest-risk files first (auth, payments, data access, migrations, concurrency,
anything touching untrusted input).

Skim the existing discussion. Your own prior reviews are the ones ending with
the `🤖 Automated review` footer; everything else is humans. You'll use this in
Expand Down
56 changes: 56 additions & 0 deletions test/review-timeout.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import assert from 'node:assert/strict';
import test from 'node:test';
import {
parseReviewStats,
resolveReviewBudget,
reviewStatsCommand,
resolveReviewTimeoutMs,
timeoutReviewResult,
} from '../lib/review-timeout.ts';

test('resolveReviewTimeoutMs defaults to eight minutes', () => {
assert.equal(resolveReviewTimeoutMs({}), 8 * 60_000);
});

test('resolveReviewTimeoutMs accepts REVIEW_TIMEOUT_MS', () => {
assert.equal(resolveReviewTimeoutMs({ REVIEW_TIMEOUT_MS: '1000' }), 1000);
});

test('resolveReviewTimeoutMs ignores invalid values', () => {
assert.equal(resolveReviewTimeoutMs({ REVIEW_TIMEOUT_MS: 'nope' }), 8 * 60_000);
assert.equal(resolveReviewTimeoutMs({ REVIEW_TIMEOUT_MS: '0' }), 8 * 60_000);
});

test('reviewStatsCommand quotes repo names', () => {
assert.equal(
reviewStatsCommand(12, "owner/re'po"),
"gh pr view 12 --repo 'owner/re'\\''po' --json additions,deletions,changedFiles",
);
});

test('parseReviewStats reads gh pr view output', () => {
assert.deepEqual(parseReviewStats('{"changedFiles":2,"additions":10,"deletions":5}'), {
changedFiles: 2,
additions: 10,
deletions: 5,
});
});

test('resolveReviewBudget chooses tiers from PR size', () => {
assert.equal(resolveReviewBudget({ changedFiles: 2, additions: 50, deletions: 25 }, {}).tier, 'tiny');
assert.equal(resolveReviewBudget({ changedFiles: 8, additions: 200, deletions: 100 }, {}).tier, 'small');
assert.equal(resolveReviewBudget({ changedFiles: 20, additions: 700, deletions: 200 }, {}).tier, 'medium');
assert.equal(resolveReviewBudget({ changedFiles: 40, additions: 1200, deletions: 100 }, {}).tier, 'large');
});

test('resolveReviewBudget lets REVIEW_TIMEOUT_MS override tier timeout', () => {
assert.equal(resolveReviewBudget({ changedFiles: 2, additions: 1, deletions: 1 }, { REVIEW_TIMEOUT_MS: '1234' }).timeoutMs, 1234);
});

test('timeoutReviewResult returns a non-blocking comment result', () => {
assert.deepEqual(timeoutReviewResult(1500), {
verdict: 'comment',
summary: 'Automated review timed out after 2s. No reliable findings were produced.',
findings: [],
});
});
59 changes: 47 additions & 12 deletions workflows/pr-review.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ import {
import { local } from '@flue/runtime/node';
import * as v from 'valibot';
import { postReview, renderReview } from '../lib/review-comments.ts';
import {
parseReviewStats,
resolveReviewBudget,
reviewStatsCommand,
timeoutReviewResult,
type ReviewStats,
} from '../lib/review-timeout.ts';

// ---------------------------------------------------------------------------
// Global skills
Expand Down Expand Up @@ -121,21 +128,49 @@ export async function run({ init, payload, env }: FlueContext) {
const harness = await init(agent, { tools: docs?.tools ?? [] });
const session = await harness.session();

const stats: ReviewStats = { changedFiles: 999, additions: 1_000, deletions: 0 };
if (targetRepo) {
try {
const result = await harness.shell(reviewStatsCommand(prNumber, targetRepo), {
signal: AbortSignal.timeout(30_000),
});
if (result.exitCode === 0) {
Object.assign(stats, parseReviewStats(result.stdout));
} else {
console.error(`Failed to fetch PR stats for ${targetRepo}#${prNumber}; using conservative review budget.`);
}
} catch {
console.error(`Timed out fetching PR stats for ${targetRepo}#${prNumber}; using conservative review budget.`);
}
}
const reviewBudget = resolveReviewBudget(stats, env);

// Activate the global review skill. It layers the central skills/prompts in
// `agentDir` with any per-repo overrides in `localConfigDir` (per
// `overrideMode`), fetches the diff with `gh`, and returns its findings.
const { data } = await session.skill('code-review', {
model: cfg.model,
args: {
prNumber,
repo: targetRepo,
agentDir: cfg.agentDir,
targetDir: cfg.targetDir,
localConfigDir: cfg.localConfigDir,
overrideMode: cfg.overrideMode,
},
result: SkillResult,
});
// REVIEW_TIMEOUT_MS overrides the tier-derived hard wall-clock kill switch.
const reviewSignal = AbortSignal.timeout(reviewBudget.timeoutMs);
let data: SkillResult;

try {
({ data } = await session.skill('code-review', {
model: cfg.model,
signal: reviewSignal,
args: {
prNumber,
repo: targetRepo,
agentDir: cfg.agentDir,
targetDir: cfg.targetDir,
localConfigDir: cfg.localConfigDir,
overrideMode: cfg.overrideMode,
reviewBudget,
},
result: SkillResult,
}));
} catch (error) {
if (!reviewSignal.aborted) throw error;
data = timeoutReviewResult(reviewBudget.timeoutMs);
}

// Post deterministically from the structured result — the model doesn't post.
const posted = targetRepo ? await postReview(session, prNumber, targetRepo, renderReview(data)) : false;
Expand Down
Loading