From a35e16eab744e94d8cd510517b548be60b4434c1 Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 18:06:01 -0700 Subject: [PATCH 01/17] =?UTF-8?q?feat(cli):=20add=20`stats`=20subcommand?= =?UTF-8?q?=20=E2=80=94=20per-model=20React=20Doctor=20leaderboard=20from?= =?UTF-8?q?=20agent=20history?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `react-doctor stats`, which reads local AI agent history (Claude Code + Codex transcripts, the Cursor composer database), reconstructs the React code each model actually wrote, lints it with the existing engine, and ranks models and providers by a confidence-weighted React Doctor score. - Reconstructs faithful post-edit file content per provider (Claude snapshots, Cursor `afterContentId` blobs, Codex `apply_patch`), filtered to real React. - Confidence-weighted ranking: each group's raw score regresses toward the global mean by its evidence (files dominant, lightly discounted by sessions), so a tiny clean sample can't top the board. - Plain-language terminal leaderboard with color-coded tools (adds an `orange` to the shared highlighter for Claude); `--json` for the machine-readable report. 
--- .changeset/stats-agent-leaderboard.md | 14 ++ packages/core/src/highlighter.ts | 10 + .../react-doctor/src/cli/commands/stats.ts | 119 ++++++++++ packages/react-doctor/src/cli/index.ts | 51 ++++- .../react-doctor/src/cli/utils/constants.ts | 3 + .../src/cli/utils/strip-unknown-cli-flags.ts | 10 + .../react-doctor/src/stats/aggregate-stats.ts | 216 ++++++++++++++++++ packages/react-doctor/src/stats/constants.ts | 44 ++++ packages/react-doctor/src/stats/cursor-db.ts | 185 +++++++++++++++ .../src/stats/discover-sessions.ts | 40 ++++ .../react-doctor/src/stats/is-react-source.ts | 42 ++++ .../stats/materialize-reconstructed-tree.ts | 71 ++++++ .../src/stats/parse-apply-patch.ts | 124 ++++++++++ .../src/stats/reconstruct-files.ts | 136 +++++++++++ .../react-doctor/src/stats/render-stats.ts | 140 ++++++++++++ .../react-doctor/src/stats/run-stats-scan.ts | 156 +++++++++++++ .../react-doctor/src/stats/sources/claude.ts | 150 ++++++++++++ .../react-doctor/src/stats/sources/codex.ts | 85 +++++++ .../react-doctor/src/stats/sources/cursor.ts | 134 +++++++++++ .../react-doctor/src/stats/sources/index.ts | 26 +++ packages/react-doctor/src/stats/types.ts | 137 +++++++++++ .../src/stats/walk-transcripts.ts | 99 ++++++++ .../react-doctor/tests/stats-adapters.test.ts | 216 ++++++++++++++++++ .../tests/stats-aggregate.test.ts | 123 ++++++++++ .../tests/stats-apply-patch.test.ts | 60 +++++ .../tests/stats-is-react-source.test.ts | 56 +++++ .../tests/stats-reconstruct.test.ts | 116 ++++++++++ .../react-doctor/tests/stats-render.test.ts | 80 +++++++ 28 files changed, 2641 insertions(+), 2 deletions(-) create mode 100644 .changeset/stats-agent-leaderboard.md create mode 100644 packages/react-doctor/src/cli/commands/stats.ts create mode 100644 packages/react-doctor/src/stats/aggregate-stats.ts create mode 100644 packages/react-doctor/src/stats/constants.ts create mode 100644 packages/react-doctor/src/stats/cursor-db.ts create mode 100644 
packages/react-doctor/src/stats/discover-sessions.ts create mode 100644 packages/react-doctor/src/stats/is-react-source.ts create mode 100644 packages/react-doctor/src/stats/materialize-reconstructed-tree.ts create mode 100644 packages/react-doctor/src/stats/parse-apply-patch.ts create mode 100644 packages/react-doctor/src/stats/reconstruct-files.ts create mode 100644 packages/react-doctor/src/stats/render-stats.ts create mode 100644 packages/react-doctor/src/stats/run-stats-scan.ts create mode 100644 packages/react-doctor/src/stats/sources/claude.ts create mode 100644 packages/react-doctor/src/stats/sources/codex.ts create mode 100644 packages/react-doctor/src/stats/sources/cursor.ts create mode 100644 packages/react-doctor/src/stats/sources/index.ts create mode 100644 packages/react-doctor/src/stats/types.ts create mode 100644 packages/react-doctor/src/stats/walk-transcripts.ts create mode 100644 packages/react-doctor/tests/stats-adapters.test.ts create mode 100644 packages/react-doctor/tests/stats-aggregate.test.ts create mode 100644 packages/react-doctor/tests/stats-apply-patch.test.ts create mode 100644 packages/react-doctor/tests/stats-is-react-source.test.ts create mode 100644 packages/react-doctor/tests/stats-reconstruct.test.ts create mode 100644 packages/react-doctor/tests/stats-render.test.ts diff --git a/.changeset/stats-agent-leaderboard.md b/.changeset/stats-agent-leaderboard.md new file mode 100644 index 000000000..0d99b8e11 --- /dev/null +++ b/.changeset/stats-agent-leaderboard.md @@ -0,0 +1,14 @@ +--- +"react-doctor": minor +--- + +Add a `react-doctor stats` subcommand — a per-model code-quality leaderboard built from local AI agent chat history. 
+ +`stats` reads local agent history — Claude Code (`~/.claude`) and Codex (`~/.codex`) transcripts, plus the Cursor composer database — reconstructs the file content each model actually wrote (Claude post-edit snapshots, Cursor full post-edit file snapshots, Codex `apply_patch` envelopes), lints that content with the existing engine, and ranks models and providers by their React Doctor score and diagnostics-per-file. The job: answer "which agent/model writes the cleanest React code in my repo". + +- Only the React code each model wrote is scored. Reconstructed files are filtered to actual React (JSX/TSX, `use client`/`use server` directives, or a React-ecosystem import) before linting, so a model's plain backend/util/config files don't pad its file count or dilute its diagnostics-per-file. +- Ranking is by a confidence-weighted score, not the raw score: each group's score is regressed toward the global mean by its evidence, so a model with a handful of clean files can't top the board on a tiny sample. Files are the dominant signal; sessions only lightly discount the file weight (many files from one session are one correlated sample) and never below a floor. The terminal table shows both the raw `Score` and the `Weighted` score it ranks on. +- Cursor attribution reads the canonical composer database (`state.vscdb`) directly, so each session carries its real model (e.g. `claude-opus-4-8`, `gpt-5.5`, `composer-2`) and an exact post-edit snapshot of every edited file — the model-less agent-transcript JSONL files are no longer used. Attribution falls back to `unknown` only for chats left on the "Auto" model. +- Default scope is the current repository (sessions whose cwd or edits touch the repo root); `--global` ranks across every repo on the machine. `--since`, `--limit`, and `--provider` bound the work. 
+- `--json` emits a structured leaderboard (`{ schemaVersion, scope, models, providers, best, worst, … }`); the terminal output shows model + provider tables with score bars and a best/worst callout. +- Coverage is honest about its limits: Codex shell-based edits are not faithfully reconstructable (surfaced as skipped), the Cursor composer database requires `node:sqlite` (Node 22.13+) and covers GUI agent sessions (not cursor-agent CLI runs), and the score requires network access. diff --git a/packages/core/src/highlighter.ts b/packages/core/src/highlighter.ts index 2beee1325..c216d3728 100644 --- a/packages/core/src/highlighter.ts +++ b/packages/core/src/highlighter.ts @@ -1,5 +1,13 @@ import pc from "picocolors"; +// picocolors only ships the 16-color palette, so orange (Claude's brand) is a +// 256-color escape built by hand. Honors color-disabled by returning the input. +const ORANGE_ANSI_CODE = 208; +const makeOrange = + (enabled: boolean): ((input: string | number) => string) => + (input) => + enabled ? 
`\u001b[38;5;${ORANGE_ANSI_CODE}m${input}\u001b[39m` : String(input); + export const highlighter = { error: pc.red, warn: pc.yellow, @@ -7,6 +15,7 @@ export const highlighter = { success: pc.green, dim: pc.dim, gray: pc.gray, + orange: makeOrange(pc.isColorSupported), bold: pc.bold, }; @@ -27,5 +36,6 @@ export const setColorEnabled = (enabled: boolean): void => { highlighter.success = colors.green; highlighter.dim = colors.dim; highlighter.gray = colors.gray; + highlighter.orange = makeOrange(enabled); highlighter.bold = colors.bold; }; diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts new file mode 100644 index 000000000..a132da3c4 --- /dev/null +++ b/packages/react-doctor/src/cli/commands/stats.ts @@ -0,0 +1,119 @@ +import * as path from "node:path"; +import { resolveScanTarget, type ReactDoctorConfig } from "@react-doctor/core"; +import { aggregateStats } from "../../stats/aggregate-stats.js"; +import { STATS_DEFAULT_SESSION_LIMIT } from "../../stats/constants.js"; +import { discoverSessions } from "../../stats/discover-sessions.js"; +import { renderStatsReport } from "../../stats/render-stats.js"; +import { runStatsScan } from "../../stats/run-stats-scan.js"; +import type { StatsProvider, StatsReport, StatsScopeOptions } from "../../stats/types.js"; +import { METRIC } from "../utils/constants.js"; +import { recordCount } from "../utils/record-metric.js"; +import { spinner } from "../utils/spinner.js"; + +export interface StatsFlags { + global?: boolean; + since?: string; + limit?: string; + provider?: string; + json?: boolean; + cwd?: string; +} + +const VALID_PROVIDERS = new Set(["claude", "codex", "cursor"]); + +const parseProvider = (value: string | undefined): StatsProvider | undefined => { + if (value === undefined) return undefined; + if (!VALID_PROVIDERS.has(value as StatsProvider)) { + throw new Error(`Unknown provider "${value}". 
Expected one of: claude, codex, cursor.`); + } + return value as StatsProvider; +}; + +const parseSince = (value: string | undefined): Date | undefined => { + if (value === undefined) return undefined; + const parsed = new Date(value); + if (Number.isNaN(parsed.getTime())) { + throw new Error(`Invalid --since date "${value}". Use e.g. 2026-06-01.`); + } + return parsed; +}; + +const parseLimit = (value: string | undefined): number => { + if (value === undefined) return STATS_DEFAULT_SESSION_LIMIT; + const parsed = Number.parseInt(value, 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : STATS_DEFAULT_SESSION_LIMIT; +}; + +const resolveTarget = async ( + directory: string, +): Promise<{ root: string; userConfig: ReactDoctorConfig | null }> => { + try { + const target = await resolveScanTarget(directory); + return { root: target.resolvedDirectory, userConfig: target.userConfig }; + } catch { + return { root: path.resolve(directory), userConfig: null }; + } +}; + +export const statsAction = async (flags: StatsFlags): Promise<void> => { + const directory = flags.cwd ?? process.cwd(); + const scope: StatsScopeOptions = { + global: flags.global ?? false, + since: parseSince(flags.since), + limit: parseLimit(flags.limit), + provider: parseProvider(flags.provider), + }; + + const { root, userConfig } = await resolveTarget(directory); + + // ora renders to stderr; suppress it in JSON mode so the run stays quiet. + const progress = flags.json ? null : spinner("Looking through your agent history…").start(); + let report: StatsReport; + let providerCount: number; + try { + const sessions = discoverSessions(root, scope); + progress?.update("Checking the code each agent wrote…"); + const results = await runStatsScan(sessions, scope.global ? 
null : root, { + onProgress: (completedCount, totalCount) => + progress?.update(`Checking the code each agent wrote… (${completedCount}/${totalCount})`), + }); + progress?.update("Scoring…"); + const aggregated = await aggregateStats(results, userConfig); + providerCount = aggregated.providers.length; + + report = { + scope: scope.global ? "global" : "repo", + directory: root, + models: aggregated.models, + providers: aggregated.providers, + best: aggregated.best, + worst: aggregated.worst, + sessionsAnalyzed: results.length, + sessionsRanked: results.filter((result) => result.filesScanned > 0).length, + sessionsNonReact: results.filter( + (result) => result.filesScanned === 0 && result.reconstructedFiles > 0, + ).length, + sessionsUnreconstructable: results.filter( + (result) => result.filesScanned === 0 && result.reconstructedFiles === 0, + ).length, + generatedAt: new Date().toISOString(), + }; + progress?.succeed("Done."); + } finally { + progress?.stop(); + } + + recordCount(METRIC.statsRun, 1, { + scope: report.scope, + sessions: report.sessionsAnalyzed, + providers: providerCount, + provider: scope.provider ?? 
"all", + }); + + if (flags.json) { + process.stdout.write(`${JSON.stringify({ schemaVersion: 1, ...report }, null, 2)}\n`); + return; + } + + process.stdout.write(`${renderStatsReport(report)}\n`); +}; diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts index f8425d28e..755dc8bcb 100644 --- a/packages/react-doctor/src/cli/index.ts +++ b/packages/react-doctor/src/cli/index.ts @@ -13,6 +13,7 @@ import { rulesSetAction, rulesUnignoreTagAction, } from "./commands/rules.js"; +import { statsAction } from "./commands/stats.js"; import { versionAction } from "./commands/version.js"; import { whyAction } from "./commands/why.js"; import { applyColorPreference } from "./utils/apply-color-preference.js"; @@ -80,8 +81,12 @@ ${formatExampleLines([ ])} ${highlighter.dim("Configuration:")} - Add a ${highlighter.info("doctor.config.ts")} (or .js/.mjs/.json — or a ${highlighter.info('"reactDoctor"')} key in your package.json) in the project root. - Use ${highlighter.info("react-doctor rules")} to list, explain, and configure rules. CLI flags always override config values. + Add a ${highlighter.info("doctor.config.ts")} (or .js/.mjs/.json — or a ${highlighter.info( + '"reactDoctor"', + )} key in your package.json) in the project root. + Use ${highlighter.info( + "react-doctor rules", + )} to list, explain, and configure rules. CLI flags always override config values. 
${highlighter.dim("Feedback & bug reports:")} ${highlighter.info(`${CANONICAL_GITHUB_URL}/issues`)} @@ -103,6 +108,31 @@ ${highlighter.dim("Learn more:")} ${highlighter.info(CANONICAL_GITHUB_URL)} `; +const renderStatsHelpEpilog = (): string => ` +${highlighter.dim("Examples:")} +${formatExampleLines([ + ["react-doctor stats", "rank agents on sessions that touched this repo"], + ["react-doctor stats --global", "rank across every repository on this machine"], + ["react-doctor stats --provider claude", "only Claude Code sessions"], + ["react-doctor stats --since 2026-06-01", "only recent sessions"], + ["react-doctor stats --json", "machine-readable leaderboard"], +])} + +${highlighter.dim("How it works:")} + Reads local agent history (Claude Code + Codex transcripts, the Cursor + composer database), reconstructs the code each model wrote, lints it, and + ranks models + providers by score. + +${highlighter.dim("Caveats:")} + Codex shell-based edits aren't reconstructable (partial coverage). Cursor uses + the GUI composer database (cursor-agent CLI transcripts are not included), and + attribution falls back to "unknown" only for chats left on "Auto". The score + requires network access. + +${highlighter.dim("Learn more:")} + ${highlighter.info(CANONICAL_GITHUB_URL)} +`; + const collectCategoryOption = (value: string, previousValues: string[] | undefined): string[] => [ ...(previousValues ?? []), value, @@ -227,6 +257,23 @@ program .option("--no-color", "disable colored output (also honors NO_COLOR)") .action(versionAction); +program + .command("stats") + .description("Rank agents/models by the React Doctor health of the code they wrote") + .option("--global", "include sessions from every repository (default: this repo only)") + .option("--since <date>", "only sessions modified on or after this date (e.g. 
2026-06-01)") + .option("--limit <count>", "max sessions to analyze, newest first (default: 200)") + .option("--provider <name>", "only one source: claude, codex, or cursor") + .option("--json", "output a structured JSON leaderboard") + .option("-c, --cwd <directory>", "working directory", process.cwd()) + .option("--color", "force colored output") + .option("--no-color", "disable colored output (also honors NO_COLOR)") + .addHelpText("after", renderStatsHelpEpilog) + // HACK: `--json` is also declared on the root program, so Commander stashes + // it on the parent. Route through `optsWithGlobals()` so the merged option + // set (subcommand + inherited globals) is what the action sees. + .action((_options, command) => statsAction(command.optsWithGlobals())); + const rules = program .command("rules") .description("List, explain, and configure which React Doctor rules run"); diff --git a/packages/react-doctor/src/cli/utils/constants.ts b/packages/react-doctor/src/cli/utils/constants.ts index 7210b6230..508668180 100644 --- a/packages/react-doctor/src/cli/utils/constants.ts +++ b/packages/react-doctor/src/cli/utils/constants.ts @@ -181,6 +181,9 @@ export const METRIC = { installDependency: "install.dependency", rulesChanged: "rules.changed", rulesQueried: "rules.queried", + // `react-doctor stats`: one counter per run (adoption), with the providers + // discovered and the number of agent sessions scored as attributes. + statsRun: "stats.run", // Editor language server (`react-doctor experimental-lsp`). Each workspace // scan burst is one wide-event span (op `lsp.scan`) plus these metrics. 
lspSessionStarted: "lsp.session.started", diff --git a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts index 88502c907..84cd5d06d 100644 --- a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts +++ b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts @@ -99,12 +99,22 @@ const WHY_FLAG_SPEC: CliFlagSpec = { shortOptionsWithRequiredValues: new Set(["-c"]), }; +// `stats` takes no positionals — just the scope/output options. +const STATS_FLAG_SPEC: CliFlagSpec = { + longOptionsWithoutValues: new Set(["--color", "--global", "--help", "--json", "--no-color"]), + longOptionsWithRequiredValues: new Set(["--cwd", "--limit", "--provider", "--since"]), + longOptionsWithOptionalValues: new Set(), + shortOptionsWithoutValues: new Set(["-h"]), + shortOptionsWithRequiredValues: new Set(["-c"]), +}; + const COMMAND_FLAG_SPECS = new Map([ ["install", INSTALL_FLAG_SPEC], ["setup", INSTALL_FLAG_SPEC], ["version", VERSION_FLAG_SPEC], ["rules", RULES_FLAG_SPEC], ["why", WHY_FLAG_SPEC], + ["stats", STATS_FLAG_SPEC], ]); const isFlagLike = (argument: string): boolean => argument.startsWith("-") && argument !== "-"; diff --git a/packages/react-doctor/src/stats/aggregate-stats.ts b/packages/react-doctor/src/stats/aggregate-stats.ts new file mode 100644 index 000000000..f04e4db64 --- /dev/null +++ b/packages/react-doctor/src/stats/aggregate-stats.ts @@ -0,0 +1,216 @@ +import { + calculateScore, + filterDiagnosticsForSurface, + type Diagnostic, + type ReactDoctorConfig, + type ScoreResult, +} from "@react-doctor/core"; +import { + STATS_MIN_FILES_FOR_SCORE, + STATS_SCORE_PRIOR_FILES, + STATS_SCORE_SESSION_FLOOR, + STATS_SCORE_SESSION_PRIOR, + STATS_TOP_RULES_PER_GROUP, +} from "./constants.js"; +import type { GroupStats, SessionScanResult, StatsProvider } from "./types.js"; + +/** Computes a 0-100 score for a diagnostic set. Injectable for tests. 
*/ +export type ScoreComputer = ( + diagnostics: Diagnostic[], + sourceFileCount: number, +) => Promise<ScoreResult | null>; + +const defaultScoreComputer: ScoreComputer = (diagnostics, sourceFileCount) => + calculateScore(diagnostics, { metadata: { sourceFileCount } }); + +interface Accumulator { + readonly key: string; + readonly provider: StatsProvider | "mixed"; + sessions: number; + filesScanned: number; + unreconstructable: number; + diagnostics: Diagnostic[]; +} + +const upsert = ( + groups: Map<string, Accumulator>, + key: string, + provider: StatsProvider | "mixed", + result: SessionScanResult, +): void => { + let group = groups.get(key); + if (!group) { + group = { key, provider, sessions: 0, filesScanned: 0, unreconstructable: 0, diagnostics: [] }; + groups.set(key, group); + } + group.sessions += 1; + group.filesScanned += result.filesScanned; + group.unreconstructable += result.unreconstructable; + group.diagnostics.push(...result.diagnostics); +}; + +/** + * Confidence-weight a raw score with a Bayesian average: pull it toward the + * global mean (`priorScore`) by the group's evidence. Files are the dominant + * sample unit; sessions only lightly discount the file weight (many files from + * one session are one correlated sample), bounded below by a floor so a + * file-rich, session-poor group still counts. Low-evidence groups regress to the + * mean; high-evidence groups keep their raw score. Returns the raw score when + * there's no prior. 
+ */ +export const confidenceWeightedScore = ( + rawScore: number | null, + priorScore: number | null, + filesScanned: number, + sessions: number, +): number | null => { + if (rawScore === null) return null; + if (priorScore === null) return rawScore; + const sessionReliability = + STATS_SCORE_SESSION_FLOOR + + (1 - STATS_SCORE_SESSION_FLOOR) * (sessions / (sessions + STATS_SCORE_SESSION_PRIOR)); + const effectiveFiles = filesScanned * sessionReliability; + return Math.round( + (priorScore * STATS_SCORE_PRIOR_FILES + rawScore * effectiveFiles) / + (STATS_SCORE_PRIOR_FILES + effectiveFiles), + ); +}; + +const topRules = (diagnostics: ReadonlyArray<Diagnostic>): GroupStats["topRules"] => { + const counts = new Map<string, number>(); + for (const diagnostic of diagnostics) { + const ruleKey = `${diagnostic.plugin}/${diagnostic.rule}`; + counts.set(ruleKey, (counts.get(ruleKey) ?? 0) + 1); + } + return [...counts.entries()] + .sort((left, right) => right[1] - left[1]) + .slice(0, STATS_TOP_RULES_PER_GROUP) + .map(([rule, count]) => ({ rule, count })); +}; + +const toGroupStats = async ( + accumulator: Accumulator, + userConfig: ReactDoctorConfig | null, + computeScore: ScoreComputer, + priorScore: number | null, +): Promise<GroupStats> => { + const errorCount = accumulator.diagnostics.filter( + (diagnostic) => diagnostic.severity === "error", + ).length; + const scoreEligible = accumulator.filesScanned >= STATS_MIN_FILES_FOR_SCORE; + const score = scoreEligible + ? await computeScore( + filterDiagnosticsForSurface(accumulator.diagnostics, "score", userConfig), + accumulator.filesScanned, + ) + : null; + const rawScore = score?.score ?? 
null; + + return { + key: accumulator.key, + provider: accumulator.provider, + sessions: accumulator.sessions, + filesScanned: accumulator.filesScanned, + unreconstructable: accumulator.unreconstructable, + totalDiagnostics: accumulator.diagnostics.length, + errorCount, + warningCount: accumulator.diagnostics.length - errorCount, + diagnosticsPerFile: + accumulator.filesScanned > 0 ? accumulator.diagnostics.length / accumulator.filesScanned : 0, + score: rawScore, + scoreLabel: score?.label ?? null, + weightedScore: confidenceWeightedScore( + rawScore, + priorScore, + accumulator.filesScanned, + accumulator.sessions, + ), + topRules: topRules(accumulator.diagnostics), + }; +}; + +/** + * Rank groups best-first by the confidence-weighted score; ties (and score-less + * groups) break on fewer diagnostics-per-file. Only groups with enough scanned + * files to be ranked fairly are returned. + */ +const rankGroups = (groups: ReadonlyArray<GroupStats>): GroupStats[] => + [...groups] + .filter((group) => group.filesScanned >= STATS_MIN_FILES_FOR_SCORE) + .sort((left, right) => { + if ( + left.weightedScore !== null && + right.weightedScore !== null && + left.weightedScore !== right.weightedScore + ) { + return right.weightedScore - left.weightedScore; + } + if (left.weightedScore !== null && right.weightedScore === null) return -1; + if (left.weightedScore === null && right.weightedScore !== null) return 1; + return left.diagnosticsPerFile - right.diagnosticsPerFile; + }); + +export interface AggregatedStats { + readonly models: GroupStats[]; + readonly providers: GroupStats[]; + readonly best: GroupStats | null; + readonly worst: GroupStats | null; +} + +/** + * Group scan results by model and by provider, compute a 0-100 score per group + * (one Score API call each), and rank them into a leaderboard. 
+ */ +export const aggregateStats = async ( + results: ReadonlyArray<SessionScanResult>, + userConfig: ReactDoctorConfig | null, + computeScore: ScoreComputer = defaultScoreComputer, +): Promise<AggregatedStats> => { + const modelGroups = new Map<string, Accumulator>(); + const providerGroups = new Map<string, Accumulator>(); + for (const result of results) { + upsert( + modelGroups, + `${result.session.provider}/${result.session.model}`, + result.session.provider, + result, + ); + upsert(providerGroups, result.session.provider, result.session.provider, result); + } + + // Global mean across every scanned file — the prior every group regresses + // toward, so a small sample can't top the board on a lucky run. + const totalFiles = results.reduce((sum, result) => sum + result.filesScanned, 0); + const priorScore = + totalFiles >= STATS_MIN_FILES_FOR_SCORE + ? (( + await computeScore( + filterDiagnosticsForSurface( + results.flatMap((result) => result.diagnostics), + "score", + userConfig, + ), + totalFiles, + ) + )?.score ?? null) + : null; + + const models = await Promise.all( + [...modelGroups.values()].map((group) => + toGroupStats(group, userConfig, computeScore, priorScore), + ), + ); + const providers = await Promise.all( + [...providerGroups.values()].map((group) => + toGroupStats(group, userConfig, computeScore, priorScore), + ), + ); + + const rankedModels = rankGroups(models); + return { + models: rankedModels, + providers: rankGroups(providers), + best: rankedModels[0] ?? null, + worst: rankedModels.length > 1 ? rankedModels[rankedModels.length - 1] : null, + }; +}; diff --git a/packages/react-doctor/src/stats/constants.ts b/packages/react-doctor/src/stats/constants.ts new file mode 100644 index 000000000..d2320eff0 --- /dev/null +++ b/packages/react-doctor/src/stats/constants.ts @@ -0,0 +1,44 @@ +// Source file extensions React Doctor can lint. Reconstructed files outside +// this allowlist are dropped before scanning (assets, notebooks, markdown). 
+export const STATS_LINTABLE_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"] as const; + +// Default cap on agent sessions scanned in one `stats` run. Each session is one +// oxlint subprocess, so an unbounded run over a machine's whole history could +// spawn thousands. Bounded by default; raise with `--limit`. +export const STATS_DEFAULT_SESSION_LIMIT = 200; + +// Concurrent per-session lint scans. Each scan pins oxlint to a single worker, +// so this is the real fan-out across CPU cores. +export const STATS_SCAN_CONCURRENCY = 6; + +// Temp-dir prefix for a per-session reconstructed source tree. +export const STATS_TEMP_DIR_PREFIX = "react-doctor-stats-"; + +// A group (model/provider) needs at least this many scanned files before its +// score is shown; below it the sample is too small to rank fairly. +export const STATS_MIN_FILES_FOR_SCORE = 3; + +// Confidence weighting for the leaderboard. A group's raw 0-100 score is pulled +// toward the global mean by a Bayesian average so a model can't top the board on +// a handful of files. The prior carries this many "average" effective files of +// weight; a group needs more effective files than this before its own score +// dominates the prior. +export const STATS_SCORE_PRIOR_FILES = 25; + +// Sessions discount the file weight (many files from a single session are one +// correlated sample), but only mildly — files are the heavier signal. Session +// reliability ramps from the floor below toward 1 as sessions grow: +// reliability = FLOOR + (1 - FLOOR) * sessions / (sessions + PRIOR) +export const STATS_SCORE_SESSION_PRIOR = 2; + +// Floor on session reliability: a group keeps at least this fraction of its file +// weight no matter how few sessions it has, so sessions can only shave off the +// remaining (1 - FLOOR). Closer to 1 = files dominate even harder. +export const STATS_SCORE_SESSION_FLOOR = 0.6; + +// Most-fired rules shown per group in the report. 
+export const STATS_TOP_RULES_PER_GROUP = 3; + +// Label used when a session does not expose a stable model id (e.g. a Cursor +// composer left on the "Auto" default with no per-bubble model recorded). +export const STATS_UNKNOWN_MODEL = "unknown"; diff --git a/packages/react-doctor/src/stats/cursor-db.ts b/packages/react-doctor/src/stats/cursor-db.ts new file mode 100644 index 000000000..bbf16a109 --- /dev/null +++ b/packages/react-doctor/src/stats/cursor-db.ts @@ -0,0 +1,185 @@ +import * as fs from "node:fs"; +import { createRequire } from "node:module"; +import * as os from "node:os"; +import * as path from "node:path"; + +const nodeRequire = createRequire(import.meta.url); + +// Cursor persists chat state in a single SQLite file. The GUI agent's model +// selection, tool calls (edits), and full post-edit file snapshots all live in +// here — the agent-transcript JSONL files do not record the model at all. +const CURSOR_DB_RELATIVE_PATH = path.join("User", "globalStorage", "state.vscdb"); +const COMPOSER_DATA_PREFIX = "composerData:"; +const BUBBLE_PREFIX = "bubbleId:"; +const COMPOSER_HEADERS_KEY = "composer.composerHeaders"; + +/** One chat in the composer index, with its newest-activity timestamp. */ +interface CursorComposerHeader { + readonly composerId: string; + readonly modifiedMs: number; +} + +/** Read-only accessor over the Cursor composer database. */ +interface CursorDbHandle { + composerHeaders(): CursorComposerHeader[]; + composerValue(composerId: string): string | null; + bubbleValues(composerId: string): string[]; + contentValue(contentId: string): string | null; +} + +export type { CursorComposerHeader, CursorDbHandle }; + +const asString = (value: unknown): string | null => (typeof value === "string" ? value : null); + +const asRecord = (value: unknown): Record<string, unknown> | null => + value && typeof value === "object" && !Array.isArray(value) + ? 
(value as Record<string, unknown>) + : null; + +const cursorAppDir = (): string => { + if (process.platform === "darwin") { + return path.join(os.homedir(), "Library", "Application Support", "Cursor"); + } + if (process.platform === "win32") { + const appData = process.env.APPDATA ?? path.join(os.homedir(), "AppData", "Roaming"); + return path.join(appData, "Cursor"); + } + const configHome = process.env.XDG_CONFIG_HOME ?? path.join(os.homedir(), ".config"); + return path.join(configHome, "Cursor"); +}; + +/** + * Absolute path to the Cursor composer database, honoring a + * `REACT_DOCTOR_CURSOR_DB` override (used by tests). Returns `null` when no + * readable database exists. + */ +export const resolveCursorDbPath = (): string | null => { + const candidate = + process.env.REACT_DOCTOR_CURSOR_DB ?? path.join(cursorAppDir(), CURSOR_DB_RELATIVE_PATH); + return fs.existsSync(candidate) ? candidate : null; +}; + +const modifiedMsFromHeader = (head: Record<string, unknown>): number => { + const lastUpdatedAt = head.lastUpdatedAt; + if (typeof lastUpdatedAt === "number") return lastUpdatedAt; + const createdAt = head.createdAt; + if (typeof createdAt === "number") return createdAt; + return 0; +}; + +const parseComposerHeaders = (raw: string): CursorComposerHeader[] => { + let decoded: unknown; + try { + decoded = JSON.parse(raw); + } catch { + return []; + } + const record = asRecord(decoded); + const list = Array.isArray(decoded) + ? decoded + : record && Array.isArray(record.allComposers) + ? record.allComposers + : []; + const headers: CursorComposerHeader[] = []; + for (const entry of list) { + const head = asRecord(entry); + const composerId = head && asString(head.composerId); + if (head && composerId) { + headers.push({ composerId, modifiedMs: modifiedMsFromHeader(head) }); + } + } + return headers; +}; + +// node:sqlite returns each row as an object keyed by column name. +const rowValueString = (row: unknown): string | null => { + const record = asRecord(row); + return record ? 
asString(record.value) : null; +}; + +// The exclusive upper bound for a key prefix: the prefix with its last byte +// incremented. A `key >= prefix AND key < upper` range always uses the primary +// key index, unlike `LIKE 'prefix%'`, which a BINARY-collated index can't serve +// (and so falls back to a full scan of the multi-GB database). +const prefixUpperBound = (prefix: string): string => + prefix.slice(0, -1) + String.fromCharCode(prefix.charCodeAt(prefix.length - 1) + 1); + +const makeHandle = (dbPath: string): CursorDbHandle | null => { + let database: { + prepare(sql: string): { + get(...params: unknown[]): unknown; + all(...params: unknown[]): unknown[]; + }; + }; + try { + // `node:sqlite` is built in on Node 22.13+/24+; absent on older Node, where + // the require throws and Cursor stats degrade to "no sessions found". + const { DatabaseSync } = nodeRequire("node:sqlite"); + database = new DatabaseSync(dbPath, { readOnly: true }); + } catch { + return null; + } + + const headersStatement = database.prepare(`SELECT value FROM ItemTable WHERE key = ?`); + const composerStatement = database.prepare(`SELECT value FROM cursorDiskKV WHERE key = ?`); + const bubbleStatement = database.prepare( + `SELECT value FROM cursorDiskKV WHERE key >= ? AND key < ?`, + ); + + return { + composerHeaders(): CursorComposerHeader[] { + try { + const raw = rowValueString(headersStatement.get(COMPOSER_HEADERS_KEY)); + return raw ? 
parseComposerHeaders(raw) : []; + } catch { + return []; + } + }, + composerValue(composerId: string): string | null { + try { + return rowValueString(composerStatement.get(`${COMPOSER_DATA_PREFIX}${composerId}`)); + } catch { + return null; + } + }, + bubbleValues(composerId: string): string[] { + try { + const prefix = `${BUBBLE_PREFIX}${composerId}:`; + const rows = bubbleStatement.all(prefix, prefixUpperBound(prefix)); + const values: string[] = []; + for (const row of rows) { + const value = rowValueString(row); + if (value) values.push(value); + } + return values; + } catch { + return []; + } + }, + contentValue(contentId: string): string | null { + try { + return rowValueString(composerStatement.get(contentId)); + } catch { + return null; + } + }, + }; +}; + +// One open handle per process — opening is cheap (SQLite memory-maps lazily), +// but reopening per composer during a scan would thrash. `closeCursorDb` resets +// it for tests; the CLI relies on process exit. +let cachedHandle: { dbPath: string; handle: CursorDbHandle | null } | null = null; + +/** Open (and memoize) the composer database, or `null` when unavailable. */ +export const openCursorDb = (dbPath: string | null): CursorDbHandle | null => { + if (!dbPath) return null; + if (cachedHandle && cachedHandle.dbPath === dbPath) return cachedHandle.handle; + cachedHandle = { dbPath, handle: makeHandle(dbPath) }; + return cachedHandle.handle; +}; + +/** Drop the memoized handle (tests open fresh fixture databases). 
/**
 * Whether `childPath` is `parentPath` itself or lives somewhere beneath it.
 *
 * The relative path escapes the parent only when it is exactly `..` or starts
 * with a `..` path segment. A plain `startsWith("..")` would also reject
 * legitimate children whose first segment merely begins with two dots
 * (e.g. `..cache/a.tsx`).
 */
const isPathUnder = (childPath: string, parentPath: string): boolean => {
  const relative = path.relative(parentPath, childPath);
  // On Windows a path on another drive comes back absolute: never "under".
  if (path.isAbsolute(relative)) return false;
  return relative !== ".." && !relative.startsWith(`..${path.sep}`);
};
scope.since.getTime() : null; + const sessions: AgentSession[] = []; + for (const candidate of candidates) { + if (sinceMs !== null && candidate.modifiedMs > 0 && candidate.modifiedMs < sinceMs) break; + const session = candidate.load(); + if (!session || session.edits.length === 0) continue; + if (!scope.global && !sessionTouchesRepo(session, repoRoot)) continue; + sessions.push(session); + if (sessions.length >= scope.limit) break; + } + return sessions; +}; diff --git a/packages/react-doctor/src/stats/is-react-source.ts b/packages/react-doctor/src/stats/is-react-source.ts new file mode 100644 index 000000000..f220eeb89 --- /dev/null +++ b/packages/react-doctor/src/stats/is-react-source.ts @@ -0,0 +1,42 @@ +// JSX-bearing extensions imply a React (or React-like) component in this +// product's universe — `.ts`/`.js` cannot hold JSX, so they need a content +// signal instead. +const JSX_EXTENSION_PATTERN = /\.(tsx|jsx)$/; + +// `"use client"` / `"use server"` directives mark React Server Component +// boundaries and server actions — React code even without a `react` import. +const REACT_DIRECTIVE_PATTERN = /^\s*['"]use (?:client|server)['"]/m; + +// Every `from "…"`, `require("…")`, and `import("…")` specifier in a file. +const MODULE_SPECIFIER_PATTERN = /(?:\bfrom\s*|\brequire\(\s*|\bimport\(\s*)['"]([^'"]+)['"]/g; + +// React framework packages that don't carry "react" in their name. 
const REACT_FRAMEWORK_ROOTS = ["next", "expo", "gatsby", "@remix-run", "@shopify/hydrogen"];

/**
 * Whether an import specifier (compared case-insensitively) points at React,
 * Preact, a `react-*` / `@react-*` / `…/react` package, or one of the
 * framework roots listed in `REACT_FRAMEWORK_ROOTS`.
 */
const isReactModuleSpecifier = (specifier: string): boolean => {
  const name = specifier.toLowerCase();
  // True for the bare package name or any subpath import of it.
  const isPackageOrSubpath = (root: string): boolean =>
    name === root || name.startsWith(`${root}/`);

  const isCoreReact = isPackageOrSubpath("react") || name.startsWith("react-");
  // Scoped/nested React packages: `@tanstack/react-query`, `@react-navigation/native`, …
  const isNestedReact =
    name.includes("/react-") || name.endsWith("/react") || name.startsWith("@react-");

  return (
    isCoreReact ||
    isNestedReact ||
    isPackageOrSubpath("preact") ||
    REACT_FRAMEWORK_ROOTS.some((root) => isPackageOrSubpath(root))
  );
};
/**
 * Whether `childPath` lies strictly inside `parentPath` (the directory itself
 * does not count).
 *
 * Only a relative path that is exactly `..` or begins with a `..` segment
 * escapes the parent. A bare `startsWith("..")` would also reject valid
 * entries such as `..generated/a.tsx`.
 */
const isInsideDirectory = (childPath: string, parentPath: string): boolean => {
  const relative = path.relative(parentPath, childPath);
  // Empty string means "same directory"; absolute means another drive/root.
  if (relative === "" || path.isAbsolute(relative)) return false;
  return relative !== ".." && !relative.startsWith(`..${path.sep}`);
};
/**
 * Write reconstructed file content into a fresh temp directory and copy the
 * project-config entries named by `STAGED_FILES_PROJECT_CONFIG_FILENAMES` from
 * `scanRoot` alongside them (never overwriting a reconstructed file).
 *
 * Files whose `relativePath` would resolve outside the temp directory are
 * skipped. If anything throws while the tree is being populated, the temp
 * directory is removed before the error propagates, so a failed
 * materialization does not leak a directory the caller has no handle to.
 *
 * @returns the temp directory, its `realpath`, the relative paths actually
 *   written, and a best-effort `cleanup` callback.
 */
export const materializeReconstructedTree = (
  scanRoot: string,
  files: ReadonlyArray<ReconstructedFile>,
): MaterializedReconstruction => {
  const tempDirectory = fs.mkdtempSync(path.join(os.tmpdir(), STATS_TEMP_DIR_PREFIX));
  const resolvedTempDirectory = path.resolve(tempDirectory);

  const cleanup = (): void => {
    try {
      fs.rmSync(resolvedTempDirectory, { recursive: true, force: true });
    } catch {
      // Best-effort; the OS tempdir reaper eventually runs.
    }
  };

  try {
    const relativePaths: string[] = [];
    for (const file of files) {
      const targetPath = path.resolve(resolvedTempDirectory, file.relativePath);
      // Zip-slip guard: never write outside the temp tree.
      if (!isInsideDirectory(targetPath, resolvedTempDirectory)) continue;
      fs.mkdirSync(path.dirname(targetPath), { recursive: true });
      fs.writeFileSync(targetPath, file.content);
      relativePaths.push(file.relativePath);
    }

    for (const configFilename of STAGED_FILES_PROJECT_CONFIG_FILENAMES) {
      const sourcePath = path.join(scanRoot, configFilename);
      const targetPath = path.join(resolvedTempDirectory, configFilename);
      // A reconstructed file with the same name takes precedence over the copy.
      if (fs.existsSync(sourcePath) && !fs.existsSync(targetPath)) {
        fs.cpSync(sourcePath, targetPath, { recursive: true });
      }
    }

    // Fall back to the unresolved path when realpath is unavailable.
    let realTempDirectory = resolvedTempDirectory;
    try {
      realTempDirectory = fs.realpathSync(resolvedTempDirectory);
    } catch {
      realTempDirectory = resolvedTempDirectory;
    }

    return {
      tempDirectory: resolvedTempDirectory,
      realTempDirectory,
      relativePaths,
      cleanup,
    };
  } catch (error) {
    // The caller never received `cleanup`, so remove the partial tree here.
    cleanup();
    throw error;
  }
};
const FILE_HEADER = /^\*\*\* (Add|Update|Delete) File: (.+)$/;
const MOVE_HEADER = /^\*\*\* Move to: (.+)$/;

/**
 * Parse an `apply_patch` envelope (`*** Begin Patch` … `*** End Patch`) into
 * per-file operations. `update` ops keep their raw hunk body so it can be
 * applied by line search later. Returns `[]` when no file header is found.
 *
 * Envelope noise is kept out of the hunk bodies:
 * - lines are split on LF or CRLF, so a CRLF patch still matches the file
 *   headers (`.` never matches `\r`, so a trailing `\r` used to defeat them);
 * - `*** End Patch` closes the current file, so the empty string produced by
 *   a trailing newline is not recorded as a blank context line;
 * - `*** End of File` markers are skipped rather than stored as hunk lines.
 */
export const parseApplyPatch = (patchText: string): PatchOp[] => {
  const lines = patchText.split(/\r?\n/);
  const ops: PatchOp[] = [];
  let current: { type: PatchOpType; path: string; movePath?: string; body: string[] } | null = null;

  // Emit the op collected so far (if any) and reset the collector.
  const flush = (): void => {
    if (!current) return;
    if (current.type === "add") {
      ops.push({
        type: "add",
        path: current.path,
        // Added files carry their content as `+`-prefixed lines.
        addedLines: current.body
          .filter((line) => line.startsWith("+"))
          .map((line) => line.slice(1)),
      });
    } else if (current.type === "update") {
      ops.push({
        type: "update",
        path: current.path,
        hunkLines: current.body,
        ...(current.movePath ? { movePath: current.movePath } : {}),
      });
    } else {
      ops.push({ type: "delete", path: current.path });
    }
    current = null;
  };

  for (const line of lines) {
    if (line.startsWith("*** Begin Patch")) continue;
    if (line.startsWith("*** End Patch")) {
      // Anything after the envelope terminator is not part of a hunk.
      flush();
      continue;
    }
    const header = FILE_HEADER.exec(line);
    if (header) {
      flush();
      const kind = header[1].toLowerCase();
      current = {
        type: kind === "add" ? "add" : kind === "delete" ? "delete" : "update",
        path: header[2].trim(),
        body: [],
      };
      continue;
    }
    if (!current) continue;
    if (line === "*** End of File") continue;
    const move = MOVE_HEADER.exec(line);
    if (move && current.type === "update") {
      current.movePath = move[1].trim();
      continue;
    }
    current.body.push(line);
  }
  flush();
  return ops;
};
/**
 * Replay a literal search-and-replace edit on `source`.
 *
 * Returns `source` unchanged when `oldString` is empty or absent. With
 * `replaceAll` every occurrence is replaced, otherwise only the first.
 *
 * The first-occurrence case is spliced by index instead of calling
 * `String.prototype.replace` with a string replacement: that API expands
 * `$&`, `$$`, `$'` and similar patterns inside the replacement, which would
 * silently corrupt code containing `$`.
 */
const applyStringReplace = (
  source: string,
  oldString: string,
  newString: string,
  replaceAll: boolean,
): string => {
  if (oldString === "") return source;
  const firstIndex = source.indexOf(oldString);
  if (firstIndex === -1) return source;
  if (replaceAll) return source.split(oldString).join(newString);
  return source.slice(0, firstIndex) + newString + source.slice(firstIndex + oldString.length);
};
/**
 * Replay a session's reads and edits into the final content of each touched
 * file. Only files with a lintable extension are reported: those ending with
 * string content go to `files`, those that never obtained a usable base go to
 * `unreconstructable`, and deleted files are reported in neither list.
 *
 * When an apply_patch `update` cannot be applied (`applyUpdateHunks` returns
 * `null`), the stale buffer for that path is dropped. The file then counts as
 * unreconstructable unless a later full write or `Add File` re-establishes it,
 * instead of being emitted with content that predates the failed edit.
 */
export const reconstructSession = (session: AgentSession): SessionReconstruction => {
  // `string` = current content, `null` = deleted. Absent = no faithful base yet.
  const buffers = new Map<string, string | null>();
  const touchedLintable = new Set<string>();

  // Captured reads seed the base content that later edits are applied to.
  for (const read of session.reads) {
    const resolved = resolveAgainstCwd(read.path, session.cwd);
    if (resolved) buffers.set(resolved, read.content);
  }

  const applyPatchOps = (patchText: string): void => {
    for (const op of parseApplyPatch(patchText)) {
      const resolved = resolveAgainstCwd(op.path, session.cwd);
      if (!resolved) continue;
      if (isLintablePath(resolved)) touchedLintable.add(resolved);
      if (op.type === "add") {
        const lines = op.addedLines ?? [];
        buffers.set(resolved, lines.length > 0 ? `${lines.join("\n")}\n` : "");
      } else if (op.type === "delete") {
        buffers.set(resolved, null);
      } else {
        const base = buffers.get(resolved);
        if (typeof base !== "string") continue;
        const applied = applyUpdateHunks(base, op.hunkLines ?? []);
        if (applied === null) {
          // The buffer no longer matches what the patch expected; forget it so
          // the file is not linted with content that misses this edit.
          buffers.delete(resolved);
          continue;
        }
        const movedTo = op.movePath && resolveAgainstCwd(op.movePath, session.cwd);
        if (movedTo) {
          // A move deletes the old path and carries the new content along.
          buffers.set(resolved, null);
          buffers.set(movedTo, applied);
          if (isLintablePath(movedTo)) touchedLintable.add(movedTo);
        } else {
          buffers.set(resolved, applied);
        }
      }
    }
  };

  for (const edit of session.edits) {
    if (edit.kind === "patch") {
      applyPatchOps(edit.patch ?? "");
      continue;
    }
    const resolved = resolveAgainstCwd(edit.path, session.cwd);
    if (!resolved) continue;
    if (isLintablePath(resolved)) touchedLintable.add(resolved);
    if (edit.kind === "write") {
      buffers.set(resolved, edit.content ?? edit.resultContent ?? "");
    } else if (edit.kind === "delete") {
      buffers.set(resolved, null);
    } else {
      // String replacement: only meaningful on top of known content.
      const base = buffers.get(resolved);
      if (typeof base !== "string") continue;
      buffers.set(
        resolved,
        applyStringReplace(
          base,
          edit.oldString ?? "",
          edit.newString ?? "",
          edit.replaceAll ?? false,
        ),
      );
    }
  }

  const files: ReconstructedContent[] = [];
  const unreconstructable: string[] = [];
  for (const absolutePath of touchedLintable) {
    const content = buffers.get(absolutePath);
    if (typeof content === "string") {
      files.push({ absolutePath, content });
    } else if (content === undefined) {
      // Edited but without a usable base (never read/written in full, or an
      // update that could not be applied). Deleted files (null) are
      // intentional removals, not coverage gaps.
      unreconstructable.push(absolutePath);
    }
  }

  return { session, files, unreconstructable };
};
"").length)), + ); + const pad = (cell: string, columnIndex: number): string => { + const visibleLength = stripAnsi(cell).length; + return cell + " ".repeat(Math.max(0, widths[columnIndex] - visibleLength)); + }; + const headerLine = headers.map((header, index) => highlighter.dim(pad(header, index))).join(" "); + const bodyLines = rows.map((row) => row.map((cell, index) => pad(cell, index)).join(" ")); + return [headerLine, ...bodyLines].join("\n"); +}; + +const renderModelTable = (models: ReadonlyArray): string => { + const rows = models.map((group, index) => [ + String(index + 1), + highlighter.bold(modelLabel(group)), + colorForProvider(group.provider)(group.provider), + String(group.filesScanned), + renderScore(group), + ]); + return renderTable(["#", "Model", "Tool", "Files", "Score"], rows); +}; + +const renderProviderTable = (providers: ReadonlyArray): string => { + const rows = providers.map((group) => [ + highlighter.bold(colorForProvider(group.provider)(group.provider)), + String(group.filesScanned), + renderScore(group), + ]); + return renderTable(["Tool", "Files", "Score"], rows); +}; + +const calloutScore = (group: GroupStats): string => + group.weightedScore !== null ? ` (${group.weightedScore})` : ""; + +const renderCallout = (report: StatsReport): string => { + if (!report.best) return ""; + const lines: string[] = []; + lines.push( + `${highlighter.success("Best")}: ${highlighter.bold( + modelLabel(report.best), + )}${calloutScore(report.best)}`, + ); + if (report.worst && report.worst.key !== report.best.key) { + lines.push( + `${highlighter.error("Worst")}: ${highlighter.bold( + modelLabel(report.worst), + )}${calloutScore(report.worst)}`, + ); + } + return lines.join("\n"); +}; + +/** Render the leaderboard to a string for the terminal. */ +export const renderStatsReport = (report: StatsReport): string => { + const scopePhrase = report.scope === "global" ? 
"across all your projects" : "in this project"; + const header = [ + highlighter.bold("React Doctor leaderboard"), + highlighter.dim( + `Which agent writes the cleanest React code ${scopePhrase}. Higher is better, 0 to 100.`, + ), + ].join("\n"); + + if (report.models.length === 0) { + return [ + header, + "", + highlighter.dim( + "Nothing to rank yet. The edits touched only non-React files, were too few, or could not be replayed.", + ), + ].join("\n"); + } + + const sections = [ + header, + "", + renderModelTable(report.models), + "", + highlighter.dim("By tool:"), + renderProviderTable(report.providers), + ]; + + const callout = renderCallout(report); + if (callout) { + sections.push("", callout); + } + + const notes: string[] = []; + if (report.sessionsNonReact > 0) { + notes.push(`Skipped ${report.sessionsNonReact} that changed only non-React files.`); + } + if (report.sessionsUnreconstructable > 0) { + notes.push(`Skipped ${report.sessionsUnreconstructable} that used edits we could not replay.`); + } + if (notes.length > 0) { + sections.push("", ...notes.map((note) => highlighter.dim(note))); + } + + return sections.join("\n"); +}; diff --git a/packages/react-doctor/src/stats/run-stats-scan.ts b/packages/react-doctor/src/stats/run-stats-scan.ts new file mode 100644 index 000000000..aa67fa25c --- /dev/null +++ b/packages/react-doctor/src/stats/run-stats-scan.ts @@ -0,0 +1,156 @@ +import * as path from "node:path"; +import { mapWithConcurrency, runEditorScan, type Diagnostic } from "@react-doctor/core"; +import { STATS_SCAN_CONCURRENCY } from "./constants.js"; +import { isReactSourceFile } from "./is-react-source.js"; +import { materializeReconstructedTree } from "./materialize-reconstructed-tree.js"; +import { reconstructSession } from "./reconstruct-files.js"; +import type { AgentSession, ReconstructedFile, SessionScanResult } from "./types.js"; + +const toPosix = (filePath: string): string => filePath.split(path.sep).join("/"); + +/** Longest shared directory 
/**
 * Deepest directory shared by the parent directories of every given path,
 * compared segment by segment. Returns `null` for an empty list or when the
 * shared prefix joins to an empty string.
 */
const commonAncestorDirectory = (absolutePaths: ReadonlyArray<string>): string | null => {
  const [firstSegments, ...otherSegments] = absolutePaths.map((absolutePath) =>
    path.dirname(absolutePath).split(path.sep),
  );
  if (firstSegments === undefined) return null;

  // Narrow the running prefix to the part each further path agrees with.
  const sharedSegments = otherSegments.reduce((prefix, segments) => {
    const mismatchAt = prefix.findIndex(
      (segment, position) => position >= segments.length || segment !== segments[position],
    );
    return mismatchAt === -1 ? prefix : prefix.slice(0, mismatchAt);
  }, firstSegments);

  const joined = sharedSegments.join(path.sep);
  return joined.length > 0 ? joined : null;
};
+ */ +const resolveScanRoot = ( + session: AgentSession, + fileAbsolutePaths: ReadonlyArray, + repoRoot: string | null, +): string | null => { + if (repoRoot) return repoRoot; + if (session.cwd) return session.cwd; + return commonAncestorDirectory(fileAbsolutePaths); +}; + +const scanSession = async ( + session: AgentSession, + repoRoot: string | null, +): Promise => { + const reconstruction = reconstructSession(session); + const empty: SessionScanResult = { + session, + diagnostics: [], + filesScanned: 0, + reconstructedFiles: reconstruction.files.length, + unreconstructable: reconstruction.unreconstructable.length, + }; + // React Doctor only scores React code; ranking a model on the plain + // backend/util/config files it also wrote would dilute its diagnostics-per- + // file and skew the leaderboard toward whoever wrote the most non-React code. + const reactFiles = reconstruction.files.filter((file) => + isReactSourceFile(file.absolutePath, file.content), + ); + if (reactFiles.length === 0) return empty; + + const scanRoot = resolveScanRoot( + session, + reactFiles.map((file) => file.absolutePath), + repoRoot, + ); + if (!scanRoot) return empty; + + const files: ReconstructedFile[] = []; + for (const file of reactFiles) { + const relative = toPosix(path.relative(scanRoot, file.absolutePath)); + if (!relative || relative.startsWith("..") || path.isAbsolute(relative)) continue; + files.push({ ...file, relativePath: relative }); + } + if (files.length === 0) return empty; + + const tree = materializeReconstructedTree(scanRoot, files); + try { + const result = await runEditorScan({ + directory: tree.tempDirectory, + includePaths: tree.relativePaths, + lint: true, + runDeadCode: false, + // The node running the CLI can load oxlint's native binding. 
+ nodeBinaryPath: process.execPath, + }); + const diagnostics: Diagnostic[] = result.diagnostics.map((diagnostic) => ({ + ...diagnostic, + filePath: remapDiagnosticPath( + diagnostic.filePath, + tree.tempDirectory, + tree.realTempDirectory, + scanRoot, + ), + })); + return { + session, + diagnostics, + filesScanned: tree.relativePaths.length, + reconstructedFiles: reconstruction.files.length, + unreconstructable: reconstruction.unreconstructable.length, + }; + } finally { + tree.cleanup(); + } +}; + +export interface RunStatsScanOptions { + /** Reports `(completedCount, totalCount)` as each session finishes. */ + readonly onProgress?: (completedCount: number, totalCount: number) => void; +} + +/** + * Reconstruct and lint every session with bounded concurrency. `repoRoot` pins + * the scan root for repo-scoped runs; pass `null` for global runs (per-session + * root inferred from cwd / edited files). Each session that yields content + * spawns one oxlint subprocess, so progress is reported per session. 
+ */ +export const runStatsScan = ( + sessions: ReadonlyArray, + repoRoot: string | null, + options: RunStatsScanOptions = {}, +): Promise => { + let completedCount = 0; + return mapWithConcurrency(sessions, STATS_SCAN_CONCURRENCY, async (session) => { + const result = await scanSession(session, repoRoot); + completedCount += 1; + options.onProgress?.(completedCount, sessions.length); + return result; + }); +}; diff --git a/packages/react-doctor/src/stats/sources/claude.ts b/packages/react-doctor/src/stats/sources/claude.ts new file mode 100644 index 000000000..29be38e49 --- /dev/null +++ b/packages/react-doctor/src/stats/sources/claude.ts @@ -0,0 +1,150 @@ +import * as os from "node:os"; +import * as path from "node:path"; +import { fileSessionCandidates, findJsonlFiles, readJsonlEntries } from "../walk-transcripts.js"; +import { STATS_UNKNOWN_MODEL } from "../constants.js"; +import type { AgentSession, FileEdit, FileRead, SourceDef } from "./index.js"; + +const asString = (value: unknown): string | undefined => + typeof value === "string" && value.length > 0 ? value : undefined; + +const asRecord = (value: unknown): Record | undefined => + value && typeof value === "object" && !Array.isArray(value) + ? (value as Record) + : undefined; + +const asArray = (value: unknown): unknown[] => (Array.isArray(value) ? value : []); + +const EDIT_TOOL_NAMES = new Set(["Write", "Edit", "MultiEdit"]); + +const editsFromToolUse = (name: string, input: Record): FileEdit[] => { + const filePath = asString(input.file_path); + if (!filePath) return []; + if (name === "Write") { + return [{ kind: "write", path: filePath, content: asString(input.content) ?? "" }]; + } + if (name === "Edit") { + return [ + { + kind: "replace", + path: filePath, + oldString: asString(input.old_string) ?? "", + newString: asString(input.new_string) ?? "", + replaceAll: input.replace_all === true, + }, + ]; + } + // MultiEdit: a sequence of replacements applied in order. 
+ return asArray(input.edits).flatMap((rawEdit) => { + const edit = asRecord(rawEdit); + if (!edit) return []; + return [ + { + kind: "replace" as const, + path: filePath, + oldString: asString(edit.old_string) ?? "", + newString: asString(edit.new_string) ?? "", + replaceAll: edit.replace_all === true, + }, + ]; + }); +}; + +export const parseClaudeSession = (transcriptPath: string): AgentSession | null => { + const edits: FileEdit[] = []; + const reads: FileRead[] = []; + const modelCounts = new Map(); + let cwd: string | null = null; + let startedAt: string | undefined; + let endedAt: string | undefined; + let sawAnything = false; + + readJsonlEntries(transcriptPath, (entry) => { + sawAnything = true; + const timestamp = asString(entry.timestamp); + if (timestamp) { + if (!startedAt || timestamp < startedAt) startedAt = timestamp; + if (!endedAt || timestamp > endedAt) endedAt = timestamp; + } + if (!cwd) cwd = asString(entry.cwd) ?? null; + + // Post-edit / read snapshots ride a top-level `toolUseResult` on the + // following user/tool line — the most faithful reconstruction source. + const toolResult = asRecord(entry.toolUseResult); + if (toolResult) { + const resultFilePath = asString(toolResult.filePath); + if (resultFilePath && typeof toolResult.content === "string") { + edits.push({ kind: "write", path: resultFilePath, resultContent: toolResult.content }); + } + const readFile = asRecord(toolResult.file); + const readPath = readFile && asString(readFile.filePath); + if (readFile && readPath && typeof readFile.content === "string") { + reads.push({ path: readPath, content: readFile.content }); + } + } + + if (entry.type !== "assistant") return; + const message = asRecord(entry.message); + if (!message) return; + const model = asString(message.model); + if (model && model !== "") { + modelCounts.set(model, (modelCounts.get(model) ?? 
/**
 * Candidate `projects` directories, one per config directory. A non-empty
 * `CLAUDE_CONFIG_DIR` is read as a comma-separated list (entries trimmed,
 * blanks dropped); otherwise the defaults are `<XDG config home>/claude` and
 * `~/.claude`.
 */
const claudeRoots = (): string[] => {
  const override = process.env.CLAUDE_CONFIG_DIR;
  let configDirs: string[];
  if (override) {
    configDirs = [];
    for (const piece of override.split(",")) {
      const trimmed = piece.trim();
      if (trimmed) configDirs.push(trimmed);
    }
  } else {
    const xdgHome = process.env.XDG_CONFIG_HOME ?? path.join(os.homedir(), ".config");
    configDirs = [path.join(xdgHome, "claude"), path.join(os.homedir(), ".claude")];
  }
  return configDirs.map((dir) => path.join(dir, "projects"));
};
value.length > 0 ? value : undefined; + +const asRecord = (value: unknown): Record | undefined => + value && typeof value === "object" && !Array.isArray(value) + ? (value as Record) + : undefined; + +// Codex reconstructs only `apply_patch` (`custom_tool_call`) edits — `shell` +// function calls (sed, heredoc redirects, …) are not faithfully reconstructable +// and are skipped. Model comes from `turn_context`, cwd from `session_meta`. +export const parseCodexSession = (transcriptPath: string): AgentSession | null => { + const edits: FileEdit[] = []; + const modelCounts = new Map(); + let cwd: string | null = null; + let sawAnything = false; + + readJsonlEntries(transcriptPath, (entry) => { + sawAnything = true; + const payload = asRecord(entry.payload); + if (!payload) return; + + if (entry.type === "session_meta" && !cwd) { + cwd = asString(payload.cwd) ?? null; + } + if (entry.type === "turn_context") { + if (!cwd) cwd = asString(payload.cwd) ?? null; + const model = asString(payload.model); + if (model) modelCounts.set(model, (modelCounts.get(model) ?? 0) + 1); + } + + if ( + payload.type === "custom_tool_call" && + payload.name === "apply_patch" && + typeof payload.input === "string" + ) { + edits.push({ kind: "patch", path: "", patch: payload.input }); + } + }); + + if (!sawAnything) return null; + + let model = STATS_UNKNOWN_MODEL; + let bestCount = 0; + for (const [candidate, count] of modelCounts) { + if (count > bestCount) { + model = candidate; + bestCount = count; + } + } + + return { + provider: "codex", + sessionId: path.basename(transcriptPath, ".jsonl"), + transcriptPath, + model, + cwd, + edits, + reads: [], + }; +}; + +const codexRoots = (): string[] => { + const home = process.env.CODEX_HOME ?? path.join(os.homedir(), ".codex"); + return [path.join(home, "sessions"), path.join(home, "archived_sessions")]; +}; + +export const codexSource: SourceDef = { + name: "codex", + candidates() { + // sessions/YYYY/MM/DD/rollout-*.jsonl → 4 levels. 
+ return fileSessionCandidates( + "codex", + codexRoots(), + (root) => findJsonlFiles(root, 5), + parseCodexSession, + ); + }, +}; diff --git a/packages/react-doctor/src/stats/sources/cursor.ts b/packages/react-doctor/src/stats/sources/cursor.ts new file mode 100644 index 000000000..b1efa7645 --- /dev/null +++ b/packages/react-doctor/src/stats/sources/cursor.ts @@ -0,0 +1,134 @@ +import { STATS_UNKNOWN_MODEL } from "../constants.js"; +import { openCursorDb, resolveCursorDbPath, type CursorDbHandle } from "../cursor-db.js"; +import { isLintablePath } from "../reconstruct-files.js"; +import type { AgentSession, FileEdit, SessionCandidate, SourceDef } from "./index.js"; + +const asString = (value: unknown): string | undefined => + typeof value === "string" && value.length > 0 ? value : undefined; + +const asRecord = (value: unknown): Record | undefined => + value && typeof value === "object" && !Array.isArray(value) + ? (value as Record) + : undefined; + +const parseJson = (raw: string | null | undefined): unknown => { + if (typeof raw !== "string") return undefined; + try { + return JSON.parse(raw); + } catch { + return undefined; + } +}; + +// The composer's selected model, ignoring the "Auto" sentinel which carries no +// concrete model id. +const composerModelName = (composer: Record | undefined): string | undefined => { + const modelConfig = composer && asRecord(composer.modelConfig); + const modelName = modelConfig && asString(modelConfig.modelName); + return modelName && modelName !== "default" ? modelName : undefined; +}; + +// One Cursor tool call. `edit_file_v2` records the full post-edit file behind a +// content id (`result.afterContentId`), giving exact reconstruction; the inline +// `streamingContent` is the fallback when that blob is gone. `delete_file` +// removes a path. Other tools (read, search, terminal) are ignored. 
+const editFromToolCall = ( + toolData: Record, + db: CursorDbHandle, +): FileEdit | null => { + if (toolData.status !== "completed") return null; + const name = asString(toolData.name); + if (!name) return null; + const params = asRecord(parseJson(asString(toolData.params))); + const filePath = params && asString(params.relativeWorkspacePath); + if (!filePath || !isLintablePath(filePath)) return null; + + if (name === "delete_file") { + return { kind: "delete", path: filePath }; + } + if (name !== "edit_file_v2") return null; + + const result = asRecord(parseJson(asString(toolData.result))); + const afterContentId = result && asString(result.afterContentId); + const content = afterContentId ? db.contentValue(afterContentId) : null; + const resultContent = content ?? asString(params?.streamingContent); + if (resultContent === undefined || resultContent === null) return null; + return { kind: "write", path: filePath, resultContent }; +}; + +// A composer can switch models mid-chat; when the conversation-level selection +// is "Auto", fall back to the model most bubbles were generated with. +const bubbleModelName = (bubble: Record): string | undefined => { + const modelInfo = asRecord(bubble.modelInfo); + const modelName = modelInfo && asString(modelInfo.modelName); + return modelName && modelName !== "default" ? modelName : undefined; +}; + +interface OrderedEdit { + readonly createdAt: number; + readonly edit: FileEdit; +} + +const buildCursorSession = (db: CursorDbHandle, composerId: string): AgentSession | null => { + const composer = asRecord(parseJson(db.composerValue(composerId))); + const orderedEdits: OrderedEdit[] = []; + const bubbleModelCounts = new Map(); + + for (const rawBubble of db.bubbleValues(composerId)) { + const bubble = asRecord(parseJson(rawBubble)); + if (!bubble) continue; + const model = bubbleModelName(bubble); + if (model) bubbleModelCounts.set(model, (bubbleModelCounts.get(model) ?? 
0) + 1); + const toolData = asRecord(bubble.toolFormerData); + if (!toolData) continue; + const edit = editFromToolCall(toolData, db); + if (edit) { + const createdAt = typeof bubble.createdAt === "number" ? bubble.createdAt : 0; + orderedEdits.push({ createdAt, edit }); + } + } + + // Apply edits in chronological order so the last write to a file wins. + orderedEdits.sort((left, right) => left.createdAt - right.createdAt); + + let mostCommonBubbleModel: string | undefined; + let bestCount = 0; + for (const [candidate, count] of bubbleModelCounts) { + if (count > bestCount) { + mostCommonBubbleModel = candidate; + bestCount = count; + } + } + + return { + provider: "cursor", + sessionId: composerId, + transcriptPath: `cursor-composer:${composerId}`, + model: composerModelName(composer) ?? mostCommonBubbleModel ?? STATS_UNKNOWN_MODEL, + cwd: null, + edits: orderedEdits.map((entry) => entry.edit), + reads: [], + }; +}; + +/** + * Enumerate every composer in the database as a lazy candidate. The header + * index is cheap to read; the per-composer bubble/content walk only runs when a + * candidate survives scope/`--since`/`--limit` filtering and `load()` is called. 
+ */ +export const cursorComposerCandidates = (dbPath: string | null): SessionCandidate[] => { + const db = openCursorDb(dbPath); + if (!db) return []; + return db.composerHeaders().map((header) => ({ + provider: "cursor" as const, + modifiedMs: header.modifiedMs, + load: () => buildCursorSession(db, header.composerId), + })); +}; + +export const cursorSource: SourceDef = { + name: "cursor", + candidates() { + return cursorComposerCandidates(resolveCursorDbPath()); + }, +}; diff --git a/packages/react-doctor/src/stats/sources/index.ts b/packages/react-doctor/src/stats/sources/index.ts new file mode 100644 index 000000000..bc1c69d2d --- /dev/null +++ b/packages/react-doctor/src/stats/sources/index.ts @@ -0,0 +1,26 @@ +import type { SessionCandidate, StatsProvider } from "../types.js"; +import { claudeSource } from "./claude.js"; +import { codexSource } from "./codex.js"; +import { cursorSource } from "./cursor.js"; + +export type { + AgentSession, + FileEdit, + FileRead, + SessionCandidate, + StatsProvider, +} from "../types.js"; + +/** + * A per-provider session source. Each source enumerates its sessions as cheap, + * lazily-loadable `SessionCandidate`s — transcript files for Claude/Codex, rows + * from the Cursor composer database for Cursor — so the rest of the pipeline is + * provider-agnostic. + */ +export interface SourceDef { + readonly name: StatsProvider; + /** Enumerate every candidate session for this provider (cheap; no parsing). 
*/ + candidates(): SessionCandidate[]; +} + +export const STATS_SOURCES: ReadonlyArray = [claudeSource, codexSource, cursorSource]; diff --git a/packages/react-doctor/src/stats/types.ts b/packages/react-doctor/src/stats/types.ts new file mode 100644 index 000000000..d6480d2ac --- /dev/null +++ b/packages/react-doctor/src/stats/types.ts @@ -0,0 +1,137 @@ +import type { Diagnostic } from "@react-doctor/core"; + +export type StatsProvider = "claude" | "codex" | "cursor"; + +export type FileEditKind = "write" | "replace" | "patch" | "delete"; + +/** + * One edit operation an agent performed on a file, normalized across + * providers. `replace` carries `oldString`/`newString`; `patch` carries a raw + * apply-patch envelope; `write` carries full `content`. `resultContent` is the + * post-edit full file content when the transcript records it directly (Claude + * tool results), which short-circuits replay reconstruction. + */ +export interface FileEdit { + readonly kind: FileEditKind; + readonly path: string; + readonly content?: string; + readonly oldString?: string; + readonly newString?: string; + readonly replaceAll?: boolean; + readonly patch?: string; + readonly resultContent?: string; +} + +/** A file the agent read, captured as a reconstruction base for replay. */ +export interface FileRead { + readonly path: string; + readonly content: string; +} + +/** A single agent run (one model), normalized from one transcript. */ +export interface AgentSession { + readonly provider: StatsProvider; + readonly sessionId: string; + readonly transcriptPath: string; + readonly model: string; + readonly cwd: string | null; + readonly startedAt?: string; + readonly endedAt?: string; + readonly edits: FileEdit[]; + readonly reads: FileRead[]; +} + +/** + * A discovered-but-not-yet-parsed session. 
Sources enumerate these cheaply so + * scope/`--since`/`--limit` can be applied before the expensive `load()` runs + * (a file read for transcript sources, a DB walk for the Cursor composer + * source). `modifiedMs` is the sort + `--since` key (0 when unknown). + */ +export interface SessionCandidate { + readonly provider: StatsProvider; + readonly modifiedMs: number; + load(): AgentSession | null; +} + +/** A faithfully reconstructed file as the model left it at session end. */ +export interface ReconstructedContent { + /** Absolute path the agent wrote to (used for attribution + display). */ + readonly absolutePath: string; + readonly content: string; +} + +/** A reconstructed file placed under a scan root, ready to materialize + lint. */ +export interface ReconstructedFile extends ReconstructedContent { + /** Path relative to the scan root, forward-slashed (temp-dir layout). */ + readonly relativePath: string; +} + +export interface SessionReconstruction { + readonly session: AgentSession; + readonly files: ReconstructedContent[]; + /** Paths touched but not faithfully reconstructable (e.g. Codex shell edits). */ + readonly unreconstructable: string[]; +} + +export interface SessionScanResult { + readonly session: AgentSession; + readonly diagnostics: Diagnostic[]; + /** React files actually linted (the score's denominator for this session). */ + readonly filesScanned: number; + /** + * Lintable files faithfully reconstructed before the React filter. When this + * is positive but `filesScanned` is 0, the session was skipped only because + * none of its files were React — not because reconstruction failed. + */ + readonly reconstructedFiles: number; + /** Files edited without a faithful base (a genuine reconstruction gap). */ + readonly unreconstructable: number; +} + +/** Aggregate stats for one leaderboard row (a model or a provider). 
*/ +export interface GroupStats { + readonly key: string; + readonly provider: StatsProvider | "mixed"; + readonly sessions: number; + readonly filesScanned: number; + readonly unreconstructable: number; + readonly totalDiagnostics: number; + readonly errorCount: number; + readonly warningCount: number; + readonly diagnosticsPerFile: number; + /** Raw 0-100 React Doctor score for this group's code (null if undersampled). */ + readonly score: number | null; + readonly scoreLabel: string | null; + /** + * Confidence-weighted score: the raw score regressed toward the global mean by + * the group's evidence (files discounted by sessions). This is what the + * leaderboard ranks on, so small samples can't dominate. + */ + readonly weightedScore: number | null; + readonly topRules: ReadonlyArray<{ readonly rule: string; readonly count: number }>; +} + +export interface StatsReport { + readonly scope: "repo" | "global"; + readonly directory: string; + readonly models: GroupStats[]; + readonly providers: GroupStats[]; + readonly best: GroupStats | null; + readonly worst: GroupStats | null; + /** Sessions with edits that were reconstructed and considered. */ + readonly sessionsAnalyzed: number; + /** Sessions that contributed at least one React file to the ranking. */ + readonly sessionsRanked: number; + /** Sessions reconstructed successfully but whose files were all non-React. */ + readonly sessionsNonReact: number; + /** Sessions whose edits could not be faithfully reconstructed. 
*/ + readonly sessionsUnreconstructable: number; + readonly generatedAt: string; +} + +export interface StatsScopeOptions { + readonly global: boolean; + readonly since?: Date; + readonly limit: number; + readonly provider?: StatsProvider; +} diff --git a/packages/react-doctor/src/stats/walk-transcripts.ts b/packages/react-doctor/src/stats/walk-transcripts.ts new file mode 100644 index 000000000..809b514c6 --- /dev/null +++ b/packages/react-doctor/src/stats/walk-transcripts.ts @@ -0,0 +1,99 @@ +import * as fs from "node:fs"; +import * as path from "node:path"; +import type { AgentSession, SessionCandidate, StatsProvider } from "./types.js"; + +/** File modification time in ms, or 0 when the file is missing/unreadable. */ +export const statMtimeMs = (filePath: string): number => { + try { + return fs.statSync(filePath).mtimeMs; + } catch { + return 0; + } +}; + +/** + * Turn a transcript-file-based provider into lazy `SessionCandidate`s: one per + * `.jsonl` file under its roots, each parsed only when `load()` is called. The + * file's mtime is the sort + `--since` key. + */ +export const fileSessionCandidates = ( + provider: StatsProvider, + roots: ReadonlyArray, + discover: (root: string) => string[], + parse: (transcriptPath: string) => AgentSession | null, +): SessionCandidate[] => { + const candidates: SessionCandidate[] = []; + for (const root of roots) { + for (const transcriptPath of discover(root)) { + candidates.push({ + provider, + modifiedMs: statMtimeMs(transcriptPath), + load: () => parse(transcriptPath), + }); + } + } + return candidates; +}; + +/** + * Recursively collect `.jsonl` transcript files under `root` up to `maxDepth` + * directory levels deep. Returns absolute paths sorted newest-first by mtime so + * a `--limit` keeps the most recent sessions. Missing roots yield `[]`. 
+ */ +export const findJsonlFiles = (root: string, maxDepth: number): string[] => { + const found: Array<{ filePath: string; modifiedMs: number }> = []; + + const walk = (directory: string, depth: number): void => { + let entries: fs.Dirent[]; + try { + entries = fs.readdirSync(directory, { withFileTypes: true }); + } catch { + return; + } + for (const entry of entries) { + const entryPath = path.join(directory, entry.name); + if (entry.isDirectory()) { + if (depth < maxDepth) walk(entryPath, depth + 1); + } else if (entry.isFile() && entry.name.endsWith(".jsonl")) { + let modifiedMs = 0; + try { + modifiedMs = fs.statSync(entryPath).mtimeMs; + } catch { + modifiedMs = 0; + } + found.push({ filePath: entryPath, modifiedMs }); + } + } + }; + + walk(root, 0); + found.sort((left, right) => right.modifiedMs - left.modifiedMs); + return found.map((entry) => entry.filePath); +}; + +/** + * Parse each non-empty line of a JSONL file, invoking `onEntry` with the decoded + * object. Unparseable lines and unreadable files are skipped silently so one + * corrupt transcript never sinks a whole run. 
+ */ +export const readJsonlEntries = ( + filePath: string, + onEntry: (entry: Record) => void, +): void => { + let raw: string; + try { + raw = fs.readFileSync(filePath, "utf8"); + } catch { + return; + } + for (const line of raw.split("\n")) { + if (!line.trim()) continue; + let entry: unknown; + try { + entry = JSON.parse(line); + } catch { + continue; + } + if (entry && typeof entry === "object") onEntry(entry as Record); + } +}; diff --git a/packages/react-doctor/tests/stats-adapters.test.ts b/packages/react-doctor/tests/stats-adapters.test.ts new file mode 100644 index 000000000..705a855c9 --- /dev/null +++ b/packages/react-doctor/tests/stats-adapters.test.ts @@ -0,0 +1,216 @@ +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { DatabaseSync } from "node:sqlite"; +import { afterAll, describe, expect, it } from "vite-plus/test"; +import { closeCursorDb } from "../src/stats/cursor-db.js"; +import { parseClaudeSession } from "../src/stats/sources/claude.js"; +import { parseCodexSession } from "../src/stats/sources/codex.js"; +import { cursorComposerCandidates } from "../src/stats/sources/cursor.js"; + +const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "stats-adapters-")); + +const writeTranscript = (name: string, lines: unknown[]): string => { + const filePath = path.join(tempDir, name); + fs.writeFileSync(filePath, lines.map((line) => JSON.stringify(line)).join("\n")); + return filePath; +}; + +afterAll(() => { + closeCursorDb(); + fs.rmSync(tempDir, { recursive: true, force: true }); +}); + +describe("parseClaudeSession", () => { + it("extracts model, cwd, edits, and post-edit result content", () => { + const filePath = writeTranscript("claude.jsonl", [ + { + type: "assistant", + cwd: "/repo", + timestamp: "2026-06-20T00:00:00Z", + message: { + model: "claude-x", + content: [ + { + type: "tool_use", + name: "Write", + id: "t1", + input: { file_path: "/repo/src/a.ts", content: "export const a=1;" }, + 
}, + ], + }, + }, + { + type: "user", + toolUseResult: { filePath: "/repo/src/a.ts", content: "export const a = 1;\n" }, + }, + ]); + const session = parseClaudeSession(filePath); + expect(session?.model).toBe("claude-x"); + expect(session?.cwd).toBe("/repo"); + expect(session?.edits.some((edit) => edit.resultContent === "export const a = 1;\n")).toBe( + true, + ); + }); +}); + +describe("parseCodexSession", () => { + it("extracts model from turn_context, cwd from session_meta, and apply_patch edits", () => { + const filePath = writeTranscript("codex.jsonl", [ + { type: "session_meta", payload: { cwd: "/repo" } }, + { type: "turn_context", payload: { model: "gpt-5.5" } }, + { + type: "response_item", + payload: { + type: "custom_tool_call", + name: "apply_patch", + input: "*** Begin Patch\n*** Add File: /repo/d.ts\n+x\n*** End Patch", + }, + }, + ]); + const session = parseCodexSession(filePath); + expect(session?.model).toBe("gpt-5.5"); + expect(session?.cwd).toBe("/repo"); + expect(session?.edits).toHaveLength(1); + expect(session?.edits[0].kind).toBe("patch"); + }); +}); + +interface ComposerFixture { + readonly composerId: string; + readonly modelName: string | null; + readonly bubbles: ReadonlyArray>; + readonly content?: Record; +} + +const writeComposerDb = (name: string, composers: ReadonlyArray): string => { + const dbPath = path.join(tempDir, name); + const database = new DatabaseSync(dbPath); + database.exec("CREATE TABLE ItemTable (key TEXT PRIMARY KEY, value TEXT)"); + database.exec("CREATE TABLE cursorDiskKV (key TEXT PRIMARY KEY, value TEXT)"); + + const headers = composers.map((composer, index) => ({ + composerId: composer.composerId, + lastUpdatedAt: 1_000 + index, + })); + const insertItem = database.prepare("INSERT INTO ItemTable (key, value) VALUES (?, ?)"); + insertItem.run("composer.composerHeaders", JSON.stringify({ allComposers: headers })); + + const insertKv = database.prepare("INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)"); + for 
(const composer of composers) { + insertKv.run( + `composerData:${composer.composerId}`, + JSON.stringify(composer.modelName ? { modelConfig: { modelName: composer.modelName } } : {}), + ); + composer.bubbles.forEach((bubble, index) => { + insertKv.run(`bubbleId:${composer.composerId}:b${index}`, JSON.stringify(bubble)); + }); + for (const [contentId, body] of Object.entries(composer.content ?? {})) { + insertKv.run(contentId, body); + } + } + database.close(); + return dbPath; +}; + +describe("cursorComposerCandidates", () => { + it("attributes the composer model and reconstructs exact content via afterContentId", () => { + closeCursorDb(); + const dbPath = writeComposerDb("cursor-model.vscdb", [ + { + composerId: "comp-1", + modelName: "claude-opus-4-8", + content: { "composer.content.hash1": "export const x = 1;\n" }, + bubbles: [ + { + createdAt: 10, + toolFormerData: { + name: "edit_file_v2", + status: "completed", + params: JSON.stringify({ + relativeWorkspacePath: "/repo/b.ts", + streamingContent: "export const x=1;", + }), + result: JSON.stringify({ afterContentId: "composer.content.hash1" }), + }, + }, + { + createdAt: 20, + toolFormerData: { + name: "delete_file", + status: "completed", + params: JSON.stringify({ relativeWorkspacePath: "/repo/old.ts" }), + }, + }, + ], + }, + ]); + + const candidates = cursorComposerCandidates(dbPath); + expect(candidates).toHaveLength(1); + const session = candidates[0].load(); + expect(session?.provider).toBe("cursor"); + expect(session?.model).toBe("claude-opus-4-8"); + expect(session?.edits).toHaveLength(2); + const write = session?.edits.find((edit) => edit.kind === "write"); + expect(write?.path).toBe("/repo/b.ts"); + expect(write?.resultContent).toBe("export const x = 1;\n"); + expect( + session?.edits.some((edit) => edit.kind === "delete" && edit.path === "/repo/old.ts"), + ).toBe(true); + }); + + it("falls back to the dominant bubble model when the composer is on Auto", () => { + closeCursorDb(); + const dbPath = 
writeComposerDb("cursor-auto.vscdb", [ + { + composerId: "comp-2", + modelName: null, + content: { "composer.content.hash2": "export const y = 2;\n" }, + bubbles: [ + { modelInfo: { modelName: "gpt-5.5" } }, + { + createdAt: 5, + modelInfo: { modelName: "gpt-5.5" }, + toolFormerData: { + name: "edit_file_v2", + status: "completed", + params: JSON.stringify({ relativeWorkspacePath: "/repo/c.ts" }), + result: JSON.stringify({ afterContentId: "composer.content.hash2" }), + }, + }, + ], + }, + ]); + + const session = cursorComposerCandidates(dbPath)[0]?.load(); + expect(session?.model).toBe("gpt-5.5"); + expect(session?.edits[0]?.resultContent).toBe("export const y = 2;\n"); + }); + + it("ignores non-lintable edits and skips when the database is absent", () => { + closeCursorDb(); + expect(cursorComposerCandidates(null)).toEqual([]); + + const dbPath = writeComposerDb("cursor-nonlintable.vscdb", [ + { + composerId: "comp-3", + modelName: "claude-opus-4-8", + content: { "composer.content.hash3": "# readme" }, + bubbles: [ + { + createdAt: 1, + toolFormerData: { + name: "edit_file_v2", + status: "completed", + params: JSON.stringify({ relativeWorkspacePath: "/repo/README.md" }), + result: JSON.stringify({ afterContentId: "composer.content.hash3" }), + }, + }, + ], + }, + ]); + const session = cursorComposerCandidates(dbPath)[0]?.load(); + expect(session?.edits).toEqual([]); + }); +}); diff --git a/packages/react-doctor/tests/stats-aggregate.test.ts b/packages/react-doctor/tests/stats-aggregate.test.ts new file mode 100644 index 000000000..a43b25eed --- /dev/null +++ b/packages/react-doctor/tests/stats-aggregate.test.ts @@ -0,0 +1,123 @@ +import { describe, expect, it } from "vite-plus/test"; +import type { Diagnostic } from "@react-doctor/core"; +import { aggregateStats, type ScoreComputer } from "../src/stats/aggregate-stats.js"; +import type { AgentSession, SessionScanResult, StatsProvider } from "../src/stats/types.js"; + +const diagnostic = (rule: string, severity: 
"error" | "warning" = "warning"): Diagnostic => ({ + filePath: "src/App.tsx", + plugin: "react-doctor", + rule, + severity, + message: "m", + help: "h", + line: 1, + column: 1, + category: "Correctness", +}); + +const result = ( + provider: StatsProvider, + model: string, + filesScanned: number, + diagnostics: Diagnostic[], +): SessionScanResult => { + const session: AgentSession = { + provider, + sessionId: `${provider}-${model}`, + transcriptPath: "/tmp/x.jsonl", + model, + cwd: "/repo", + edits: [], + reads: [], + }; + return { + session, + diagnostics, + filesScanned, + reconstructedFiles: filesScanned, + unreconstructable: 0, + }; +}; + +// Deterministic, offline score: cleaner code (fewer diagnostics) scores higher. +const stubScore: ScoreComputer = async (diagnostics) => ({ + score: Math.max(0, 100 - diagnostics.length * 5), + label: "stub", +}); + +describe("aggregateStats", () => { + it("ranks models best-first by score and surfaces best/worst", async () => { + const results = [ + result("claude", "m1", 4, [diagnostic("r1"), diagnostic("r1")]), + result( + "codex", + "m2", + 4, + Array.from({ length: 6 }, () => diagnostic("r2")), + ), + ]; + const aggregated = await aggregateStats(results, null, stubScore); + + expect(aggregated.models.map((group) => group.key)).toEqual(["claude/m1", "codex/m2"]); + expect(aggregated.best?.key).toBe("claude/m1"); + expect(aggregated.best?.score).toBe(90); + expect(aggregated.worst?.key).toBe("codex/m2"); + expect(aggregated.worst?.score).toBe(70); + }); + + it("computes diagnostics-per-file and top rules per group", async () => { + const results = [result("claude", "m1", 4, [diagnostic("r1"), diagnostic("r1")])]; + const aggregated = await aggregateStats(results, null, stubScore); + const group = aggregated.models[0]; + expect(group.totalDiagnostics).toBe(2); + expect(group.diagnosticsPerFile).toBe(0.5); + expect(group.topRules).toEqual([{ rule: "react-doctor/r1", count: 2 }]); + }); + + it("groups by provider and excludes 
under-sampled groups from the ranking", async () => { + const results = [ + result("claude", "m1", 4, [diagnostic("r1")]), + result("cursor", "unknown", 1, [diagnostic("r2")]), + ]; + const aggregated = await aggregateStats(results, null, stubScore); + // Cursor's single-file group is below the min-files threshold. + expect(aggregated.models.map((group) => group.key)).toEqual(["claude/m1"]); + expect(aggregated.providers.map((group) => group.provider)).toEqual(["claude"]); + }); + + it("weights the score by files and sessions so a tiny perfect sample can't top the board", async () => { + const results = [ + result("claude", "big", 10, [diagnostic("r1")]), + result("claude", "big", 10, []), + result("claude", "big", 10, []), + result("claude", "big", 10, []), + result("claude", "big", 10, []), + ...Array.from({ length: 5 }, () => + result("codex", "med", 10, [diagnostic("r2"), diagnostic("r2")]), + ), + result("cursor", "small", 3, []), + ]; + const aggregated = await aggregateStats(results, null, stubScore); + + // "small" has the best RAW score (100, zero diagnostics) but only 3 files + // from one session, so confidence weighting regresses it toward the mean and + // the well-sampled "big" group wins instead of the tiny perfect sample. + expect(aggregated.best?.key).toBe("claude/big"); + expect(aggregated.models[0]?.key).toBe("claude/big"); + const small = aggregated.models.find((group) => group.key === "cursor/small"); + expect(small?.score).toBe(100); + expect(small?.weightedScore).toBeLessThan(100); + expect(aggregated.models[0]?.weightedScore ?? 0).toBeGreaterThan(small?.weightedScore ?? 
0); + }); + + it("leaves the score null when a group lacks enough files to rank fairly", async () => { + const results = [result("claude", "m1", 1, [diagnostic("r1")])]; + let called = false; + const aggregated = await aggregateStats(results, null, async () => { + called = true; + return { score: 0, label: "x" }; + }); + expect(called).toBe(false); + expect(aggregated.models).toEqual([]); + }); +}); diff --git a/packages/react-doctor/tests/stats-apply-patch.test.ts b/packages/react-doctor/tests/stats-apply-patch.test.ts new file mode 100644 index 000000000..fd178ddfb --- /dev/null +++ b/packages/react-doctor/tests/stats-apply-patch.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, it } from "vite-plus/test"; +import { applyUpdateHunks, parseApplyPatch } from "../src/stats/parse-apply-patch.js"; + +describe("parseApplyPatch", () => { + it("parses Add, Update, and Delete ops from one envelope", () => { + const patch = [ + "*** Begin Patch", + "*** Add File: a.ts", + "+export const a = 1;", + "*** Update File: b.ts", + "@@", + " keep", + "-old", + "+new", + "*** Delete File: c.ts", + "*** End Patch", + ].join("\n"); + const ops = parseApplyPatch(patch); + expect(ops).toHaveLength(3); + expect(ops[0]).toEqual({ type: "add", path: "a.ts", addedLines: ["export const a = 1;"] }); + expect(ops[1].type).toBe("update"); + expect(ops[1].path).toBe("b.ts"); + expect(ops[2]).toEqual({ type: "delete", path: "c.ts" }); + }); + + it("captures a Move to directive on an update", () => { + const patch = [ + "*** Begin Patch", + "*** Update File: old.ts", + "*** Move to: new.ts", + "@@", + "+x", + "*** End Patch", + ].join("\n"); + const ops = parseApplyPatch(patch); + expect(ops[0].movePath).toBe("new.ts"); + }); + + it("returns nothing for a patch with no file headers", () => { + expect(parseApplyPatch("not a patch")).toEqual([]); + }); +}); + +describe("applyUpdateHunks", () => { + it("applies context / add / remove against a base via line search", () => { + const base = "line 
one\nline two\nline three\n"; + const result = applyUpdateHunks(base, [ + "@@", + " line one", + "-line two", + "+line 2", + " line three", + ]); + expect(result).toBe("line one\nline 2\nline three\n"); + }); + + it("returns null when a context line is not found in the base", () => { + expect(applyUpdateHunks("a\nb\n", ["@@", " missing", "+x"])).toBeNull(); + }); +}); diff --git a/packages/react-doctor/tests/stats-is-react-source.test.ts b/packages/react-doctor/tests/stats-is-react-source.test.ts new file mode 100644 index 000000000..ad63749da --- /dev/null +++ b/packages/react-doctor/tests/stats-is-react-source.test.ts @@ -0,0 +1,56 @@ +import { describe, expect, it } from "vite-plus/test"; +import { isReactSourceFile } from "../src/stats/is-react-source.js"; + +describe("isReactSourceFile", () => { + it("treats JSX extensions as React regardless of content", () => { + expect(isReactSourceFile("/repo/src/App.tsx", "export const App = () => null;")).toBe(true); + expect(isReactSourceFile("/repo/src/widget.jsx", "module.exports = {};")).toBe(true); + }); + + it("detects React via direct and ecosystem imports in .ts/.js files", () => { + expect(isReactSourceFile("/repo/src/useThing.ts", 'import { useState } from "react";')).toBe( + true, + ); + expect( + isReactSourceFile("/repo/src/data.ts", 'import { useQuery } from "@tanstack/react-query";'), + ).toBe(true); + expect( + isReactSourceFile( + "/repo/src/nav.ts", + 'import { useNavigation } from "@react-navigation/native";', + ), + ).toBe(true); + expect(isReactSourceFile("/repo/src/page.ts", 'const r = require("react-dom/server");')).toBe( + true, + ); + }); + + it("detects React Server Component / server-action directives", () => { + expect( + isReactSourceFile("/repo/src/actions.ts", '"use server";\nexport async function go() {}'), + ).toBe(true); + expect(isReactSourceFile("/repo/src/client.ts", "'use client'\nexport const x = 1;")).toBe( + true, + ); + }); + + it("rejects plain backend / util / config files", () 
=> { + expect(isReactSourceFile("/repo/src/math.ts", "export const add = (a, b) => a + b;")).toBe( + false, + ); + expect( + isReactSourceFile( + "/repo/server/db.ts", + 'import { Pool } from "pg";\nexport const pool = new Pool();', + ), + ).toBe(false); + expect(isReactSourceFile("/repo/scripts/build.js", 'const fs = require("node:fs");')).toBe( + false, + ); + }); + + it("does not mistake unrelated specifiers containing other words for React", () => { + expect(isReactSourceFile("/repo/src/a.ts", 'import x from "reactor-core";')).toBe(false); + expect(isReactSourceFile("/repo/src/b.ts", 'import y from "overreact";')).toBe(false); + }); +}); diff --git a/packages/react-doctor/tests/stats-reconstruct.test.ts b/packages/react-doctor/tests/stats-reconstruct.test.ts new file mode 100644 index 000000000..d27472fa8 --- /dev/null +++ b/packages/react-doctor/tests/stats-reconstruct.test.ts @@ -0,0 +1,116 @@ +import { describe, expect, it } from "vite-plus/test"; +import { reconstructSession, resolveEditPaths } from "../src/stats/reconstruct-files.js"; +import type { AgentSession, FileEdit } from "../src/stats/types.js"; + +const session = (overrides: Partial): AgentSession => ({ + provider: "claude", + sessionId: "s1", + transcriptPath: "/tmp/s1.jsonl", + model: "test-model", + cwd: "/repo", + edits: [], + reads: [], + ...overrides, +}); + +const byPath = (files: ReadonlyArray<{ absolutePath: string; content: string }>) => + new Map(files.map((file) => [file.absolutePath, file.content])); + +describe("reconstructSession", () => { + it("uses Claude post-edit result content as the authoritative final state", () => { + const edits: FileEdit[] = [ + { kind: "write", path: "/repo/src/a.ts", content: "export const a = 0;\n" }, + { kind: "write", path: "/repo/src/a.ts", resultContent: "export const a = 1;\n" }, + ]; + const result = reconstructSession(session({ edits })); + expect(byPath(result.files).get("/repo/src/a.ts")).toBe("export const a = 1;\n"); + 
expect(result.unreconstructable).toEqual([]); + }); + + it("replays a Cursor write then StrReplace into final content", () => { + const edits: FileEdit[] = [ + { kind: "write", path: "/repo/src/b.ts", content: "const x = 1;\n" }, + { kind: "replace", path: "/repo/src/b.ts", oldString: "1", newString: "2" }, + ]; + const result = reconstructSession(session({ provider: "cursor", edits })); + expect(byPath(result.files).get("/repo/src/b.ts")).toBe("const x = 2;\n"); + }); + + it("flags a StrReplace with no in-session base as unreconstructable", () => { + const edits: FileEdit[] = [ + { kind: "replace", path: "/repo/src/c.ts", oldString: "a", newString: "b" }, + ]; + const result = reconstructSession(session({ provider: "cursor", edits })); + expect(result.files).toEqual([]); + expect(result.unreconstructable).toEqual(["/repo/src/c.ts"]); + }); + + it("reconstructs a Codex apply_patch Add File", () => { + const patch = + "*** Begin Patch\n*** Add File: /repo/src/d.ts\n+export const d = 1;\n*** End Patch"; + const result = reconstructSession( + session({ provider: "codex", edits: [{ kind: "patch", path: "", patch }] }), + ); + expect(byPath(result.files).get("/repo/src/d.ts")).toBe("export const d = 1;\n"); + }); + + it("applies a Codex apply_patch Update File on an in-session base", () => { + const add = + "*** Begin Patch\n*** Add File: /repo/src/e.ts\n+const value = 1;\n+export default value;\n*** End Patch"; + const update = + "*** Begin Patch\n*** Update File: /repo/src/e.ts\n@@\n-const value = 1;\n+const value = 2;\n export default value;\n*** End Patch"; + const result = reconstructSession( + session({ + provider: "codex", + edits: [ + { kind: "patch", path: "", patch: add }, + { kind: "patch", path: "", patch: update }, + ], + }), + ); + expect(byPath(result.files).get("/repo/src/e.ts")).toBe( + "const value = 2;\nexport default value;\n", + ); + }); + + it("resolves relative edit paths against the session cwd", () => { + const edits: FileEdit[] = [{ kind: 
"write", path: "src/f.ts", content: "export {};\n" }]; + const result = reconstructSession(session({ edits })); + expect(result.files.map((file) => file.absolutePath)).toEqual(["/repo/src/f.ts"]); + }); + + it("ignores files outside the lintable extension allowlist", () => { + const edits: FileEdit[] = [ + { kind: "write", path: "/repo/README.md", content: "# hi\n" }, + { kind: "replace", path: "/repo/notes.md", oldString: "x", newString: "y" }, + ]; + const result = reconstructSession(session({ edits })); + expect(result.files).toEqual([]); + expect(result.unreconstructable).toEqual([]); + }); + + it("drops deleted files from both output and the coverage gap list", () => { + const edits: FileEdit[] = [ + { kind: "write", path: "/repo/src/g.ts", content: "export {};\n" }, + { kind: "delete", path: "/repo/src/g.ts" }, + ]; + const result = reconstructSession(session({ edits })); + expect(result.files).toEqual([]); + expect(result.unreconstructable).toEqual([]); + }); +}); + +describe("resolveEditPaths", () => { + it("collects absolute paths from plain edits and apply_patch envelopes", () => { + const patch = "*** Begin Patch\n*** Update File: /repo/src/x.ts\n@@\n+x\n*** End Patch"; + const result = resolveEditPaths( + session({ + edits: [ + { kind: "write", path: "src/y.ts", content: "" }, + { kind: "patch", path: "", patch }, + ], + }), + ); + expect(new Set(result)).toEqual(new Set(["/repo/src/y.ts", "/repo/src/x.ts"])); + }); +}); diff --git a/packages/react-doctor/tests/stats-render.test.ts b/packages/react-doctor/tests/stats-render.test.ts new file mode 100644 index 000000000..199433232 --- /dev/null +++ b/packages/react-doctor/tests/stats-render.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, it } from "vite-plus/test"; +import { renderStatsReport } from "../src/stats/render-stats.js"; +import type { GroupStats, StatsReport } from "../src/stats/types.js"; + +const group = (overrides: Partial): GroupStats => ({ + key: "claude/m1", + provider: "claude", + 
sessions: 1, + filesScanned: 4, + unreconstructable: 0, + totalDiagnostics: 2, + errorCount: 0, + warningCount: 2, + diagnosticsPerFile: 0.5, + score: 90, + scoreLabel: "good", + weightedScore: 88, + topRules: [], + ...overrides, +}); + +const report = (overrides: Partial): StatsReport => ({ + scope: "repo", + directory: "/repo", + models: [], + providers: [], + best: null, + worst: null, + sessionsAnalyzed: 0, + sessionsRanked: 0, + sessionsNonReact: 0, + sessionsUnreconstructable: 0, + generatedAt: "2026-06-20T00:00:00.000Z", + ...overrides, +}); + +describe("renderStatsReport", () => { + it("renders a model leaderboard with the best/worst callout", () => { + const best = group({ key: "claude/opus", score: 95 }); + const worst = group({ key: "codex/gpt", provider: "codex", score: 60, diagnosticsPerFile: 2 }); + const output = renderStatsReport( + report({ + models: [best, worst], + providers: [best], + best, + worst, + sessionsAnalyzed: 2, + sessionsRanked: 2, + }), + ); + expect(output).toContain("React Doctor leaderboard"); + expect(output).toContain("Which agent writes the cleanest React code"); + expect(output).toContain("opus"); + expect(output).toContain("gpt"); + expect(output).toContain("Best"); + expect(output).toContain("Worst"); + }); + + it("shows a friendly message when there is nothing to rank", () => { + const output = renderStatsReport(report({ sessionsAnalyzed: 3 })); + expect(output).toContain("Nothing to rank yet"); + }); + + it("notes non-React sessions separately from unreplayable ones", () => { + const only = group({}); + const output = renderStatsReport( + report({ + models: [only], + providers: [only], + best: only, + sessionsAnalyzed: 5, + sessionsRanked: 1, + sessionsNonReact: 3, + sessionsUnreconstructable: 1, + }), + ); + expect(output).toContain("Skipped 3 that changed only non-React files"); + expect(output).toContain("Skipped 1 that used edits we could not replay"); + }); +}); From 7b0e7b1144c129be87cba0e91b6bdd49d016c6e1 Mon Sep 17 
00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 18:09:30 -0700 Subject: [PATCH 02/17] fix(cli): keep stats spinner responsive during session discovery Discovery loaded each candidate session from the Cursor SQLite DB synchronously, blocking the event loop so the ora spinner appeared frozen for a few seconds. Yield to the event loop periodically and report live "(N found)" progress during the history walk. --- .../react-doctor/src/cli/commands/stats.ts | 4 ++- packages/react-doctor/src/stats/constants.ts | 5 +++ .../src/stats/discover-sessions.ts | 31 ++++++++++++++++--- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts index a132da3c4..49083c289 100644 --- a/packages/react-doctor/src/cli/commands/stats.ts +++ b/packages/react-doctor/src/cli/commands/stats.ts @@ -71,7 +71,9 @@ export const statsAction = async (flags: StatsFlags): Promise => { let report: StatsReport; let providerCount: number; try { - const sessions = discoverSessions(root, scope); + const sessions = await discoverSessions(root, scope, (foundCount) => + progress?.update(`Looking through your agent history… (${foundCount} found)`), + ); progress?.update("Checking the code each agent wrote…"); const results = await runStatsScan(sessions, scope.global ? null : root, { onProgress: (completedCount, totalCount) => diff --git a/packages/react-doctor/src/stats/constants.ts b/packages/react-doctor/src/stats/constants.ts index d2320eff0..a3f3b0a9b 100644 --- a/packages/react-doctor/src/stats/constants.ts +++ b/packages/react-doctor/src/stats/constants.ts @@ -14,6 +14,11 @@ export const STATS_SCAN_CONCURRENCY = 6; // Temp-dir prefix for a per-session reconstructed source tree. export const STATS_TEMP_DIR_PREFIX = "react-doctor-stats-"; +// Discovery loads each candidate session from disk/SQLite synchronously. 
Yield +// to the event loop after this many loads so the spinner keeps animating instead +// of looking frozen during the initial history walk. +export const STATS_DISCOVERY_YIELD_INTERVAL = 10; + // A group (model/provider) needs at least this many scanned files before its // score is shown; below it the sample is too small to rank fairly. export const STATS_MIN_FILES_FOR_SCORE = 3; diff --git a/packages/react-doctor/src/stats/discover-sessions.ts b/packages/react-doctor/src/stats/discover-sessions.ts index 9b7f87f4b..a08711c40 100644 --- a/packages/react-doctor/src/stats/discover-sessions.ts +++ b/packages/react-doctor/src/stats/discover-sessions.ts @@ -1,8 +1,12 @@ import * as path from "node:path"; +import { STATS_DISCOVERY_YIELD_INTERVAL } from "./constants.js"; import { STATS_SOURCES } from "./sources/index.js"; import { resolveEditPaths } from "./reconstruct-files.js"; import type { AgentSession, StatsScopeOptions } from "./types.js"; +/** Reports discovery progress: sessions kept so far, candidates scanned so far. */ +export type DiscoveryProgress = (foundCount: number, scannedCount: number) => void; + const isPathUnder = (childPath: string, parentPath: string): boolean => { const relative = path.relative(parentPath, childPath); return !relative.startsWith("..") && !path.isAbsolute(relative); @@ -18,9 +22,14 @@ const sessionTouchesRepo = (session: AgentSession, repoRoot: string): boolean => * that touched `repoRoot` are kept; `--global` lifts that. `--since` and * `--limit` bound cost (candidates are loaded newest-first, and loading is lazy * so capped runs never touch the whole history). Sessions with no edits are - * dropped. + * dropped. Loading is synchronous per candidate, so the loop yields to the event + * loop periodically (and reports progress) to keep the spinner responsive. 
*/ -export const discoverSessions = (repoRoot: string, scope: StatsScopeOptions): AgentSession[] => { +export const discoverSessions = async ( + repoRoot: string, + scope: StatsScopeOptions, + onProgress?: DiscoveryProgress, +): Promise<AgentSession[]> => { const candidates = STATS_SOURCES.filter( (source) => !scope.provider || source.name === scope.provider, ).flatMap((source) => source.candidates()); @@ -28,12 +37,24 @@ export const discoverSessions = (repoRoot: string, scope: StatsScopeOptions): Ag const sinceMs = scope.since ? scope.since.getTime() : null; const sessions: AgentSession[] = []; + let scannedCount = 0; for (const candidate of candidates) { if (sinceMs !== null && candidate.modifiedMs > 0 && candidate.modifiedMs < sinceMs) break; + const session = candidate.load(); - if (!session || session.edits.length === 0) continue; - if (!scope.global && !sessionTouchesRepo(session, repoRoot)) continue; - sessions.push(session); + scannedCount += 1; + if ( + session && + session.edits.length > 0 && + (scope.global || sessionTouchesRepo(session, repoRoot)) + ) { + sessions.push(session); + } + + if (scannedCount % STATS_DISCOVERY_YIELD_INTERVAL === 0) { + onProgress?.(sessions.length, scannedCount); + await new Promise((resolve) => setImmediate(resolve)); + } if (sessions.length >= scope.limit) break; } return sessions; From fe9f11005761d3dac58254121f2fcd29126d08f9 Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 18:11:08 -0700 Subject: [PATCH 03/17] feat(cli): show only the top 5 models in the stats leaderboard Cap the terminal table to the top 5 with a "+ N more" pointer to --json; the full ranking still ships in the JSON report and the best/worst callout. 
--- packages/react-doctor/src/stats/constants.ts | 4 ++++ packages/react-doctor/src/stats/render-stats.ts | 16 ++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/packages/react-doctor/src/stats/constants.ts b/packages/react-doctor/src/stats/constants.ts index a3f3b0a9b..26fd2d9b6 100644 --- a/packages/react-doctor/src/stats/constants.ts +++ b/packages/react-doctor/src/stats/constants.ts @@ -41,6 +41,10 @@ export const STATS_SCORE_SESSION_PRIOR = 2; // remaining (1 - FLOOR). Closer to 1 = files dominate even harder. export const STATS_SCORE_SESSION_FLOOR = 0.6; +// Models shown in the terminal leaderboard. The full ranking is always in the +// `--json` report; the table stays short so it reads at a glance. +export const STATS_LEADERBOARD_TOP_N = 5; + // Most-fired rules shown per group in the report. export const STATS_TOP_RULES_PER_GROUP = 3; diff --git a/packages/react-doctor/src/stats/render-stats.ts b/packages/react-doctor/src/stats/render-stats.ts index b393a1b5a..16c13bb0f 100644 --- a/packages/react-doctor/src/stats/render-stats.ts +++ b/packages/react-doctor/src/stats/render-stats.ts @@ -1,4 +1,5 @@ import { highlighter } from "@react-doctor/core"; +import { STATS_LEADERBOARD_TOP_N } from "./constants.js"; import type { GroupStats, StatsReport } from "./types.js"; const SCORE_BAR_WIDTH = 16; @@ -111,14 +112,13 @@ export const renderStatsReport = (report: StatsReport): string => { ].join("\n"); } - const sections = [ - header, - "", - renderModelTable(report.models), - "", - highlighter.dim("By tool:"), - renderProviderTable(report.providers), - ]; + const shownModels = report.models.slice(0, STATS_LEADERBOARD_TOP_N); + const hiddenCount = report.models.length - shownModels.length; + const sections = [header, "", renderModelTable(shownModels)]; + if (hiddenCount > 0) { + sections.push(highlighter.dim(`+ ${hiddenCount} more (see --json for the full ranking).`)); + } + sections.push("", highlighter.dim("By tool:"), 
renderProviderTable(report.providers)); const callout = renderCallout(report); if (callout) { From 87699243c7049b96bab4b73c3a525237d88a5e8a Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 18:15:06 -0700 Subject: [PATCH 04/17] refactor(stats): deduplicate transcript coercion helpers Consolidate the asString/asRecord/asArray/parseJson coercers (copied across the Claude/Codex/Cursor adapters) into a shared coerce.ts, extract the "most common model" tally into most-common-key.ts, and reuse statMtimeMs in findJsonlFiles. Behavior unchanged. --- packages/react-doctor/src/stats/coerce.ts | 26 ++++++++++++++++ .../react-doctor/src/stats/most-common-key.ts | 15 ++++++++++ .../react-doctor/src/stats/sources/claude.ts | 23 ++------------ .../react-doctor/src/stats/sources/codex.ts | 21 ++----------- .../react-doctor/src/stats/sources/cursor.ts | 30 ++----------------- .../src/stats/walk-transcripts.ts | 8 +---- 6 files changed, 51 insertions(+), 72 deletions(-) create mode 100644 packages/react-doctor/src/stats/coerce.ts create mode 100644 packages/react-doctor/src/stats/most-common-key.ts diff --git a/packages/react-doctor/src/stats/coerce.ts b/packages/react-doctor/src/stats/coerce.ts new file mode 100644 index 000000000..0721228aa --- /dev/null +++ b/packages/react-doctor/src/stats/coerce.ts @@ -0,0 +1,26 @@ +// Defensive coercion for untrusted transcript JSON. Every agent source parses +// data the user didn't write by hand, so values are narrowed before use rather +// than trusted. Shared by the Claude/Codex/Cursor adapters. + +/** Narrow an unknown to a non-empty string, else undefined. */ +export const asString = (value: unknown): string | undefined => + typeof value === "string" && value.length > 0 ? value : undefined; + +/** Narrow an unknown to a plain object record, else undefined. */ +export const asRecord = (value: unknown): Record<string, unknown> | undefined => + value && typeof value === "object" && !Array.isArray(value) + ? 
(value as Record<string, unknown>) + : undefined; + +/** Narrow an unknown to an array, else an empty array. */ +export const asArray = (value: unknown): unknown[] => (Array.isArray(value) ? value : []); + +/** Parse a JSON string, returning undefined on non-strings or parse errors. */ +export const parseJson = (raw: string | null | undefined): unknown => { + if (typeof raw !== "string") return undefined; + try { + return JSON.parse(raw); + } catch { + return undefined; + } +}; diff --git a/packages/react-doctor/src/stats/most-common-key.ts b/packages/react-doctor/src/stats/most-common-key.ts new file mode 100644 index 000000000..7319e6781 --- /dev/null +++ b/packages/react-doctor/src/stats/most-common-key.ts @@ -0,0 +1,15 @@ +/** + * The map key with the highest count, or undefined when the map is empty. Used + * to pick a session's dominant model from per-message model tallies. + */ +export const mostCommonKey = (counts: ReadonlyMap<string, number>): string | undefined => { + let bestKey: string | undefined; + let bestCount = 0; + for (const [key, count] of counts) { + if (count > bestCount) { + bestKey = key; + bestCount = count; + } + } + return bestKey; +}; diff --git a/packages/react-doctor/src/stats/sources/claude.ts b/packages/react-doctor/src/stats/sources/claude.ts index 29be38e49..50706cfb1 100644 --- a/packages/react-doctor/src/stats/sources/claude.ts +++ b/packages/react-doctor/src/stats/sources/claude.ts @@ -1,19 +1,11 @@ import * as os from "node:os"; import * as path from "node:path"; +import { asArray, asRecord, asString } from "../coerce.js"; +import { mostCommonKey } from "../most-common-key.js"; import { fileSessionCandidates, findJsonlFiles, readJsonlEntries } from "../walk-transcripts.js"; import { STATS_UNKNOWN_MODEL } from "../constants.js"; import type { AgentSession, FileEdit, FileRead, SourceDef } from "./index.js"; -const asString = (value: unknown): string | undefined => - typeof value === "string" && value.length > 0 ? 
value : undefined; - -const asRecord = (value: unknown): Record | undefined => - value && typeof value === "object" && !Array.isArray(value) - ? (value as Record) - : undefined; - -const asArray = (value: unknown): unknown[] => (Array.isArray(value) ? value : []); - const EDIT_TOOL_NAMES = new Set(["Write", "Edit", "MultiEdit"]); const editsFromToolUse = (name: string, input: Record): FileEdit[] => { @@ -101,20 +93,11 @@ export const parseClaudeSession = (transcriptPath: string): AgentSession | null if (!sawAnything) return null; - let model = STATS_UNKNOWN_MODEL; - let bestCount = 0; - for (const [candidate, count] of modelCounts) { - if (count > bestCount) { - model = candidate; - bestCount = count; - } - } - return { provider: "claude", sessionId: path.basename(transcriptPath, ".jsonl"), transcriptPath, - model, + model: mostCommonKey(modelCounts) ?? STATS_UNKNOWN_MODEL, cwd, startedAt, endedAt, diff --git a/packages/react-doctor/src/stats/sources/codex.ts b/packages/react-doctor/src/stats/sources/codex.ts index 633f9ea47..f487bdf37 100644 --- a/packages/react-doctor/src/stats/sources/codex.ts +++ b/packages/react-doctor/src/stats/sources/codex.ts @@ -1,17 +1,11 @@ import * as os from "node:os"; import * as path from "node:path"; +import { asRecord, asString } from "../coerce.js"; +import { mostCommonKey } from "../most-common-key.js"; import { fileSessionCandidates, findJsonlFiles, readJsonlEntries } from "../walk-transcripts.js"; import { STATS_UNKNOWN_MODEL } from "../constants.js"; import type { AgentSession, FileEdit, SourceDef } from "./index.js"; -const asString = (value: unknown): string | undefined => - typeof value === "string" && value.length > 0 ? value : undefined; - -const asRecord = (value: unknown): Record | undefined => - value && typeof value === "object" && !Array.isArray(value) - ? 
(value as Record) - : undefined; - // Codex reconstructs only `apply_patch` (`custom_tool_call`) edits — `shell` // function calls (sed, heredoc redirects, …) are not faithfully reconstructable // and are skipped. Model comes from `turn_context`, cwd from `session_meta`. @@ -46,20 +40,11 @@ export const parseCodexSession = (transcriptPath: string): AgentSession | null = if (!sawAnything) return null; - let model = STATS_UNKNOWN_MODEL; - let bestCount = 0; - for (const [candidate, count] of modelCounts) { - if (count > bestCount) { - model = candidate; - bestCount = count; - } - } - return { provider: "codex", sessionId: path.basename(transcriptPath, ".jsonl"), transcriptPath, - model, + model: mostCommonKey(modelCounts) ?? STATS_UNKNOWN_MODEL, cwd, edits, reads: [], diff --git a/packages/react-doctor/src/stats/sources/cursor.ts b/packages/react-doctor/src/stats/sources/cursor.ts index b1efa7645..2d288272d 100644 --- a/packages/react-doctor/src/stats/sources/cursor.ts +++ b/packages/react-doctor/src/stats/sources/cursor.ts @@ -1,25 +1,10 @@ +import { asRecord, asString, parseJson } from "../coerce.js"; import { STATS_UNKNOWN_MODEL } from "../constants.js"; import { openCursorDb, resolveCursorDbPath, type CursorDbHandle } from "../cursor-db.js"; +import { mostCommonKey } from "../most-common-key.js"; import { isLintablePath } from "../reconstruct-files.js"; import type { AgentSession, FileEdit, SessionCandidate, SourceDef } from "./index.js"; -const asString = (value: unknown): string | undefined => - typeof value === "string" && value.length > 0 ? value : undefined; - -const asRecord = (value: unknown): Record | undefined => - value && typeof value === "object" && !Array.isArray(value) - ? 
(value as Record) - : undefined; - -const parseJson = (raw: string | null | undefined): unknown => { - if (typeof raw !== "string") return undefined; - try { - return JSON.parse(raw); - } catch { - return undefined; - } -}; - // The composer's selected model, ignoring the "Auto" sentinel which carries no // concrete model id. const composerModelName = (composer: Record | undefined): string | undefined => { @@ -91,20 +76,11 @@ const buildCursorSession = (db: CursorDbHandle, composerId: string): AgentSessio // Apply edits in chronological order so the last write to a file wins. orderedEdits.sort((left, right) => left.createdAt - right.createdAt); - let mostCommonBubbleModel: string | undefined; - let bestCount = 0; - for (const [candidate, count] of bubbleModelCounts) { - if (count > bestCount) { - mostCommonBubbleModel = candidate; - bestCount = count; - } - } - return { provider: "cursor", sessionId: composerId, transcriptPath: `cursor-composer:${composerId}`, - model: composerModelName(composer) ?? mostCommonBubbleModel ?? STATS_UNKNOWN_MODEL, + model: composerModelName(composer) ?? mostCommonKey(bubbleModelCounts) ?? 
STATS_UNKNOWN_MODEL, cwd: null, edits: orderedEdits.map((entry) => entry.edit), reads: [], diff --git a/packages/react-doctor/src/stats/walk-transcripts.ts b/packages/react-doctor/src/stats/walk-transcripts.ts index 809b514c6..e37a13dfb 100644 --- a/packages/react-doctor/src/stats/walk-transcripts.ts +++ b/packages/react-doctor/src/stats/walk-transcripts.ts @@ -55,13 +55,7 @@ export const findJsonlFiles = (root: string, maxDepth: number): string[] => { if (entry.isDirectory()) { if (depth < maxDepth) walk(entryPath, depth + 1); } else if (entry.isFile() && entry.name.endsWith(".jsonl")) { - let modifiedMs = 0; - try { - modifiedMs = fs.statSync(entryPath).mtimeMs; - } catch { - modifiedMs = 0; - } - found.push({ filePath: entryPath, modifiedMs }); + found.push({ filePath: entryPath, modifiedMs: statMtimeMs(entryPath) }); } } }; From 755b8aa09f3473d5ac99d9b6bafe6c8e5db6435c Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 18:27:24 -0700 Subject: [PATCH 05/17] test(stats): guard cursor adapter test behind node:sqlite availability The static `node:sqlite` import crashed the whole adapter test file on Node 20 (where the module doesn't exist), failing the 20.19 CI matrix job. Load it via a guarded require and skip the Cursor suite when unavailable, mirroring cursor-db.ts's runtime degradation. 
--- .../react-doctor/tests/stats-adapters.test.ts | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/packages/react-doctor/tests/stats-adapters.test.ts b/packages/react-doctor/tests/stats-adapters.test.ts index 705a855c9..1138d1654 100644 --- a/packages/react-doctor/tests/stats-adapters.test.ts +++ b/packages/react-doctor/tests/stats-adapters.test.ts @@ -1,13 +1,34 @@ import * as fs from "node:fs"; +import { createRequire } from "node:module"; import * as os from "node:os"; import * as path from "node:path"; -import { DatabaseSync } from "node:sqlite"; import { afterAll, describe, expect, it } from "vite-plus/test"; import { closeCursorDb } from "../src/stats/cursor-db.js"; import { parseClaudeSession } from "../src/stats/sources/claude.js"; import { parseCodexSession } from "../src/stats/sources/codex.js"; import { cursorComposerCandidates } from "../src/stats/sources/cursor.js"; +interface SqliteDb { + exec(sql: string): void; + prepare(sql: string): { run(...params: unknown[]): void }; + close(): void; +} +interface SqliteModule { + DatabaseSync: new (filePath: string) => SqliteDb; +} + +// `node:sqlite` is built in on Node 22.13+/24+ and absent on older Node, where +// the require throws. Mirror cursor-db.ts and skip the Cursor suite there rather +// than crashing the whole file at import time. 
+const loadSqlite = (): SqliteModule | null => { + try { + return createRequire(import.meta.url)("node:sqlite"); + } catch { + return null; + } +}; +const sqlite = loadSqlite(); + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "stats-adapters-")); const writeTranscript = (name: string, lines: unknown[]): string => { @@ -84,8 +105,9 @@ interface ComposerFixture { } const writeComposerDb = (name: string, composers: ReadonlyArray): string => { + if (!sqlite) throw new Error("node:sqlite unavailable"); const dbPath = path.join(tempDir, name); - const database = new DatabaseSync(dbPath); + const database = new sqlite.DatabaseSync(dbPath); database.exec("CREATE TABLE ItemTable (key TEXT PRIMARY KEY, value TEXT)"); database.exec("CREATE TABLE cursorDiskKV (key TEXT PRIMARY KEY, value TEXT)"); @@ -113,7 +135,9 @@ const writeComposerDb = (name: string, composers: ReadonlyArray return dbPath; }; -describe("cursorComposerCandidates", () => { +const describeCursor = sqlite ? describe : describe.skip; + +describeCursor("cursorComposerCandidates", () => { it("attributes the composer model and reconstructs exact content via afterContentId", () => { closeCursorDb(); const dbPath = writeComposerDb("cursor-model.vscdb", [ From 721470d46a2ac294e635a9597f418c532e829bb9 Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 18:37:13 -0700 Subject: [PATCH 06/17] fix(stats): make cursor DB + reconstruct tests pass on Windows - closeCursorDb now closes the underlying node:sqlite database instead of only dropping the cached reference, so the fixture file is unlocked and Windows can unlink the temp dir (was EBUSY in the adapter test teardown). - The reconstruct test compared emitted absolute paths against hardcoded POSIX strings; on Windows resolveAgainstCwd normalizes to backslashes, so expectations now mirror that normalization. Production code unchanged. 
--- packages/react-doctor/src/stats/cursor-db.ts | 40 ++++++++++++++----- .../tests/stats-reconstruct.test.ts | 24 +++++++---- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/packages/react-doctor/src/stats/cursor-db.ts b/packages/react-doctor/src/stats/cursor-db.ts index bbf16a109..1c8178030 100644 --- a/packages/react-doctor/src/stats/cursor-db.ts +++ b/packages/react-doctor/src/stats/cursor-db.ts @@ -104,12 +104,18 @@ const rowValueString = (row: unknown): string | null => { const prefixUpperBound = (prefix: string): string => prefix.slice(0, -1) + String.fromCharCode(prefix.charCodeAt(prefix.length - 1) + 1); -const makeHandle = (dbPath: string): CursorDbHandle | null => { +interface OpenDb { + readonly handle: CursorDbHandle; + readonly close: () => void; +} + +const makeHandle = (dbPath: string): OpenDb | null => { let database: { prepare(sql: string): { get(...params: unknown[]): unknown; all(...params: unknown[]): unknown[]; }; + close(): void; }; try { // `node:sqlite` is built in on Node 22.13+/24+; absent on older Node, where @@ -126,7 +132,7 @@ const makeHandle = (dbPath: string): CursorDbHandle | null => { `SELECT value FROM cursorDiskKV WHERE key >= ? AND key < ?`, ); - return { + const handle: CursorDbHandle = { composerHeaders(): CursorComposerHeader[] { try { const raw = rowValueString(headersStatement.get(COMPOSER_HEADERS_KEY)); @@ -164,22 +170,36 @@ const makeHandle = (dbPath: string): CursorDbHandle | null => { } }, }; + + return { + handle, + close: () => { + try { + database.close(); + } catch { + // Already closed or never fully opened — nothing to release. + } + }, + }; }; // One open handle per process — opening is cheap (SQLite memory-maps lazily), -// but reopening per composer during a scan would thrash. `closeCursorDb` resets -// it for tests; the CLI relies on process exit. -let cachedHandle: { dbPath: string; handle: CursorDbHandle | null } | null = null; +// but reopening per composer during a scan would thrash. 
`closeCursorDb` closes +// the underlying database for tests (so Windows can unlink the fixture file); +// the CLI relies on process exit. +let cachedDb: { dbPath: string; handle: CursorDbHandle | null; close: () => void } | null = null; /** Open (and memoize) the composer database, or `null` when unavailable. */ export const openCursorDb = (dbPath: string | null): CursorDbHandle | null => { if (!dbPath) return null; - if (cachedHandle && cachedHandle.dbPath === dbPath) return cachedHandle.handle; - cachedHandle = { dbPath, handle: makeHandle(dbPath) }; - return cachedHandle.handle; + if (cachedDb && cachedDb.dbPath === dbPath) return cachedDb.handle; + const opened = makeHandle(dbPath); + cachedDb = { dbPath, handle: opened?.handle ?? null, close: opened?.close ?? (() => {}) }; + return cachedDb.handle; }; -/** Drop the memoized handle (tests open fresh fixture databases). */ +/** Close and drop the memoized database (tests open fresh fixture databases). */ export const closeCursorDb = (): void => { - cachedHandle = null; + cachedDb?.close(); + cachedDb = null; }; diff --git a/packages/react-doctor/tests/stats-reconstruct.test.ts b/packages/react-doctor/tests/stats-reconstruct.test.ts index d27472fa8..2b2bfebd5 100644 --- a/packages/react-doctor/tests/stats-reconstruct.test.ts +++ b/packages/react-doctor/tests/stats-reconstruct.test.ts @@ -1,18 +1,26 @@ +import * as path from "node:path"; import { describe, expect, it } from "vite-plus/test"; import { reconstructSession, resolveEditPaths } from "../src/stats/reconstruct-files.js"; import type { AgentSession, FileEdit } from "../src/stats/types.js"; +const CWD = "/repo"; + const session = (overrides: Partial): AgentSession => ({ provider: "claude", sessionId: "s1", transcriptPath: "/tmp/s1.jsonl", model: "test-model", - cwd: "/repo", + cwd: CWD, edits: [], reads: [], ...overrides, }); +// Mirror reconstruct-files.ts' resolveAgainstCwd so expectations match the +// platform-normalized paths the reconstruction emits 
(backslashes on Windows). +const resolved = (rawPath: string): string => + path.isAbsolute(rawPath) ? path.normalize(rawPath) : path.resolve(CWD, rawPath); + const byPath = (files: ReadonlyArray<{ absolutePath: string; content: string }>) => new Map(files.map((file) => [file.absolutePath, file.content])); @@ -23,7 +31,7 @@ describe("reconstructSession", () => { { kind: "write", path: "/repo/src/a.ts", resultContent: "export const a = 1;\n" }, ]; const result = reconstructSession(session({ edits })); - expect(byPath(result.files).get("/repo/src/a.ts")).toBe("export const a = 1;\n"); + expect(byPath(result.files).get(resolved("/repo/src/a.ts"))).toBe("export const a = 1;\n"); expect(result.unreconstructable).toEqual([]); }); @@ -33,7 +41,7 @@ describe("reconstructSession", () => { { kind: "replace", path: "/repo/src/b.ts", oldString: "1", newString: "2" }, ]; const result = reconstructSession(session({ provider: "cursor", edits })); - expect(byPath(result.files).get("/repo/src/b.ts")).toBe("const x = 2;\n"); + expect(byPath(result.files).get(resolved("/repo/src/b.ts"))).toBe("const x = 2;\n"); }); it("flags a StrReplace with no in-session base as unreconstructable", () => { @@ -42,7 +50,7 @@ describe("reconstructSession", () => { ]; const result = reconstructSession(session({ provider: "cursor", edits })); expect(result.files).toEqual([]); - expect(result.unreconstructable).toEqual(["/repo/src/c.ts"]); + expect(result.unreconstructable).toEqual([resolved("/repo/src/c.ts")]); }); it("reconstructs a Codex apply_patch Add File", () => { @@ -51,7 +59,7 @@ describe("reconstructSession", () => { const result = reconstructSession( session({ provider: "codex", edits: [{ kind: "patch", path: "", patch }] }), ); - expect(byPath(result.files).get("/repo/src/d.ts")).toBe("export const d = 1;\n"); + expect(byPath(result.files).get(resolved("/repo/src/d.ts"))).toBe("export const d = 1;\n"); }); it("applies a Codex apply_patch Update File on an in-session base", () => { @@ -68,7 
+76,7 @@ describe("reconstructSession", () => { ], }), ); - expect(byPath(result.files).get("/repo/src/e.ts")).toBe( + expect(byPath(result.files).get(resolved("/repo/src/e.ts"))).toBe( "const value = 2;\nexport default value;\n", ); }); @@ -76,7 +84,7 @@ describe("reconstructSession", () => { it("resolves relative edit paths against the session cwd", () => { const edits: FileEdit[] = [{ kind: "write", path: "src/f.ts", content: "export {};\n" }]; const result = reconstructSession(session({ edits })); - expect(result.files.map((file) => file.absolutePath)).toEqual(["/repo/src/f.ts"]); + expect(result.files.map((file) => file.absolutePath)).toEqual([resolved("src/f.ts")]); }); it("ignores files outside the lintable extension allowlist", () => { @@ -111,6 +119,6 @@ describe("resolveEditPaths", () => { ], }), ); - expect(new Set(result)).toEqual(new Set(["/repo/src/y.ts", "/repo/src/x.ts"])); + expect(new Set(result)).toEqual(new Set([resolved("src/y.ts"), resolved("/repo/src/x.ts")])); }); }); From f15f640b64e294e05c6ec0fe5d7f4bf2f91c8a51 Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 18:49:31 -0700 Subject: [PATCH 07/17] fix(stats): correct reconstruction fidelity and skip bucketing (Bugbot) - A failed apply_patch update hunk left the prior in-session buffer in place and still emitted the file as faithfully reconstructed; drop it to unreconstructable so stale content is never linted as the model's output. - Sessions touching only non-lintable files (e.g. markdown) had zero reconstructed files and zero failures but were counted as "unreconstructable"; require an actual reconstruction failure for that bucket so the skip note stays accurate. 
--- packages/react-doctor/src/cli/commands/stats.ts | 5 ++++- .../react-doctor/src/stats/reconstruct-files.ts | 8 +++++++- .../tests/stats-reconstruct.test.ts | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts index 49083c289..686e30f46 100644 --- a/packages/react-doctor/src/cli/commands/stats.ts +++ b/packages/react-doctor/src/cli/commands/stats.ts @@ -96,7 +96,10 @@ export const statsAction = async (flags: StatsFlags): Promise => { (result) => result.filesScanned === 0 && result.reconstructedFiles > 0, ).length, sessionsUnreconstructable: results.filter( - (result) => result.filesScanned === 0 && result.reconstructedFiles === 0, + (result) => + result.filesScanned === 0 && + result.reconstructedFiles === 0 && + result.unreconstructable > 0, ).length, generatedAt: new Date().toISOString(), }; diff --git a/packages/react-doctor/src/stats/reconstruct-files.ts b/packages/react-doctor/src/stats/reconstruct-files.ts index 45ff72b8f..502757454 100644 --- a/packages/react-doctor/src/stats/reconstruct-files.ts +++ b/packages/react-doctor/src/stats/reconstruct-files.ts @@ -78,7 +78,13 @@ export const reconstructSession = (session: AgentSession): SessionReconstruction const base = buffers.get(resolved); if (typeof base !== "string") continue; const applied = applyUpdateHunks(base, op.hunkLines ?? []); - if (applied === null) continue; + if (applied === null) { + // The hunk didn't match our base, so our buffer is out of sync with + // what the model actually edited. Drop it to "no faithful base" rather + // than emit stale content as if it were the reconstructed result. 
+ buffers.delete(resolved); + continue; + } const movedTo = op.movePath && resolveAgainstCwd(op.movePath, session.cwd); if (movedTo) { buffers.set(resolved, null); diff --git a/packages/react-doctor/tests/stats-reconstruct.test.ts b/packages/react-doctor/tests/stats-reconstruct.test.ts index 2b2bfebd5..2798cbda1 100644 --- a/packages/react-doctor/tests/stats-reconstruct.test.ts +++ b/packages/react-doctor/tests/stats-reconstruct.test.ts @@ -81,6 +81,23 @@ describe("reconstructSession", () => { ); }); + it("flags an apply_patch Update whose hunk does not match the base as unreconstructable", () => { + const add = "*** Begin Patch\n*** Add File: /repo/src/h.ts\n+const value = 1;\n*** End Patch"; + const update = + "*** Begin Patch\n*** Update File: /repo/src/h.ts\n@@\n-const value = 999;\n+const value = 2;\n*** End Patch"; + const result = reconstructSession( + session({ + provider: "codex", + edits: [ + { kind: "patch", path: "", patch: add }, + { kind: "patch", path: "", patch: update }, + ], + }), + ); + expect(result.files).toEqual([]); + expect(result.unreconstructable).toEqual([resolved("/repo/src/h.ts")]); + }); + it("resolves relative edit paths against the session cwd", () => { const edits: FileEdit[] = [{ kind: "write", path: "src/f.ts", content: "export {};\n" }]; const result = reconstructSession(session({ edits })); From 0d13c862b386ced4c38f9952cf7315f8b44e1b4e Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 18:57:37 -0700 Subject: [PATCH 08/17] refactor(stats): stream JSONL transcripts via node:readline Replace the readFileSync + split("\n") transcript reader with a streaming node:readline parser so memory stays flat on large Claude/Codex transcripts. Makes session loading async (SessionCandidate.load + the parse adapters); the Cursor composer load wraps its sync DB walk to match. 
--- .../src/stats/discover-sessions.ts | 2 +- .../react-doctor/src/stats/sources/claude.ts | 4 +- .../react-doctor/src/stats/sources/codex.ts | 4 +- .../react-doctor/src/stats/sources/cursor.ts | 2 +- packages/react-doctor/src/stats/types.ts | 2 +- .../src/stats/walk-transcripts.ts | 44 +++++++++++-------- .../react-doctor/tests/stats-adapters.test.ts | 20 ++++----- 7 files changed, 42 insertions(+), 36 deletions(-) diff --git a/packages/react-doctor/src/stats/discover-sessions.ts b/packages/react-doctor/src/stats/discover-sessions.ts index a08711c40..f53e29228 100644 --- a/packages/react-doctor/src/stats/discover-sessions.ts +++ b/packages/react-doctor/src/stats/discover-sessions.ts @@ -41,7 +41,7 @@ export const discoverSessions = async ( for (const candidate of candidates) { if (sinceMs !== null && candidate.modifiedMs > 0 && candidate.modifiedMs < sinceMs) break; - const session = candidate.load(); + const session = await candidate.load(); scannedCount += 1; if ( session && diff --git a/packages/react-doctor/src/stats/sources/claude.ts b/packages/react-doctor/src/stats/sources/claude.ts index 50706cfb1..96e776788 100644 --- a/packages/react-doctor/src/stats/sources/claude.ts +++ b/packages/react-doctor/src/stats/sources/claude.ts @@ -41,7 +41,7 @@ const editsFromToolUse = (name: string, input: Record): FileEdi }); }; -export const parseClaudeSession = (transcriptPath: string): AgentSession | null => { +export const parseClaudeSession = async (transcriptPath: string): Promise => { const edits: FileEdit[] = []; const reads: FileRead[] = []; const modelCounts = new Map(); @@ -50,7 +50,7 @@ export const parseClaudeSession = (transcriptPath: string): AgentSession | null let endedAt: string | undefined; let sawAnything = false; - readJsonlEntries(transcriptPath, (entry) => { + await readJsonlEntries(transcriptPath, (entry) => { sawAnything = true; const timestamp = asString(entry.timestamp); if (timestamp) { diff --git 
a/packages/react-doctor/src/stats/sources/codex.ts b/packages/react-doctor/src/stats/sources/codex.ts index f487bdf37..0d9e0b6c2 100644 --- a/packages/react-doctor/src/stats/sources/codex.ts +++ b/packages/react-doctor/src/stats/sources/codex.ts @@ -9,13 +9,13 @@ import type { AgentSession, FileEdit, SourceDef } from "./index.js"; // Codex reconstructs only `apply_patch` (`custom_tool_call`) edits — `shell` // function calls (sed, heredoc redirects, …) are not faithfully reconstructable // and are skipped. Model comes from `turn_context`, cwd from `session_meta`. -export const parseCodexSession = (transcriptPath: string): AgentSession | null => { +export const parseCodexSession = async (transcriptPath: string): Promise => { const edits: FileEdit[] = []; const modelCounts = new Map(); let cwd: string | null = null; let sawAnything = false; - readJsonlEntries(transcriptPath, (entry) => { + await readJsonlEntries(transcriptPath, (entry) => { sawAnything = true; const payload = asRecord(entry.payload); if (!payload) return; diff --git a/packages/react-doctor/src/stats/sources/cursor.ts b/packages/react-doctor/src/stats/sources/cursor.ts index 2d288272d..da611c3f4 100644 --- a/packages/react-doctor/src/stats/sources/cursor.ts +++ b/packages/react-doctor/src/stats/sources/cursor.ts @@ -98,7 +98,7 @@ export const cursorComposerCandidates = (dbPath: string | null): SessionCandidat return db.composerHeaders().map((header) => ({ provider: "cursor" as const, modifiedMs: header.modifiedMs, - load: () => buildCursorSession(db, header.composerId), + load: async () => buildCursorSession(db, header.composerId), })); }; diff --git a/packages/react-doctor/src/stats/types.ts b/packages/react-doctor/src/stats/types.ts index d6480d2ac..3f6c6f1c9 100644 --- a/packages/react-doctor/src/stats/types.ts +++ b/packages/react-doctor/src/stats/types.ts @@ -50,7 +50,7 @@ export interface AgentSession { export interface SessionCandidate { readonly provider: StatsProvider; readonly modifiedMs: 
number; - load(): AgentSession | null; + load(): Promise; } /** A faithfully reconstructed file as the model left it at session end. */ diff --git a/packages/react-doctor/src/stats/walk-transcripts.ts b/packages/react-doctor/src/stats/walk-transcripts.ts index e37a13dfb..e56157d2c 100644 --- a/packages/react-doctor/src/stats/walk-transcripts.ts +++ b/packages/react-doctor/src/stats/walk-transcripts.ts @@ -1,5 +1,6 @@ import * as fs from "node:fs"; import * as path from "node:path"; +import * as readline from "node:readline"; import type { AgentSession, SessionCandidate, StatsProvider } from "./types.js"; /** File modification time in ms, or 0 when the file is missing/unreadable. */ @@ -20,7 +21,7 @@ export const fileSessionCandidates = ( provider: StatsProvider, roots: ReadonlyArray, discover: (root: string) => string[], - parse: (transcriptPath: string) => AgentSession | null, + parse: (transcriptPath: string) => Promise, ): SessionCandidate[] => { const candidates: SessionCandidate[] = []; for (const root of roots) { @@ -66,28 +67,33 @@ export const findJsonlFiles = (root: string, maxDepth: number): string[] => { }; /** - * Parse each non-empty line of a JSONL file, invoking `onEntry` with the decoded - * object. Unparseable lines and unreadable files are skipped silently so one - * corrupt transcript never sinks a whole run. + * Stream a JSONL file line-by-line through `node:readline`, invoking `onEntry` + * with each decoded object. Streaming keeps memory flat on large transcripts + * (no whole-file read). Unparseable lines and unreadable files are skipped + * silently so one corrupt transcript never sinks a whole run. 
*/ -export const readJsonlEntries = ( +export const readJsonlEntries = async ( filePath: string, onEntry: (entry: Record) => void, -): void => { - let raw: string; +): Promise => { + const lines = readline.createInterface({ + input: fs.createReadStream(filePath, { encoding: "utf8" }), + crlfDelay: Infinity, + }); try { - raw = fs.readFileSync(filePath, "utf8"); - } catch { - return; - } - for (const line of raw.split("\n")) { - if (!line.trim()) continue; - let entry: unknown; - try { - entry = JSON.parse(line); - } catch { - continue; + for await (const line of lines) { + if (!line.trim()) continue; + let entry: unknown; + try { + entry = JSON.parse(line); + } catch { + continue; + } + if (entry && typeof entry === "object") onEntry(entry as Record); } - if (entry && typeof entry === "object") onEntry(entry as Record); + } catch { + // Unreadable file / stream error: stop silently, keep partial entries. + } finally { + lines.close(); } }; diff --git a/packages/react-doctor/tests/stats-adapters.test.ts b/packages/react-doctor/tests/stats-adapters.test.ts index 1138d1654..260a6421b 100644 --- a/packages/react-doctor/tests/stats-adapters.test.ts +++ b/packages/react-doctor/tests/stats-adapters.test.ts @@ -43,7 +43,7 @@ afterAll(() => { }); describe("parseClaudeSession", () => { - it("extracts model, cwd, edits, and post-edit result content", () => { + it("extracts model, cwd, edits, and post-edit result content", async () => { const filePath = writeTranscript("claude.jsonl", [ { type: "assistant", @@ -66,7 +66,7 @@ describe("parseClaudeSession", () => { toolUseResult: { filePath: "/repo/src/a.ts", content: "export const a = 1;\n" }, }, ]); - const session = parseClaudeSession(filePath); + const session = await parseClaudeSession(filePath); expect(session?.model).toBe("claude-x"); expect(session?.cwd).toBe("/repo"); expect(session?.edits.some((edit) => edit.resultContent === "export const a = 1;\n")).toBe( @@ -76,7 +76,7 @@ describe("parseClaudeSession", () => { }); 
describe("parseCodexSession", () => { - it("extracts model from turn_context, cwd from session_meta, and apply_patch edits", () => { + it("extracts model from turn_context, cwd from session_meta, and apply_patch edits", async () => { const filePath = writeTranscript("codex.jsonl", [ { type: "session_meta", payload: { cwd: "/repo" } }, { type: "turn_context", payload: { model: "gpt-5.5" } }, @@ -89,7 +89,7 @@ describe("parseCodexSession", () => { }, }, ]); - const session = parseCodexSession(filePath); + const session = await parseCodexSession(filePath); expect(session?.model).toBe("gpt-5.5"); expect(session?.cwd).toBe("/repo"); expect(session?.edits).toHaveLength(1); @@ -138,7 +138,7 @@ const writeComposerDb = (name: string, composers: ReadonlyArray const describeCursor = sqlite ? describe : describe.skip; describeCursor("cursorComposerCandidates", () => { - it("attributes the composer model and reconstructs exact content via afterContentId", () => { + it("attributes the composer model and reconstructs exact content via afterContentId", async () => { closeCursorDb(); const dbPath = writeComposerDb("cursor-model.vscdb", [ { @@ -172,7 +172,7 @@ describeCursor("cursorComposerCandidates", () => { const candidates = cursorComposerCandidates(dbPath); expect(candidates).toHaveLength(1); - const session = candidates[0].load(); + const session = await candidates[0].load(); expect(session?.provider).toBe("cursor"); expect(session?.model).toBe("claude-opus-4-8"); expect(session?.edits).toHaveLength(2); @@ -184,7 +184,7 @@ describeCursor("cursorComposerCandidates", () => { ).toBe(true); }); - it("falls back to the dominant bubble model when the composer is on Auto", () => { + it("falls back to the dominant bubble model when the composer is on Auto", async () => { closeCursorDb(); const dbPath = writeComposerDb("cursor-auto.vscdb", [ { @@ -207,12 +207,12 @@ describeCursor("cursorComposerCandidates", () => { }, ]); - const session = cursorComposerCandidates(dbPath)[0]?.load(); + 
const session = await cursorComposerCandidates(dbPath)[0]?.load(); expect(session?.model).toBe("gpt-5.5"); expect(session?.edits[0]?.resultContent).toBe("export const y = 2;\n"); }); - it("ignores non-lintable edits and skips when the database is absent", () => { + it("ignores non-lintable edits and skips when the database is absent", async () => { closeCursorDb(); expect(cursorComposerCandidates(null)).toEqual([]); @@ -234,7 +234,7 @@ describeCursor("cursorComposerCandidates", () => { ], }, ]); - const session = cursorComposerCandidates(dbPath)[0]?.load(); + const session = await cursorComposerCandidates(dbPath)[0]?.load(); expect(session?.edits).toEqual([]); }); }); From dad2a5ca986e809de2635ef33fbe974218cd1abe Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 19:12:55 -0700 Subject: [PATCH 09/17] fix(stats): address review feedback (score correctness, JSON errors, --since) - Drop scans that error/skip/lint-fail instead of counting them as clean code, which was inflating the leaderboard. - Emit structured JSON on failure in --json mode (reuse enableJsonMode), which also silences the incidental score-API stderr warning. - Exclude unknown-timestamp candidates under --since so the filter is consistent. - Consolidate the path-inside predicate, move render magic numbers to constants, type-guard the provider flag, throw on invalid --limit, rename op -> operation. 
--- .changeset/stats-agent-leaderboard.md | 6 ++--- .../react-doctor/src/cli/commands/stats.ts | 19 ++++++++++++---- packages/react-doctor/src/cli/index.ts | 7 +++--- packages/react-doctor/src/stats/constants.ts | 8 +++++++ .../src/stats/discover-sessions.ts | 22 +++++++++++-------- .../react-doctor/src/stats/is-path-inside.ts | 21 ++++++++++++++++++ .../stats/materialize-reconstructed-tree.ts | 8 ++----- .../src/stats/reconstruct-files.ts | 16 +++++++------- .../react-doctor/src/stats/render-stats.ts | 21 ++++++++++++------ .../react-doctor/src/stats/run-stats-scan.ts | 11 +++++++--- 10 files changed, 96 insertions(+), 43 deletions(-) create mode 100644 packages/react-doctor/src/stats/is-path-inside.ts diff --git a/.changeset/stats-agent-leaderboard.md b/.changeset/stats-agent-leaderboard.md index 0d99b8e11..af7b92e74 100644 --- a/.changeset/stats-agent-leaderboard.md +++ b/.changeset/stats-agent-leaderboard.md @@ -6,9 +6,9 @@ Add a `react-doctor stats` subcommand — a per-model code-quality leaderboard b `stats` reads local agent history — Claude Code (`~/.claude`) and Codex (`~/.codex`) transcripts, plus the Cursor composer database — reconstructs the file content each model actually wrote (Claude post-edit snapshots, Cursor full post-edit file snapshots, Codex `apply_patch` envelopes), lints that content with the existing engine, and ranks models and providers by their React Doctor score and diagnostics-per-file. The job: answer "which agent/model writes the cleanest React code in my repo". -- Only the React code each model wrote is scored. Reconstructed files are filtered to actual React (JSX/TSX, `use client`/`use server` directives, or a React-ecosystem import) before linting, so a model's plain backend/util/config files don't pad its file count or dilute its diagnostics-per-file. 
-- Ranking is by a confidence-weighted score, not the raw score: each group's score is regressed toward the global mean by its evidence, so a model with a handful of clean files can't top the board on a tiny sample. Files are the dominant signal; sessions only lightly discount the file weight (many files from one session are one correlated sample) and never below a floor. The terminal table shows both the raw `Score` and the `Weighted` score it ranks on. +- Only the React code each model wrote is scored. Reconstructed files are filtered to actual React (JSX/TSX, `use client`/`use server` directives, or a React-ecosystem import) before linting, so a model's plain backend/util/config files don't pad its file count or dilute its diagnostics-per-file. A scan that errors, is skipped, or whose lint phase fails is dropped rather than counted as zero-diagnostic "clean" code, so un-lintable output can't inflate a model's score. +- Ranking is by a confidence-weighted score, not the raw score: each group's score is regressed toward the global mean by its evidence, so a model with a handful of clean files can't top the board on a tiny sample. Files are the dominant signal; sessions only lightly discount the file weight (many files from one session are one correlated sample) and never below a floor. - Cursor attribution reads the canonical composer database (`state.vscdb`) directly, so each session carries its real model (e.g. `claude-opus-4-8`, `gpt-5.5`, `composer-2`) and an exact post-edit snapshot of every edited file — the model-less agent-transcript JSONL files are no longer used. Attribution falls back to `unknown` only for chats left on the "Auto" model. - Default scope is the current repository (sessions whose cwd or edits touch the repo root); `--global` ranks across every repo on the machine. `--since`, `--limit`, and `--provider` bound the work. 
-- `--json` emits a structured leaderboard (`{ schemaVersion, scope, models, providers, best, worst, … }`); the terminal output shows model + provider tables with score bars and a best/worst callout. +- `--json` emits a structured leaderboard (`{ schemaVersion, scope, models, providers, best, worst, … }`); the terminal output shows the top models and per-tool tables with a single score bar (the confidence-weighted score) and a best/worst callout. - Coverage is honest about its limits: Codex shell-based edits are not faithfully reconstructable (surfaced as skipped), the Cursor composer database requires `node:sqlite` (Node 22.13+) and covers GUI agent sessions (not cursor-agent CLI runs), and the score requires network access. diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts index 686e30f46..1455c586e 100644 --- a/packages/react-doctor/src/cli/commands/stats.ts +++ b/packages/react-doctor/src/cli/commands/stats.ts @@ -7,6 +7,7 @@ import { renderStatsReport } from "../../stats/render-stats.js"; import { runStatsScan } from "../../stats/run-stats-scan.js"; import type { StatsProvider, StatsReport, StatsScopeOptions } from "../../stats/types.js"; import { METRIC } from "../utils/constants.js"; +import { enableJsonMode } from "../utils/json-mode.js"; import { recordCount } from "../utils/record-metric.js"; import { spinner } from "../utils/spinner.js"; @@ -19,14 +20,16 @@ export interface StatsFlags { cwd?: string; } -const VALID_PROVIDERS = new Set(["claude", "codex", "cursor"]); +const VALID_PROVIDERS = new Set(["claude", "codex", "cursor"]); + +const isStatsProvider = (value: string): value is StatsProvider => VALID_PROVIDERS.has(value); const parseProvider = (value: string | undefined): StatsProvider | undefined => { if (value === undefined) return undefined; - if (!VALID_PROVIDERS.has(value as StatsProvider)) { + if (!isStatsProvider(value)) { throw new Error(`Unknown provider "${value}". 
Expected one of: claude, codex, cursor.`); } - return value as StatsProvider; + return value; }; const parseSince = (value: string | undefined): Date | undefined => { @@ -41,7 +44,10 @@ const parseSince = (value: string | undefined): Date | undefined => { const parseLimit = (value: string | undefined): number => { if (value === undefined) return STATS_DEFAULT_SESSION_LIMIT; const parsed = Number.parseInt(value, 10); - return Number.isFinite(parsed) && parsed > 0 ? parsed : STATS_DEFAULT_SESSION_LIMIT; + if (!Number.isFinite(parsed) || parsed <= 0) { + throw new Error(`Invalid --limit "${value}". Use a positive integer, e.g. 200.`); + } + return parsed; }; const resolveTarget = async ( @@ -57,6 +63,11 @@ const resolveTarget = async ( export const statsAction = async (flags: StatsFlags): Promise => { const directory = flags.cwd ?? process.cwd(); + // Register JSON mode up front so any throw (flag parsing, scan, or score API + // failure) is emitted as a structured JSON error by the top-level handler + // instead of plain text — and so incidental logs (e.g. a score-API warning) + // never corrupt the report on stdout. + if (flags.json) enableJsonMode({ compact: false, directory }); const scope: StatsScopeOptions = { global: flags.global ?? false, since: parseSince(flags.since), diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts index 755dc8bcb..74482907c 100644 --- a/packages/react-doctor/src/cli/index.ts +++ b/packages/react-doctor/src/cli/index.ts @@ -269,9 +269,10 @@ program .option("--color", "force colored output") .option("--no-color", "disable colored output (also honors NO_COLOR)") .addHelpText("after", renderStatsHelpEpilog) - // HACK: `--json` is also declared on the root program, so Commander stashes - // it on the parent. Route through `optsWithGlobals()` so the merged option - // set (subcommand + inherited globals) is what the action sees. 
+ // stats redeclares --json/--cwd/--color, but the root program also exposes + // them as globals (e.g. --json for the default inspect command). Merge via + // optsWithGlobals() so a flag works whether it lands before or after the + // subcommand. .action((_options, command) => statsAction(command.optsWithGlobals())); const rules = program diff --git a/packages/react-doctor/src/stats/constants.ts b/packages/react-doctor/src/stats/constants.ts index 26fd2d9b6..780289126 100644 --- a/packages/react-doctor/src/stats/constants.ts +++ b/packages/react-doctor/src/stats/constants.ts @@ -51,3 +51,11 @@ export const STATS_TOP_RULES_PER_GROUP = 3; // Label used when a session does not expose a stable model id (e.g. a Cursor // composer left on the "Auto" default with no per-bubble model recorded). export const STATS_UNKNOWN_MODEL = "unknown"; + +// Width (in cells) of the unicode score bar drawn next to each leaderboard score. +export const STATS_SCORE_BAR_WIDTH = 16; + +// Score thresholds that pick the bar color: at or above HIGH is green, at or +// above MEDIUM is yellow, below is red. +export const STATS_SCORE_COLOR_HIGH = 80; +export const STATS_SCORE_COLOR_MEDIUM = 50; diff --git a/packages/react-doctor/src/stats/discover-sessions.ts b/packages/react-doctor/src/stats/discover-sessions.ts index f53e29228..6024231cb 100644 --- a/packages/react-doctor/src/stats/discover-sessions.ts +++ b/packages/react-doctor/src/stats/discover-sessions.ts @@ -1,5 +1,5 @@ -import * as path from "node:path"; import { STATS_DISCOVERY_YIELD_INTERVAL } from "./constants.js"; +import { isPathInside } from "./is-path-inside.js"; import { STATS_SOURCES } from "./sources/index.js"; import { resolveEditPaths } from "./reconstruct-files.js"; import type { AgentSession, StatsScopeOptions } from "./types.js"; @@ -7,14 +7,11 @@ import type { AgentSession, StatsScopeOptions } from "./types.js"; /** Reports discovery progress: sessions kept so far, candidates scanned so far. 
*/ export type DiscoveryProgress = (foundCount: number, scannedCount: number) => void; -const isPathUnder = (childPath: string, parentPath: string): boolean => { - const relative = path.relative(parentPath, childPath); - return !relative.startsWith("..") && !path.isAbsolute(relative); -}; - const sessionTouchesRepo = (session: AgentSession, repoRoot: string): boolean => { - if (session.cwd && isPathUnder(session.cwd, repoRoot)) return true; - return resolveEditPaths(session).some((editPath) => isPathUnder(editPath, repoRoot)); + if (session.cwd && isPathInside(session.cwd, repoRoot, { allowSame: true })) return true; + return resolveEditPaths(session).some((editPath) => + isPathInside(editPath, repoRoot, { allowSame: true }), + ); }; /** @@ -39,7 +36,14 @@ export const discoverSessions = async ( const sessions: AgentSession[] = []; let scannedCount = 0; for (const candidate of candidates) { - if (sinceMs !== null && candidate.modifiedMs > 0 && candidate.modifiedMs < sinceMs) break; + // With `--since`, a candidate whose timestamp is unknown (`modifiedMs <= 0`) + // can't be proven on-or-after the cutoff, so it's excluded rather than + // ambiguously kept. Dated candidates are sorted newest-first, so the first + // one older than the cutoff ends the walk. + if (sinceMs !== null) { + if (candidate.modifiedMs <= 0) continue; + if (candidate.modifiedMs < sinceMs) break; + } const session = await candidate.load(); scannedCount += 1; diff --git a/packages/react-doctor/src/stats/is-path-inside.ts b/packages/react-doctor/src/stats/is-path-inside.ts new file mode 100644 index 000000000..3daecb726 --- /dev/null +++ b/packages/react-doctor/src/stats/is-path-inside.ts @@ -0,0 +1,21 @@ +import * as path from "node:path"; + +export interface IsPathInsideOptions { + /** When `true`, `childPath` equal to `parentPath` counts as inside. */ + readonly allowSame?: boolean; +} + +/** + * `true` when `childPath` resolves within `parentPath`. 
By default the parent + * directory itself does not count (the strict zip-slip guard); pass + * `allowSame: true` to treat an exact match as inside (scope membership). + */ +export const isPathInside = ( + childPath: string, + parentPath: string, + options: IsPathInsideOptions = {}, +): boolean => { + const relative = path.relative(parentPath, childPath); + if (!relative) return Boolean(options.allowSame); + return !relative.startsWith("..") && !path.isAbsolute(relative); +}; diff --git a/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts b/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts index 744961598..955b17f28 100644 --- a/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts +++ b/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts @@ -3,6 +3,7 @@ import * as os from "node:os"; import * as path from "node:path"; import { STAGED_FILES_PROJECT_CONFIG_FILENAMES } from "@react-doctor/core"; import { STATS_TEMP_DIR_PREFIX } from "./constants.js"; +import { isPathInside } from "./is-path-inside.js"; import type { ReconstructedFile } from "./types.js"; export interface MaterializedReconstruction { @@ -13,11 +14,6 @@ export interface MaterializedReconstruction { readonly cleanup: () => void; } -const isInsideDirectory = (childPath: string, parentPath: string): boolean => { - const relative = path.relative(parentPath, childPath); - return Boolean(relative) && !relative.startsWith("..") && !path.isAbsolute(relative); -}; - /** * Write reconstructed file content into a fresh temp tree mirroring the scan * layout, copying the project-config files (`tsconfig` / `package.json` / @@ -35,7 +31,7 @@ export const materializeReconstructedTree = ( for (const file of files) { const targetPath = path.resolve(resolvedTempDirectory, file.relativePath); - if (!isInsideDirectory(targetPath, resolvedTempDirectory)) continue; + if (!isPathInside(targetPath, resolvedTempDirectory)) continue; fs.mkdirSync(path.dirname(targetPath), 
{ recursive: true }); fs.writeFileSync(targetPath, file.content); relativePaths.push(file.relativePath); diff --git a/packages/react-doctor/src/stats/reconstruct-files.ts b/packages/react-doctor/src/stats/reconstruct-files.ts index 502757454..279310933 100644 --- a/packages/react-doctor/src/stats/reconstruct-files.ts +++ b/packages/react-doctor/src/stats/reconstruct-files.ts @@ -39,7 +39,7 @@ export const resolveEditPaths = (session: AgentSession): string[] => { }; for (const edit of session.edits) { if (edit.kind === "patch") { - for (const op of parseApplyPatch(edit.patch ?? "")) add(op.path); + for (const operation of parseApplyPatch(edit.patch ?? "")) add(operation.path); } else { add(edit.path); } @@ -65,19 +65,19 @@ export const reconstructSession = (session: AgentSession): SessionReconstruction } const applyPatchOps = (patchText: string): void => { - for (const op of parseApplyPatch(patchText)) { - const resolved = resolveAgainstCwd(op.path, session.cwd); + for (const operation of parseApplyPatch(patchText)) { + const resolved = resolveAgainstCwd(operation.path, session.cwd); if (!resolved) continue; if (isLintablePath(resolved)) touchedLintable.add(resolved); - if (op.type === "add") { - const lines = op.addedLines ?? []; + if (operation.type === "add") { + const lines = operation.addedLines ?? []; buffers.set(resolved, lines.length > 0 ? `${lines.join("\n")}\n` : ""); - } else if (op.type === "delete") { + } else if (operation.type === "delete") { buffers.set(resolved, null); } else { const base = buffers.get(resolved); if (typeof base !== "string") continue; - const applied = applyUpdateHunks(base, op.hunkLines ?? []); + const applied = applyUpdateHunks(base, operation.hunkLines ?? []); if (applied === null) { // The hunk didn't match our base, so our buffer is out of sync with // what the model actually edited. 
Drop it to "no faithful base" rather @@ -85,7 +85,7 @@ export const reconstructSession = (session: AgentSession): SessionReconstruction buffers.delete(resolved); continue; } - const movedTo = op.movePath && resolveAgainstCwd(op.movePath, session.cwd); + const movedTo = operation.movePath && resolveAgainstCwd(operation.movePath, session.cwd); if (movedTo) { buffers.set(resolved, null); buffers.set(movedTo, applied); diff --git a/packages/react-doctor/src/stats/render-stats.ts b/packages/react-doctor/src/stats/render-stats.ts index 16c13bb0f..66c1cecf1 100644 --- a/packages/react-doctor/src/stats/render-stats.ts +++ b/packages/react-doctor/src/stats/render-stats.ts @@ -1,12 +1,15 @@ import { highlighter } from "@react-doctor/core"; -import { STATS_LEADERBOARD_TOP_N } from "./constants.js"; +import { + STATS_LEADERBOARD_TOP_N, + STATS_SCORE_BAR_WIDTH, + STATS_SCORE_COLOR_HIGH, + STATS_SCORE_COLOR_MEDIUM, +} from "./constants.js"; import type { GroupStats, StatsReport } from "./types.js"; -const SCORE_BAR_WIDTH = 16; - const colorForScore = (score: number): ((text: string) => string) => { - if (score >= 80) return highlighter.success; - if (score >= 50) return highlighter.warn; + if (score >= STATS_SCORE_COLOR_HIGH) return highlighter.success; + if (score >= STATS_SCORE_COLOR_MEDIUM) return highlighter.warn; return highlighter.error; }; @@ -21,11 +24,15 @@ const renderScore = (group: GroupStats): string => { if (group.weightedScore === null) return highlighter.dim("n/a"); const filledCount = Math.max( 0, - Math.min(SCORE_BAR_WIDTH, Math.round((group.weightedScore / 100) * SCORE_BAR_WIDTH)), + Math.min( + STATS_SCORE_BAR_WIDTH, + Math.round((group.weightedScore / 100) * STATS_SCORE_BAR_WIDTH), + ), ); const paint = colorForScore(group.weightedScore); const bar = - paint("█".repeat(filledCount)) + highlighter.dim("░".repeat(SCORE_BAR_WIDTH - filledCount)); + paint("█".repeat(filledCount)) + + highlighter.dim("░".repeat(STATS_SCORE_BAR_WIDTH - filledCount)); return 
`${bar} ${paint(String(group.weightedScore).padStart(3))}`; }; diff --git a/packages/react-doctor/src/stats/run-stats-scan.ts b/packages/react-doctor/src/stats/run-stats-scan.ts index aa67fa25c..1beb39c84 100644 --- a/packages/react-doctor/src/stats/run-stats-scan.ts +++ b/packages/react-doctor/src/stats/run-stats-scan.ts @@ -1,6 +1,7 @@ import * as path from "node:path"; import { mapWithConcurrency, runEditorScan, type Diagnostic } from "@react-doctor/core"; import { STATS_SCAN_CONCURRENCY } from "./constants.js"; +import { isPathInside } from "./is-path-inside.js"; import { isReactSourceFile } from "./is-react-source.js"; import { materializeReconstructedTree } from "./materialize-reconstructed-tree.js"; import { reconstructSession } from "./reconstruct-files.js"; @@ -93,9 +94,8 @@ const scanSession = async ( const files: ReconstructedFile[] = []; for (const file of reactFiles) { - const relative = toPosix(path.relative(scanRoot, file.absolutePath)); - if (!relative || relative.startsWith("..") || path.isAbsolute(relative)) continue; - files.push({ ...file, relativePath: relative }); + if (!isPathInside(file.absolutePath, scanRoot)) continue; + files.push({ ...file, relativePath: toPosix(path.relative(scanRoot, file.absolutePath)) }); } if (files.length === 0) return empty; @@ -109,6 +109,11 @@ const scanSession = async ( // The node running the CLI can load oxlint's native binding. nodeBinaryPath: process.execPath, }); + // A scan that errored, was skipped (unanalyzable project), or whose lint + // phase failed yields zero diagnostics for reasons unrelated to code + // quality. Counting its files as clean would reward un-lintable code and + // inflate the leaderboard, so it joins the empty bucket instead. 
+ if (!result.ok || result.skipped || result.didLintFail) return empty; const diagnostics: Diagnostic[] = result.diagnostics.map((diagnostic) => ({ ...diagnostic, filePath: remapDiagnosticPath( From 9a20e3d4952125783f6730641a54aa90bed22526 Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 19:24:28 -0700 Subject: [PATCH 10/17] fix(stats): drop unfaithful StrReplace edits instead of linting stale content A replace/Edit whose oldString isn't in the in-session buffer now marks the file unreconstructable (like a failed apply_patch hunk) rather than keeping the stale snapshot and scoring it as the model's final output. --- .../src/stats/reconstruct-files.ts | 30 ++++++++++++------- .../tests/stats-reconstruct.test.ts | 10 +++++++ 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/packages/react-doctor/src/stats/reconstruct-files.ts b/packages/react-doctor/src/stats/reconstruct-files.ts index 279310933..3e39b69db 100644 --- a/packages/react-doctor/src/stats/reconstruct-files.ts +++ b/packages/react-doctor/src/stats/reconstruct-files.ts @@ -13,14 +13,19 @@ const resolveAgainstCwd = (rawPath: string, cwd: string | null): string | null = return path.resolve(cwd, rawPath); }; +/** + * Apply a string-replace edit, or `null` when it can't be applied faithfully + * (the `oldString` isn't in our buffer, so we're out of sync with what the + * model actually edited). An empty `oldString` is a no-op rather than a failure. + */ const applyStringReplace = ( source: string, oldString: string, newString: string, replaceAll: boolean, -): string => { +): string | null => { if (oldString === "") return source; - if (!source.includes(oldString)) return source; + if (!source.includes(oldString)) return null; return replaceAll ? 
source.split(oldString).join(newString) : source.replace(oldString, newString); @@ -112,15 +117,20 @@ export const reconstructSession = (session: AgentSession): SessionReconstruction } else { const base = buffers.get(resolved); if (typeof base !== "string") continue; - buffers.set( - resolved, - applyStringReplace( - base, - edit.oldString ?? "", - edit.newString ?? "", - edit.replaceAll ?? false, - ), + const applied = applyStringReplace( + base, + edit.oldString ?? "", + edit.newString ?? "", + edit.replaceAll ?? false, ); + // The oldString wasn't in our buffer, so it's out of sync with what the + // model actually edited. Drop to "no faithful base" rather than lint stale + // content — mirrors the apply_patch hunk-mismatch handling above. + if (applied === null) { + buffers.delete(resolved); + continue; + } + buffers.set(resolved, applied); } } diff --git a/packages/react-doctor/tests/stats-reconstruct.test.ts b/packages/react-doctor/tests/stats-reconstruct.test.ts index 2798cbda1..7e14b3fe8 100644 --- a/packages/react-doctor/tests/stats-reconstruct.test.ts +++ b/packages/react-doctor/tests/stats-reconstruct.test.ts @@ -53,6 +53,16 @@ describe("reconstructSession", () => { expect(result.unreconstructable).toEqual([resolved("/repo/src/c.ts")]); }); + it("drops a StrReplace whose oldString is absent from the base rather than linting stale content", () => { + const edits: FileEdit[] = [ + { kind: "write", path: "/repo/src/r.ts", content: "const x = 1;\n" }, + { kind: "replace", path: "/repo/src/r.ts", oldString: "does-not-exist", newString: "y" }, + ]; + const result = reconstructSession(session({ provider: "cursor", edits })); + expect(result.files).toEqual([]); + expect(result.unreconstructable).toEqual([resolved("/repo/src/r.ts")]); + }); + it("reconstructs a Codex apply_patch Add File", () => { const patch = "*** Begin Patch\n*** Add File: /repo/src/d.ts\n+export const d = 1;\n*** End Patch"; From f6b2f033f31bfa1e0fca52efbfa8445c9d2231b7 Mon Sep 17 00:00:00 
2001 From: Aiden Bai Date: Sun, 21 Jun 2026 19:34:48 -0700 Subject: [PATCH 11/17] fix(stats): weight scores by productive sessions, not dead ones Confidence weighting now counts only sessions that contributed scanned files, so non-React/failed/skipped sessions no longer raise session reliability or effective file weight. The reported per-group session count still reflects every analyzed session. --- .../react-doctor/src/stats/aggregate-stats.ts | 20 ++++++++++++++---- .../tests/stats-aggregate.test.ts | 21 +++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/packages/react-doctor/src/stats/aggregate-stats.ts b/packages/react-doctor/src/stats/aggregate-stats.ts index f04e4db64..149097836 100644 --- a/packages/react-doctor/src/stats/aggregate-stats.ts +++ b/packages/react-doctor/src/stats/aggregate-stats.ts @@ -27,6 +27,8 @@ interface Accumulator { readonly key: string; readonly provider: StatsProvider | "mixed"; sessions: number; + /** Sessions that contributed at least one scanned file — the weighting unit. 
*/ + scoredSessions: number; filesScanned: number; unreconstructable: number; diagnostics: Diagnostic[]; @@ -40,10 +42,19 @@ const upsert = ( ): void => { let group = groups.get(key); if (!group) { - group = { key, provider, sessions: 0, filesScanned: 0, unreconstructable: 0, diagnostics: [] }; + group = { + key, + provider, + sessions: 0, + scoredSessions: 0, + filesScanned: 0, + unreconstructable: 0, + diagnostics: [], + }; groups.set(key, group); } group.sessions += 1; + if (result.filesScanned > 0) group.scoredSessions += 1; group.filesScanned += result.filesScanned; group.unreconstructable += result.unreconstructable; group.diagnostics.push(...result.diagnostics); @@ -62,13 +73,14 @@ export const confidenceWeightedScore = ( rawScore: number | null, priorScore: number | null, filesScanned: number, - sessions: number, + scoredSessions: number, ): number | null => { if (rawScore === null) return null; if (priorScore === null) return rawScore; const sessionReliability = STATS_SCORE_SESSION_FLOOR + - (1 - STATS_SCORE_SESSION_FLOOR) * (sessions / (sessions + STATS_SCORE_SESSION_PRIOR)); + (1 - STATS_SCORE_SESSION_FLOOR) * + (scoredSessions / (scoredSessions + STATS_SCORE_SESSION_PRIOR)); const effectiveFiles = filesScanned * sessionReliability; return Math.round( (priorScore * STATS_SCORE_PRIOR_FILES + rawScore * effectiveFiles) / @@ -123,7 +135,7 @@ const toGroupStats = async ( rawScore, priorScore, accumulator.filesScanned, - accumulator.sessions, + accumulator.scoredSessions, ), topRules: topRules(accumulator.diagnostics), }; diff --git a/packages/react-doctor/tests/stats-aggregate.test.ts b/packages/react-doctor/tests/stats-aggregate.test.ts index a43b25eed..3de14e90e 100644 --- a/packages/react-doctor/tests/stats-aggregate.test.ts +++ b/packages/react-doctor/tests/stats-aggregate.test.ts @@ -110,6 +110,27 @@ describe("aggregateStats", () => { expect(aggregated.models[0]?.weightedScore ?? 0).toBeGreaterThan(small?.weightedScore ?? 
0); }); + it("does not let dead (0-file) sessions inflate a group's weighting", async () => { + const productive = [ + result("claude", "a", 10, [diagnostic("r1")]), + result("claude", "a", 10, [diagnostic("r1")]), + ]; + // Same model "b": identical scored output, but padded with non-React/failed + // sessions that scanned no files. Those must not change the weighted score. + const padded = [ + result("codex", "b", 10, [diagnostic("r1")]), + result("codex", "b", 10, [diagnostic("r1")]), + ...Array.from({ length: 8 }, () => result("codex", "b", 0, [])), + ]; + const aggregated = await aggregateStats([...productive, ...padded], null, stubScore); + const a = aggregated.models.find((group) => group.key === "claude/a"); + const b = aggregated.models.find((group) => group.key === "codex/b"); + expect(a?.score).toBe(b?.score); + expect(a?.weightedScore).toBe(b?.weightedScore); + // The reported session count still reflects every analyzed session. + expect(b?.sessions).toBe(10); + }); + it("leaves the score null when a group lacks enough files to rank fairly", async () => { const results = [result("claude", "m1", 1, [diagnostic("r1")])]; let called = false; From 3f50df7d4c8ff6e7ca53ec6ac4ce2b993112df10 Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 19:43:58 -0700 Subject: [PATCH 12/17] chore(stats): bump changeset to patch --- .changeset/stats-agent-leaderboard.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/stats-agent-leaderboard.md b/.changeset/stats-agent-leaderboard.md index af7b92e74..db55aa003 100644 --- a/.changeset/stats-agent-leaderboard.md +++ b/.changeset/stats-agent-leaderboard.md @@ -1,5 +1,5 @@ --- -"react-doctor": minor +"react-doctor": patch --- Add a `react-doctor stats` subcommand — a per-model code-quality leaderboard built from local AI agent chat history. 
From 509f2296b1d1d417f974bda42e402e59832c0b23 Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Sun, 21 Jun 2026 20:21:22 -0700 Subject: [PATCH 13/17] fix(ci): publish deslop-js to pkg.pr.new so previews install react-doctor has a runtime `deslop-js: workspace:*` dependency, but the Continuous Releases workflow didn't publish deslop-js, so pkg.pr.new couldn't rewrite the ref and `npx https://pkg.pr.new/react-doctor@` failed with EUNSUPPORTEDPROTOCOL ("workspace:"). Add deslop-js to the publish set. --- .github/workflows/publish-any-commit.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish-any-commit.yml b/.github/workflows/publish-any-commit.yml index f31673c2b..ad9b6d4b4 100644 --- a/.github/workflows/publish-any-commit.yml +++ b/.github/workflows/publish-any-commit.yml @@ -40,7 +40,8 @@ jobs: if pnpm dlx pkg-pr-new publish \ ./packages/react-doctor \ ./packages/oxlint-plugin-react-doctor \ - ./packages/eslint-plugin-react-doctor; then + ./packages/eslint-plugin-react-doctor \ + ./packages/deslop-js; then exit 0 fi From f26f96015641bbe8e00b0f3adde82171d471a0ce Mon Sep 17 00:00:00 2001 From: Rayhan Noufal Arayilakath Date: Mon, 22 Jun 2026 17:34:58 -0700 Subject: [PATCH 14/17] feat(stats): scan every Cursor store (Nightly GUI + CLI agent) and deslop the engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broaden which local agent history `stats` reads, and refine the engine. Coverage: - Cursor GUI: scan both the stable and Nightly builds' composer databases (was stable-only — a Nightly-only user got zero GUI sessions), and read a live, editor-locked database via SQLite's immutable mode instead of letting the lock crash the run. 
- Cursor CLI agent: new source for the per-session content-addressed stores under ~/.cursor and ~/.cursor-nightly — decode the hex meta row, parse the binary message manifest, and map Write/ApplyPatch/StrReplace/Delete tool calls to edits, capturing Read results as reconstruction bases. - Codex (~/.codex) was already covered; verified. Engine deslop (behavior-preserving): - consolidate the zip-slip path-inside guard into one audited core util (@react-doctor/core isPathInside), and share the node:sqlite read-only open and the empty-string-preserving string narrow (coerce asNullableString) instead of hand-rolling copies - drop dead code (write-only session timestamps, an unused export, an unreachable branch), replace forbidden nested ternaries with if/else and a lookup table, collapse a redundant variable and a pass-through wrapper - guard every SQLite close so a locked/unreadable store degrades to "skip" rather than sinking the whole stats run Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/stats-agent-leaderboard.md | 6 +- packages/core/src/index.ts | 1 + packages/core/src/materialize-source-tree.ts | 14 +- .../src/utils}/is-path-inside.ts | 7 + .../react-doctor/src/cli/commands/stats.ts | 4 +- packages/react-doctor/src/cli/index.ts | 2 +- .../react-doctor/src/stats/aggregate-stats.ts | 2 +- packages/react-doctor/src/stats/coerce.ts | 4 + .../src/stats/cursor-cli-store.ts | 115 ++++++++++ packages/react-doctor/src/stats/cursor-db.ts | 204 +++++++++--------- .../src/stats/discover-sessions.ts | 2 +- .../stats/materialize-reconstructed-tree.ts | 5 +- .../react-doctor/src/stats/open-sqlite.ts | 66 ++++++ .../src/stats/parse-apply-patch.ts | 7 +- .../react-doctor/src/stats/run-stats-scan.ts | 8 +- .../react-doctor/src/stats/sources/claude.ts | 9 - .../react-doctor/src/stats/sources/codex.ts | 2 +- .../src/stats/sources/cursor-cli.ts | 159 ++++++++++++++ .../react-doctor/src/stats/sources/cursor.ts | 6 +- .../react-doctor/src/stats/sources/index.ts | 13 +- 
packages/react-doctor/src/stats/types.ts | 2 - .../react-doctor/tests/stats-adapters.test.ts | 162 +++++++++++++- .../tests/stats-aggregate.test.ts | 10 +- 23 files changed, 655 insertions(+), 155 deletions(-) rename packages/{react-doctor/src/stats => core/src/utils}/is-path-inside.ts (61%) create mode 100644 packages/react-doctor/src/stats/cursor-cli-store.ts create mode 100644 packages/react-doctor/src/stats/open-sqlite.ts create mode 100644 packages/react-doctor/src/stats/sources/cursor-cli.ts diff --git a/.changeset/stats-agent-leaderboard.md b/.changeset/stats-agent-leaderboard.md index db55aa003..2da732083 100644 --- a/.changeset/stats-agent-leaderboard.md +++ b/.changeset/stats-agent-leaderboard.md @@ -4,11 +4,11 @@ Add a `react-doctor stats` subcommand — a per-model code-quality leaderboard built from local AI agent chat history. -`stats` reads local agent history — Claude Code (`~/.claude`) and Codex (`~/.codex`) transcripts, plus the Cursor composer database — reconstructs the file content each model actually wrote (Claude post-edit snapshots, Cursor full post-edit file snapshots, Codex `apply_patch` envelopes), lints that content with the existing engine, and ranks models and providers by their React Doctor score and diagnostics-per-file. The job: answer "which agent/model writes the cleanest React code in my repo". +`stats` reads local agent history — Claude Code (`~/.claude`) and Codex (`~/.codex`) transcripts, plus Cursor's GUI composer databases and CLI agent stores (`~/.cursor`, `~/.cursor-nightly`) — reconstructs the file content each model actually wrote (Claude post-edit snapshots, Cursor full post-edit file snapshots, Codex `apply_patch` envelopes), lints that content with the existing engine, and ranks models and providers by their React Doctor score and diagnostics-per-file. The job: answer "which agent/model writes the cleanest React code in my repo". - Only the React code each model wrote is scored. 
Reconstructed files are filtered to actual React (JSX/TSX, `use client`/`use server` directives, or a React-ecosystem import) before linting, so a model's plain backend/util/config files don't pad its file count or dilute its diagnostics-per-file. A scan that errors, is skipped, or whose lint phase fails is dropped rather than counted as zero-diagnostic "clean" code, so un-lintable output can't inflate a model's score. - Ranking is by a confidence-weighted score, not the raw score: each group's score is regressed toward the global mean by its evidence, so a model with a handful of clean files can't top the board on a tiny sample. Files are the dominant signal; sessions only lightly discount the file weight (many files from one session are one correlated sample) and never below a floor. -- Cursor attribution reads the canonical composer database (`state.vscdb`) directly, so each session carries its real model (e.g. `claude-opus-4-8`, `gpt-5.5`, `composer-2`) and an exact post-edit snapshot of every edited file — the model-less agent-transcript JSONL files are no longer used. Attribution falls back to `unknown` only for chats left on the "Auto" model. +- Cursor is read from every place it stores chats: the GUI composer database (`state.vscdb`) for both the stable and Nightly builds, and the CLI agent's per-session stores under `~/.cursor` and `~/.cursor-nightly`. Each session carries its real model (e.g. `claude-opus-4-8`, `gpt-5.5`, `composer-2.5`) and a faithful reconstruction of every edited file (full GUI post-edit snapshots; CLI `Write`/`ApplyPatch`/`StrReplace`/`Delete` tool calls replayed against captured reads). A database a running editor holds locked is read via SQLite's `immutable` mode rather than skipped. Attribution falls back to `unknown` only for GUI chats left on the "Auto" model. - Default scope is the current repository (sessions whose cwd or edits touch the repo root); `--global` ranks across every repo on the machine. 
`--since`, `--limit`, and `--provider` bound the work. - `--json` emits a structured leaderboard (`{ schemaVersion, scope, models, providers, best, worst, … }`); the terminal output shows the top models and per-tool tables with a single score bar (the confidence-weighted score) and a best/worst callout. -- Coverage is honest about its limits: Codex shell-based edits are not faithfully reconstructable (surfaced as skipped), the Cursor composer database requires `node:sqlite` (Node 22.13+) and covers GUI agent sessions (not cursor-agent CLI runs), and the score requires network access. +- Coverage is honest about its limits: Codex shell-based edits are not faithfully reconstructable (surfaced as skipped), reading any Cursor database requires `node:sqlite` (Node 22.13+), and the score requires network access. diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 244ed742d..f8ce93af2 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -88,6 +88,7 @@ export * from "./utils/define-config.js"; export * from "./utils/group-by.js"; export * from "./utils/has-published-fix-recipe.js"; export * from "./utils/is-large-minified-file.js"; +export * from "./utils/is-path-inside.js"; export * from "./utils/list-source-files.js"; export * from "./utils/map-with-concurrency.js"; export * from "./utils/match-glob-pattern.js"; diff --git a/packages/core/src/materialize-source-tree.ts b/packages/core/src/materialize-source-tree.ts index 7d8eb2819..9851608ce 100644 --- a/packages/core/src/materialize-source-tree.ts +++ b/packages/core/src/materialize-source-tree.ts @@ -3,6 +3,7 @@ import fs from "node:fs"; import path from "node:path"; import { STAGED_FILES_PROJECT_CONFIG_FILENAMES } from "./constants.js"; import type { ReactDoctorError } from "./errors.js"; +import { isPathInside } from "./utils/is-path-inside.js"; export interface MaterializedTree { readonly tempDirectory: string; @@ -10,17 +11,6 @@ export interface MaterializedTree { 
readonly cleanup: () => void; } -/** - * Zip-Slip defense: relative paths come from git (`diff --name-only`), which - * normalizes during ordinary adds, but a crafted index/pack/symlinked tree can - * smuggle `..` segments that escape the temp root. Resolve against the temp dir - * and reject anything that lands outside before writing. - */ -const isPathInsideDirectory = (childAbsolutePath: string, parentAbsolutePath: string): boolean => { - const relative = path.relative(parentAbsolutePath, childAbsolutePath); - return Boolean(relative) && !relative.startsWith("..") && !path.isAbsolute(relative); -}; - /** * Writes a set of source files (supplied by `readContent` — e.g. * `git show :` for a baseline tree, or `git show :` for the @@ -44,7 +34,7 @@ export const materializeSourceTree = (input: { const content = yield* input.readContent(relativePath).pipe(Effect.orElseSucceed(() => null)); if (content === null) continue; const candidateTargetPath = path.resolve(resolvedTempDirectory, relativePath); - if (!isPathInsideDirectory(candidateTargetPath, resolvedTempDirectory)) continue; + if (!isPathInside(candidateTargetPath, resolvedTempDirectory)) continue; yield* Effect.sync(() => { fs.mkdirSync(path.dirname(candidateTargetPath), { recursive: true }); fs.writeFileSync(candidateTargetPath, content); diff --git a/packages/react-doctor/src/stats/is-path-inside.ts b/packages/core/src/utils/is-path-inside.ts similarity index 61% rename from packages/react-doctor/src/stats/is-path-inside.ts rename to packages/core/src/utils/is-path-inside.ts index 3daecb726..0c6f4f468 100644 --- a/packages/react-doctor/src/stats/is-path-inside.ts +++ b/packages/core/src/utils/is-path-inside.ts @@ -9,6 +9,13 @@ export interface IsPathInsideOptions { * `true` when `childPath` resolves within `parentPath`. By default the parent * directory itself does not count (the strict zip-slip guard); pass * `allowSame: true` to treat an exact match as inside (scope membership). 
+ * + * Zip-Slip defense: relative paths can arrive from untrusted sources — a + * crafted git index/pack/symlinked tree, or a reconstructed agent transcript — + * and smuggle `..` segments that escape a temp root. Resolve against the parent + * and reject anything that lands outside before writing. This is the one + * audited copy of that guard, shared across the staged/baseline scan paths and + * the stats reconstruction tree so the two cannot drift. */ export const isPathInside = ( childPath: string, diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts index 1455c586e..acb1d7df7 100644 --- a/packages/react-doctor/src/cli/commands/stats.ts +++ b/packages/react-doctor/src/cli/commands/stats.ts @@ -80,7 +80,6 @@ export const statsAction = async (flags: StatsFlags): Promise => { // ora renders to stderr; suppress it in JSON mode so the run stays quiet. const progress = flags.json ? null : spinner("Looking through your agent history…").start(); let report: StatsReport; - let providerCount: number; try { const sessions = await discoverSessions(root, scope, (foundCount) => progress?.update(`Looking through your agent history… (${foundCount} found)`), @@ -92,7 +91,6 @@ export const statsAction = async (flags: StatsFlags): Promise => { }); progress?.update("Scoring…"); const aggregated = await aggregateStats(results, userConfig); - providerCount = aggregated.providers.length; report = { scope: scope.global ? "global" : "repo", @@ -122,7 +120,7 @@ export const statsAction = async (flags: StatsFlags): Promise => { recordCount(METRIC.statsRun, 1, { scope: report.scope, sessions: report.sessionsAnalyzed, - providers: providerCount, + providers: report.providers.length, provider: scope.provider ?? 
"all", }); diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts index 74482907c..19429e97a 100644 --- a/packages/react-doctor/src/cli/index.ts +++ b/packages/react-doctor/src/cli/index.ts @@ -235,7 +235,7 @@ program .option("-c, --cwd ", "working directory", process.cwd()) .option("--color", "force colored output") .option("--no-color", "disable colored output (also honors NO_COLOR)") - .action((location, options) => whyAction(location, options)); + .action(whyAction); program .command("install") diff --git a/packages/react-doctor/src/stats/aggregate-stats.ts b/packages/react-doctor/src/stats/aggregate-stats.ts index 149097836..d89fa207d 100644 --- a/packages/react-doctor/src/stats/aggregate-stats.ts +++ b/packages/react-doctor/src/stats/aggregate-stats.ts @@ -69,7 +69,7 @@ const upsert = ( * mean; high-evidence groups keep their raw score. Returns the raw score when * there's no prior. */ -export const confidenceWeightedScore = ( +const confidenceWeightedScore = ( rawScore: number | null, priorScore: number | null, filesScanned: number, diff --git a/packages/react-doctor/src/stats/coerce.ts b/packages/react-doctor/src/stats/coerce.ts index 0721228aa..b621bf418 100644 --- a/packages/react-doctor/src/stats/coerce.ts +++ b/packages/react-doctor/src/stats/coerce.ts @@ -6,6 +6,10 @@ export const asString = (value: unknown): string | undefined => typeof value === "string" && value.length > 0 ? value : undefined; +/** Narrow an unknown to a string, preserving the empty string (unlike `asString`). */ +export const asNullableString = (value: unknown): string | null => + typeof value === "string" ? value : null; + /** Narrow an unknown to a plain object record, else undefined. 
*/ export const asRecord = (value: unknown): Record | undefined => value && typeof value === "object" && !Array.isArray(value) diff --git a/packages/react-doctor/src/stats/cursor-cli-store.ts b/packages/react-doctor/src/stats/cursor-cli-store.ts new file mode 100644 index 000000000..cfcbd43b5 --- /dev/null +++ b/packages/react-doctor/src/stats/cursor-cli-store.ts @@ -0,0 +1,115 @@ +import { asRecord } from "./coerce.js"; +import { openReadOnlySqlite } from "./open-sqlite.js"; + +// The Cursor CLI agent (`~/.cursor` / `~/.cursor-nightly`) stores each chat as +// its own content-addressed SQLite store, distinct from the GUI's single +// `state.vscdb`. The `meta` table holds one row whose `value` is hex-encoded +// JSON (the latest root blob id + last-used model); the `blobs` table maps a +// sha256 id to either a message (JSON: `{ role, content }`) or the binary root +// manifest. The manifest is a protobuf-style flat list of `0x0a 0x20` followed +// by a 32-byte blob id, giving the conversation's messages in order. + +export interface CursorCliMessage { + readonly role: string; + readonly content: unknown; +} + +export interface CursorCliStore { + readonly lastUsedModel: string | null; + readonly messages: CursorCliMessage[]; +} + +const MANIFEST_RECORD_TAG = 0x0a; +const MANIFEST_ID_LENGTH = 0x20; +const MANIFEST_RECORD_LENGTH = 2 + MANIFEST_ID_LENGTH; + +/** + * The conversation's message blob ids, in order, read from the leading run of + * `[0x0a, 0x20, <32-byte id>]` records. Trailing protobuf fields after the run + * are ignored; a manifest that doesn't start with the run yields `[]`. 
+ */ +const parseManifestBlobIds = (manifest: Buffer): string[] => { + const ids: string[] = []; + let offset = 0; + while ( + offset + MANIFEST_RECORD_LENGTH <= manifest.length && + manifest[offset] === MANIFEST_RECORD_TAG && + manifest[offset + 1] === MANIFEST_ID_LENGTH + ) { + ids.push(manifest.subarray(offset + 2, offset + MANIFEST_RECORD_LENGTH).toString("hex")); + offset += MANIFEST_RECORD_LENGTH; + } + return ids; +}; + +/** blobs.data is a BLOB (Uint8Array); meta.value is hex-encoded TEXT. */ +const toBuffer = (value: unknown): Buffer | null => { + if (value instanceof Uint8Array) return Buffer.from(value); + if (typeof value === "string") return Buffer.from(value, "hex"); + return null; +}; + +/** + * Read a Cursor CLI per-session `store.db`: the last-used model and every + * conversation message in order. Returns `null` when the store can't be opened + * (older Node without `node:sqlite`, or an unreadable/locked file) or has no + * usable `meta` row; the messages array is empty when the manifest is missing. + */ +export const readCursorCliStore = (storeDbPath: string): CursorCliStore | null => { + const database = openReadOnlySqlite(storeDbPath); + if (!database) return null; + try { + const metaRow = asRecord(database.prepare("SELECT value FROM meta LIMIT 1").get()); + const metaValue = metaRow && typeof metaRow.value === "string" ? metaRow.value : null; + if (!metaValue) return null; + let meta: Record | undefined; + try { + meta = asRecord(JSON.parse(Buffer.from(metaValue, "hex").toString("utf8"))); + } catch { + return null; + } + if (!meta) return null; + + const lastUsedModel = typeof meta.lastUsedModel === "string" ? meta.lastUsedModel : null; + const latestRootBlobId = + typeof meta.latestRootBlobId === "string" ? 
meta.latestRootBlobId : null; + if (!latestRootBlobId) return { lastUsedModel, messages: [] }; + + const blobStatement = database.prepare("SELECT data FROM blobs WHERE id = ?"); + const blobBuffer = (id: string): Buffer | null => { + const row = asRecord(blobStatement.get(id)); + return row ? toBuffer(row.data) : null; + }; + + const manifest = blobBuffer(latestRootBlobId); + if (!manifest) return { lastUsedModel, messages: [] }; + + const messages: CursorCliMessage[] = []; + for (const blobId of parseManifestBlobIds(manifest)) { + const raw = blobBuffer(blobId); + if (!raw) continue; + const text = raw.toString("utf8"); + if (!text.startsWith("{")) continue; + let message: Record | undefined; + try { + message = asRecord(JSON.parse(text)); + } catch { + continue; + } + if (message && typeof message.role === "string") { + messages.push({ role: message.role, content: message.content }); + } + } + return { lastUsedModel, messages }; + } catch { + // A locked or unreadable store can throw mid-read; skip it rather than + // sinking the whole stats run. + return null; + } finally { + try { + database.close(); + } catch { + // Already closed or never fully opened — nothing to release. + } + } +}; diff --git a/packages/react-doctor/src/stats/cursor-db.ts b/packages/react-doctor/src/stats/cursor-db.ts index 1c8178030..6ed3e2e2f 100644 --- a/packages/react-doctor/src/stats/cursor-db.ts +++ b/packages/react-doctor/src/stats/cursor-db.ts @@ -1,9 +1,8 @@ import * as fs from "node:fs"; -import { createRequire } from "node:module"; import * as os from "node:os"; import * as path from "node:path"; - -const nodeRequire = createRequire(import.meta.url); +import { asNullableString } from "./coerce.js"; +import { openReadOnlySqlite } from "./open-sqlite.js"; // Cursor persists chat state in a single SQLite file. 
The GUI agent's model // selection, tool calls (edits), and full post-edit file snapshots all live in @@ -29,34 +28,39 @@ interface CursorDbHandle { export type { CursorComposerHeader, CursorDbHandle }; -const asString = (value: unknown): string | null => (typeof value === "string" ? value : null); - const asRecord = (value: unknown): Record | null => value && typeof value === "object" && !Array.isArray(value) ? (value as Record) : null; -const cursorAppDir = (): string => { +// Cursor ships a stable build and a "Nightly" build; each keeps its own +// application-support tree, so both are scanned. +const CURSOR_APP_DIR_NAMES = ["Cursor", "Cursor Nightly"]; + +const cursorAppDirs = (): string[] => { if (process.platform === "darwin") { - return path.join(os.homedir(), "Library", "Application Support", "Cursor"); + const base = path.join(os.homedir(), "Library", "Application Support"); + return CURSOR_APP_DIR_NAMES.map((name) => path.join(base, name)); } if (process.platform === "win32") { const appData = process.env.APPDATA ?? path.join(os.homedir(), "AppData", "Roaming"); - return path.join(appData, "Cursor"); + return CURSOR_APP_DIR_NAMES.map((name) => path.join(appData, name)); } const configHome = process.env.XDG_CONFIG_HOME ?? path.join(os.homedir(), ".config"); - return path.join(configHome, "Cursor"); + return CURSOR_APP_DIR_NAMES.map((name) => path.join(configHome, name)); }; /** - * Absolute path to the Cursor composer database, honoring a - * `REACT_DOCTOR_CURSOR_DB` override (used by tests). Returns `null` when no - * readable database exists. + * Absolute paths to every readable Cursor composer database — the stable and + * Nightly builds each keep their own. A `REACT_DOCTOR_CURSOR_DB` override pins + * a single database (used by tests). Returns `[]` when none exist. */ -export const resolveCursorDbPath = (): string | null => { - const candidate = - process.env.REACT_DOCTOR_CURSOR_DB ?? 
path.join(cursorAppDir(), CURSOR_DB_RELATIVE_PATH); - return fs.existsSync(candidate) ? candidate : null; +export const resolveCursorDbPaths = (): string[] => { + const override = process.env.REACT_DOCTOR_CURSOR_DB; + const candidates = override + ? [override] + : cursorAppDirs().map((directory) => path.join(directory, CURSOR_DB_RELATIVE_PATH)); + return candidates.filter((candidate) => fs.existsSync(candidate)); }; const modifiedMsFromHeader = (head: Record): number => { @@ -75,15 +79,16 @@ const parseComposerHeaders = (raw: string): CursorComposerHeader[] => { return []; } const record = asRecord(decoded); - const list = Array.isArray(decoded) - ? decoded - : record && Array.isArray(record.allComposers) - ? record.allComposers - : []; + let list: unknown[] = []; + if (Array.isArray(decoded)) { + list = decoded; + } else if (record && Array.isArray(record.allComposers)) { + list = record.allComposers; + } const headers: CursorComposerHeader[] = []; for (const entry of list) { const head = asRecord(entry); - const composerId = head && asString(head.composerId); + const composerId = head && asNullableString(head.composerId); if (head && composerId) { headers.push({ composerId, modifiedMs: modifiedMsFromHeader(head) }); } @@ -94,7 +99,7 @@ const parseComposerHeaders = (raw: string): CursorComposerHeader[] => { // node:sqlite returns each row as an object keyed by column name. const rowValueString = (row: unknown): string | null => { const record = asRecord(row); - return record ? asString(record.value) : null; + return record ? 
asNullableString(record.value) : null; }; // The exclusive upper bound for a key prefix: the prefix with its last byte @@ -110,96 +115,93 @@ interface OpenDb { } const makeHandle = (dbPath: string): OpenDb | null => { - let database: { - prepare(sql: string): { - get(...params: unknown[]): unknown; - all(...params: unknown[]): unknown[]; - }; - close(): void; + // `node:sqlite` is built in on Node 22.13+/24+; absent on older Node, where + // opening returns null and Cursor stats degrade to "no sessions found". + const database = openReadOnlySqlite(dbPath); + if (!database) return null; + const close = (): void => { + try { + database.close(); + } catch { + // Already closed or never fully opened — nothing to release. + } }; + try { - // `node:sqlite` is built in on Node 22.13+/24+; absent on older Node, where - // the require throws and Cursor stats degrade to "no sessions found". - const { DatabaseSync } = nodeRequire("node:sqlite"); - database = new DatabaseSync(dbPath, { readOnly: true }); + const headersStatement = database.prepare(`SELECT value FROM ItemTable WHERE key = ?`); + const composerStatement = database.prepare(`SELECT value FROM cursorDiskKV WHERE key = ?`); + const bubbleStatement = database.prepare( + `SELECT value FROM cursorDiskKV WHERE key >= ? AND key < ?`, + ); + + const handle: CursorDbHandle = { + composerHeaders(): CursorComposerHeader[] { + try { + const raw = rowValueString(headersStatement.get(COMPOSER_HEADERS_KEY)); + return raw ? 
parseComposerHeaders(raw) : []; + } catch { + return []; + } + }, + composerValue(composerId: string): string | null { + try { + return rowValueString(composerStatement.get(`${COMPOSER_DATA_PREFIX}${composerId}`)); + } catch { + return null; + } + }, + bubbleValues(composerId: string): string[] { + try { + const prefix = `${BUBBLE_PREFIX}${composerId}:`; + const rows = bubbleStatement.all(prefix, prefixUpperBound(prefix)); + const values: string[] = []; + for (const row of rows) { + const value = rowValueString(row); + if (value) values.push(value); + } + return values; + } catch { + return []; + } + }, + contentValue(contentId: string): string | null { + try { + return rowValueString(composerStatement.get(contentId)); + } catch { + return null; + } + }, + }; + + return { handle, close }; } catch { + // A locked or unreadable database can throw when statements are prepared; + // skip it rather than sinking the whole stats run. + close(); return null; } - - const headersStatement = database.prepare(`SELECT value FROM ItemTable WHERE key = ?`); - const composerStatement = database.prepare(`SELECT value FROM cursorDiskKV WHERE key = ?`); - const bubbleStatement = database.prepare( - `SELECT value FROM cursorDiskKV WHERE key >= ? AND key < ?`, - ); - - const handle: CursorDbHandle = { - composerHeaders(): CursorComposerHeader[] { - try { - const raw = rowValueString(headersStatement.get(COMPOSER_HEADERS_KEY)); - return raw ? 
parseComposerHeaders(raw) : []; - } catch { - return []; - } - }, - composerValue(composerId: string): string | null { - try { - return rowValueString(composerStatement.get(`${COMPOSER_DATA_PREFIX}${composerId}`)); - } catch { - return null; - } - }, - bubbleValues(composerId: string): string[] { - try { - const prefix = `${BUBBLE_PREFIX}${composerId}:`; - const rows = bubbleStatement.all(prefix, prefixUpperBound(prefix)); - const values: string[] = []; - for (const row of rows) { - const value = rowValueString(row); - if (value) values.push(value); - } - return values; - } catch { - return []; - } - }, - contentValue(contentId: string): string | null { - try { - return rowValueString(composerStatement.get(contentId)); - } catch { - return null; - } - }, - }; - - return { - handle, - close: () => { - try { - database.close(); - } catch { - // Already closed or never fully opened — nothing to release. - } - }, - }; }; -// One open handle per process — opening is cheap (SQLite memory-maps lazily), -// but reopening per composer during a scan would thrash. `closeCursorDb` closes -// the underlying database for tests (so Windows can unlink the fixture file); -// the CLI relies on process exit. -let cachedDb: { dbPath: string; handle: CursorDbHandle | null; close: () => void } | null = null; +// One open handle per database path — opening is cheap (SQLite memory-maps +// lazily), but reopening per composer during a scan would thrash. The stable +// and Nightly databases can both be open at once, so they're memoized by path. +// `closeCursorDb` closes them for tests (so Windows can unlink the fixture +// file); the CLI relies on process exit. +const openDatabases = new Map void }>(); -/** Open (and memoize) the composer database, or `null` when unavailable. */ +/** Open (and memoize) a composer database by path, or `null` when unavailable. 
*/ export const openCursorDb = (dbPath: string | null): CursorDbHandle | null => { if (!dbPath) return null; - if (cachedDb && cachedDb.dbPath === dbPath) return cachedDb.handle; + const cached = openDatabases.get(dbPath); + if (cached) return cached.handle; const opened = makeHandle(dbPath); - cachedDb = { dbPath, handle: opened?.handle ?? null, close: opened?.close ?? (() => {}) }; - return cachedDb.handle; + const entry = { handle: opened?.handle ?? null, close: opened?.close ?? (() => {}) }; + openDatabases.set(dbPath, entry); + return entry.handle; }; -/** Close and drop the memoized database (tests open fresh fixture databases). */ +/** Close and drop every memoized database (tests open fresh fixture databases). */ export const closeCursorDb = (): void => { - cachedDb?.close(); - cachedDb = null; + for (const database of openDatabases.values()) database.close(); + openDatabases.clear(); }; diff --git a/packages/react-doctor/src/stats/discover-sessions.ts b/packages/react-doctor/src/stats/discover-sessions.ts index 6024231cb..312494c4b 100644 --- a/packages/react-doctor/src/stats/discover-sessions.ts +++ b/packages/react-doctor/src/stats/discover-sessions.ts @@ -1,5 +1,5 @@ +import { isPathInside } from "@react-doctor/core"; import { STATS_DISCOVERY_YIELD_INTERVAL } from "./constants.js"; -import { isPathInside } from "./is-path-inside.js"; import { STATS_SOURCES } from "./sources/index.js"; import { resolveEditPaths } from "./reconstruct-files.js"; import type { AgentSession, StatsScopeOptions } from "./types.js"; diff --git a/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts b/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts index 955b17f28..7dbcc9397 100644 --- a/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts +++ b/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts @@ -1,9 +1,8 @@ import * as fs from "node:fs"; import * as os from "node:os"; import * as path from "node:path"; -import { 
STAGED_FILES_PROJECT_CONFIG_FILENAMES } from "@react-doctor/core"; +import { isPathInside, STAGED_FILES_PROJECT_CONFIG_FILENAMES } from "@react-doctor/core"; import { STATS_TEMP_DIR_PREFIX } from "./constants.js"; -import { isPathInside } from "./is-path-inside.js"; import type { ReconstructedFile } from "./types.js"; export interface MaterializedReconstruction { @@ -49,7 +48,7 @@ export const materializeReconstructedTree = ( try { realTempDirectory = fs.realpathSync(resolvedTempDirectory); } catch { - realTempDirectory = resolvedTempDirectory; + // realpath unavailable (broken symlink, permission); keep the resolved path. } return { diff --git a/packages/react-doctor/src/stats/open-sqlite.ts b/packages/react-doctor/src/stats/open-sqlite.ts new file mode 100644 index 000000000..f8abd1cb6 --- /dev/null +++ b/packages/react-doctor/src/stats/open-sqlite.ts @@ -0,0 +1,66 @@ +import { createRequire } from "node:module"; + +const nodeRequire = createRequire(import.meta.url); + +/** Minimal read surface over a `node:sqlite` database (prepared statements). */ +export interface ReadOnlySqliteDatabase { + prepare(sql: string): { + get(...params: unknown[]): unknown; + all(...params: unknown[]): unknown[]; + }; + close(): void; +} + +// A read-only `file:` URI with `immutable=1`. A running editor (Cursor/VSCode) +// holds its SQLite store locked, so a plain read-only connection opens but then +// throws "database is locked" on first read; `immutable=1` tells SQLite the file +// won't change and skips all locking, reading the live database (it ignores any +// uncommitted WAL, which is fine for a history scan). Forward-slashed + encoded +// for the URI grammar so paths with spaces (e.g. "Cursor Nightly") parse. +const toImmutableFileUri = (databasePath: string): string => { + const forwardSlashed = databasePath.replace(/\\/g, "/"); + const absolute = forwardSlashed.startsWith("/") ? 
forwardSlashed : `/${forwardSlashed}`; + // Encode each segment so reserved URI characters (`?`, `#`, …) inside a path + // can't be parsed as the query/fragment delimiter; the `/` separators stay. + const encoded = absolute.split("/").map(encodeURIComponent).join("/"); + return `file:${encoded}?immutable=1`; +}; + +/** + * Open a SQLite database read-only via the built-in `node:sqlite`, or `null` + * when it is unavailable (Node < 22.13, where the require throws) or the file + * cannot be read. A plain read-only open is tried first (it sees the WAL, so + * historical stores read accurately); if its probe trips the lock a running + * editor holds, an `immutable` open takes over. Shared by the Cursor GUI + * composer database and the Cursor CLI per-session store. + */ +export const openReadOnlySqlite = (databasePath: string): ReadOnlySqliteDatabase | null => { + let DatabaseSync: new ( + location: string, + options: { readOnly: boolean }, + ) => ReadOnlySqliteDatabase; + try { + ({ DatabaseSync } = nodeRequire("node:sqlite")); + } catch { + return null; + } + const locations = [databasePath, toImmutableFileUri(databasePath)]; + for (const location of locations) { + let database: ReadOnlySqliteDatabase | undefined; + try { + database = new DatabaseSync(location, { readOnly: true }); + // A read-only open succeeds even against a locked database; the lock only + // surfaces on the first page read. Probe with a schema read so a locked + // store falls through to the immutable strategy instead of throwing later. + database.prepare("SELECT name FROM sqlite_master LIMIT 1").get(); + return database; + } catch { + try { + database?.close(); + } catch { + // Nothing to release. 
+ } + } + } + return null; +}; diff --git a/packages/react-doctor/src/stats/parse-apply-patch.ts b/packages/react-doctor/src/stats/parse-apply-patch.ts index c01f04979..e44033eb4 100644 --- a/packages/react-doctor/src/stats/parse-apply-patch.ts +++ b/packages/react-doctor/src/stats/parse-apply-patch.ts @@ -13,6 +13,11 @@ export interface PatchOp { const FILE_HEADER = /^\*\*\* (Add|Update|Delete) File: (.+)$/; const MOVE_HEADER = /^\*\*\* Move to: (.+)$/; +const KIND_TO_OP_TYPE: Record = { + add: "add", + update: "update", + delete: "delete", +}; /** * Parse a Codex / Cursor `apply_patch` envelope (`*** Begin Patch` … @@ -55,7 +60,7 @@ export const parseApplyPatch = (patchText: string): PatchOp[] => { flush(); const kind = header[1].toLowerCase(); current = { - type: kind === "add" ? "add" : kind === "delete" ? "delete" : "update", + type: KIND_TO_OP_TYPE[kind], path: header[2].trim(), body: [], }; diff --git a/packages/react-doctor/src/stats/run-stats-scan.ts b/packages/react-doctor/src/stats/run-stats-scan.ts index 1beb39c84..1328b8eed 100644 --- a/packages/react-doctor/src/stats/run-stats-scan.ts +++ b/packages/react-doctor/src/stats/run-stats-scan.ts @@ -1,7 +1,11 @@ import * as path from "node:path"; -import { mapWithConcurrency, runEditorScan, type Diagnostic } from "@react-doctor/core"; +import { + isPathInside, + mapWithConcurrency, + runEditorScan, + type Diagnostic, +} from "@react-doctor/core"; import { STATS_SCAN_CONCURRENCY } from "./constants.js"; -import { isPathInside } from "./is-path-inside.js"; import { isReactSourceFile } from "./is-react-source.js"; import { materializeReconstructedTree } from "./materialize-reconstructed-tree.js"; import { reconstructSession } from "./reconstruct-files.js"; diff --git a/packages/react-doctor/src/stats/sources/claude.ts b/packages/react-doctor/src/stats/sources/claude.ts index 96e776788..1a5fa11f7 100644 --- a/packages/react-doctor/src/stats/sources/claude.ts +++ 
b/packages/react-doctor/src/stats/sources/claude.ts @@ -46,17 +46,10 @@ export const parseClaudeSession = async (transcriptPath: string): Promise(); let cwd: string | null = null; - let startedAt: string | undefined; - let endedAt: string | undefined; let sawAnything = false; await readJsonlEntries(transcriptPath, (entry) => { sawAnything = true; - const timestamp = asString(entry.timestamp); - if (timestamp) { - if (!startedAt || timestamp < startedAt) startedAt = timestamp; - if (!endedAt || timestamp > endedAt) endedAt = timestamp; - } if (!cwd) cwd = asString(entry.cwd) ?? null; // Post-edit / read snapshots ride a top-level `toolUseResult` on the @@ -99,8 +92,6 @@ export const parseClaudeSession = async (transcriptPath: string): Promise { export const codexSource: SourceDef = { name: "codex", candidates() { - // sessions/YYYY/MM/DD/rollout-*.jsonl → 4 levels. + // sessions/YYYY/MM/DD/rollout-*.jsonl — files sit 3 levels below the root; 5 leaves headroom. return fileSessionCandidates( "codex", codexRoots(), diff --git a/packages/react-doctor/src/stats/sources/cursor-cli.ts b/packages/react-doctor/src/stats/sources/cursor-cli.ts new file mode 100644 index 000000000..fc037cb4b --- /dev/null +++ b/packages/react-doctor/src/stats/sources/cursor-cli.ts @@ -0,0 +1,159 @@ +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import { asArray, asNullableString, asRecord, asString } from "../coerce.js"; +import { STATS_UNKNOWN_MODEL } from "../constants.js"; +import { readCursorCliStore } from "../cursor-cli-store.js"; +import { isLintablePath } from "../reconstruct-files.js"; +import { statMtimeMs } from "../walk-transcripts.js"; +import type { AgentSession, FileEdit, FileRead, SessionCandidate, SourceDef } from "./index.js"; + +// The Cursor CLI agent keeps one content-addressed SQLite store per chat at +// `/chats///store.db`, beside a `meta.json` that +// records the chat's last-updated time. 
Two install channels exist — stable +// (`~/.cursor`) and nightly (`~/.cursor-nightly`). +const cursorCliHomes = (): string[] => { + const override = process.env.REACT_DOCTOR_CURSOR_CLI_HOME; + if (override) return override.split(path.delimiter).filter(Boolean); + return [path.join(os.homedir(), ".cursor"), path.join(os.homedir(), ".cursor-nightly")]; +}; + +// File-mutating tool calls. `Write` carries the full post-edit content; +// `ApplyPatch` carries a raw apply_patch envelope (the same format as Codex); +// `StrReplace` carries an old/new string pair; `Delete` removes a path. Planning +// tools (`CreatePlan`, `TodoWrite`) write no source file and are ignored. +const READ_TOOL_NAMES = new Set(["Read", "ReadFile"]); + +const editFromToolCall = (toolName: string, args: unknown): FileEdit | null => { + if (toolName === "ApplyPatch") { + return typeof args === "string" && args.length > 0 + ? { kind: "patch", path: "", patch: args } + : null; + } + const record = asRecord(args); + const filePath = record && asString(record.path); + if (!record || !filePath) return null; + if (toolName === "Write") { + const contents = asNullableString(record.contents); + return contents === null ? 
null : { kind: "write", path: filePath, resultContent: contents }; + } + if (toolName === "Delete") { + return { kind: "delete", path: filePath }; + } + if (toolName === "StrReplace") { + const oldString = asNullableString(record.old_string); + const newString = asNullableString(record.new_string); + if (oldString === null || newString === null) return null; + return { kind: "replace", path: filePath, oldString, newString }; + } + return null; +}; + +const buildCliSession = (storeDbPath: string, sessionId: string): AgentSession | null => { + const store = readCursorCliStore(storeDbPath); + if (!store) return null; + + const edits: FileEdit[] = []; + const reads: FileRead[] = []; + const capturedReadPaths = new Set(); + // tool-call → tool-result are separate messages, so a Read's path is recorded + // when its call is seen, then paired with the content in its later result. + const pendingReadPaths = new Map(); + + for (const message of store.messages) { + for (const rawBlock of asArray(message.content)) { + const block = asRecord(rawBlock); + if (!block) continue; + if (block.type === "tool-call") { + const toolName = asString(block.toolName); + if (!toolName) continue; + const edit = editFromToolCall(toolName, block.args); + if (edit) { + edits.push(edit); + continue; + } + const toolCallId = asString(block.toolCallId); + const readRecord = asRecord(block.args); + const readPath = readRecord && asString(readRecord.path); + if (READ_TOOL_NAMES.has(toolName) && toolCallId && readPath && isLintablePath(readPath)) { + pendingReadPaths.set(toolCallId, readPath); + } + } else if (block.type === "tool-result") { + const toolCallId = asString(block.toolCallId); + const readPath = toolCallId ? pendingReadPaths.get(toolCallId) : undefined; + // Keep the first read of a path (the pre-edit base); a later post-edit + // read would otherwise overwrite it and desync replace/patch replay. 
+ if (readPath && !capturedReadPaths.has(readPath) && typeof block.result === "string") { + reads.push({ path: readPath, content: block.result }); + capturedReadPaths.add(readPath); + } + } + } + } + + if (edits.length === 0) return null; + + return { + provider: "cursor", + sessionId, + transcriptPath: storeDbPath, + model: store.lastUsedModel ?? STATS_UNKNOWN_MODEL, + cwd: null, + edits, + reads, + }; +}; + +/** mtime key for `--since`/sorting: the chat's `meta.json` time, else the store's. */ +const sessionModifiedMs = (sessionDir: string, storeDbPath: string): number => { + try { + const meta = JSON.parse(fs.readFileSync(path.join(sessionDir, "meta.json"), "utf8")); + if (meta && typeof meta.updatedAtMs === "number") return meta.updatedAtMs; + } catch { + // No or unreadable meta.json — fall back to the store's mtime. + } + return statMtimeMs(storeDbPath); +}; + +const discoverCliSessions = (home: string): SessionCandidate[] => { + const chatsRoot = path.join(home, "chats"); + let workspaceEntries: fs.Dirent[]; + try { + workspaceEntries = fs.readdirSync(chatsRoot, { withFileTypes: true }); + } catch { + return []; + } + const candidates: SessionCandidate[] = []; + for (const workspace of workspaceEntries) { + if (!workspace.isDirectory()) continue; + const workspaceDir = path.join(chatsRoot, workspace.name); + let sessionEntries: fs.Dirent[]; + try { + sessionEntries = fs.readdirSync(workspaceDir, { withFileTypes: true }); + } catch { + continue; + } + for (const session of sessionEntries) { + if (!session.isDirectory()) continue; + const sessionDir = path.join(workspaceDir, session.name); + const storeDbPath = path.join(sessionDir, "store.db"); + if (!fs.existsSync(storeDbPath)) continue; + candidates.push({ + provider: "cursor", + modifiedMs: sessionModifiedMs(sessionDir, storeDbPath), + load: async () => buildCliSession(storeDbPath, session.name), + }); + } + } + return candidates; +}; + +export const cursorCliCandidates = (homes: ReadonlyArray): 
SessionCandidate[] => + homes.flatMap((home) => discoverCliSessions(home)); + +export const cursorCliSource: SourceDef = { + name: "cursor", + candidates() { + return cursorCliCandidates(cursorCliHomes()); + }, +}; diff --git a/packages/react-doctor/src/stats/sources/cursor.ts b/packages/react-doctor/src/stats/sources/cursor.ts index da611c3f4..d6f567e19 100644 --- a/packages/react-doctor/src/stats/sources/cursor.ts +++ b/packages/react-doctor/src/stats/sources/cursor.ts @@ -1,6 +1,6 @@ import { asRecord, asString, parseJson } from "../coerce.js"; import { STATS_UNKNOWN_MODEL } from "../constants.js"; -import { openCursorDb, resolveCursorDbPath, type CursorDbHandle } from "../cursor-db.js"; +import { openCursorDb, resolveCursorDbPaths, type CursorDbHandle } from "../cursor-db.js"; import { mostCommonKey } from "../most-common-key.js"; import { isLintablePath } from "../reconstruct-files.js"; import type { AgentSession, FileEdit, SessionCandidate, SourceDef } from "./index.js"; @@ -37,7 +37,7 @@ const editFromToolCall = ( const afterContentId = result && asString(result.afterContentId); const content = afterContentId ? db.contentValue(afterContentId) : null; const resultContent = content ?? 
asString(params?.streamingContent); - if (resultContent === undefined || resultContent === null) return null; + if (resultContent === undefined) return null; return { kind: "write", path: filePath, resultContent }; }; @@ -105,6 +105,6 @@ export const cursorComposerCandidates = (dbPath: string | null): SessionCandidat export const cursorSource: SourceDef = { name: "cursor", candidates() { - return cursorComposerCandidates(resolveCursorDbPath()); + return resolveCursorDbPaths().flatMap((dbPath) => cursorComposerCandidates(dbPath)); }, }; diff --git a/packages/react-doctor/src/stats/sources/index.ts b/packages/react-doctor/src/stats/sources/index.ts index bc1c69d2d..4ee12076a 100644 --- a/packages/react-doctor/src/stats/sources/index.ts +++ b/packages/react-doctor/src/stats/sources/index.ts @@ -2,6 +2,7 @@ import type { SessionCandidate, StatsProvider } from "../types.js"; import { claudeSource } from "./claude.js"; import { codexSource } from "./codex.js"; import { cursorSource } from "./cursor.js"; +import { cursorCliSource } from "./cursor-cli.js"; export type { AgentSession, @@ -14,8 +15,9 @@ export type { /** * A per-provider session source. Each source enumerates its sessions as cheap, * lazily-loadable `SessionCandidate`s — transcript files for Claude/Codex, rows - * from the Cursor composer database for Cursor — so the rest of the pipeline is - * provider-agnostic. + * from the GUI composer database and the CLI per-session stores for Cursor — so + * the rest of the pipeline is provider-agnostic. A provider may have more than + * one source (Cursor's GUI app and CLI agent store chats differently). 
*/ export interface SourceDef { readonly name: StatsProvider; @@ -23,4 +25,9 @@ export interface SourceDef { candidates(): SessionCandidate[]; } -export const STATS_SOURCES: ReadonlyArray = [claudeSource, codexSource, cursorSource]; +export const STATS_SOURCES: ReadonlyArray = [ + claudeSource, + codexSource, + cursorSource, + cursorCliSource, +]; diff --git a/packages/react-doctor/src/stats/types.ts b/packages/react-doctor/src/stats/types.ts index 3f6c6f1c9..12b44c128 100644 --- a/packages/react-doctor/src/stats/types.ts +++ b/packages/react-doctor/src/stats/types.ts @@ -35,8 +35,6 @@ export interface AgentSession { readonly transcriptPath: string; readonly model: string; readonly cwd: string | null; - readonly startedAt?: string; - readonly endedAt?: string; readonly edits: FileEdit[]; readonly reads: FileRead[]; } diff --git a/packages/react-doctor/tests/stats-adapters.test.ts b/packages/react-doctor/tests/stats-adapters.test.ts index 260a6421b..5829fb9e3 100644 --- a/packages/react-doctor/tests/stats-adapters.test.ts +++ b/packages/react-doctor/tests/stats-adapters.test.ts @@ -2,11 +2,12 @@ import * as fs from "node:fs"; import { createRequire } from "node:module"; import * as os from "node:os"; import * as path from "node:path"; -import { afterAll, describe, expect, it } from "vite-plus/test"; +import { afterAll, beforeEach, describe, expect, it } from "vite-plus/test"; import { closeCursorDb } from "../src/stats/cursor-db.js"; import { parseClaudeSession } from "../src/stats/sources/claude.js"; import { parseCodexSession } from "../src/stats/sources/codex.js"; import { cursorComposerCandidates } from "../src/stats/sources/cursor.js"; +import { cursorCliCandidates } from "../src/stats/sources/cursor-cli.js"; interface SqliteDb { exec(sql: string): void; @@ -138,8 +139,9 @@ const writeComposerDb = (name: string, composers: ReadonlyArray const describeCursor = sqlite ? 
describe : describe.skip; describeCursor("cursorComposerCandidates", () => { + beforeEach(() => closeCursorDb()); + it("attributes the composer model and reconstructs exact content via afterContentId", async () => { - closeCursorDb(); const dbPath = writeComposerDb("cursor-model.vscdb", [ { composerId: "comp-1", @@ -185,7 +187,6 @@ describeCursor("cursorComposerCandidates", () => { }); it("falls back to the dominant bubble model when the composer is on Auto", async () => { - closeCursorDb(); const dbPath = writeComposerDb("cursor-auto.vscdb", [ { composerId: "comp-2", @@ -213,7 +214,6 @@ describeCursor("cursorComposerCandidates", () => { }); it("ignores non-lintable edits and skips when the database is absent", async () => { - closeCursorDb(); expect(cursorComposerCandidates(null)).toEqual([]); const dbPath = writeComposerDb("cursor-nonlintable.vscdb", [ @@ -238,3 +238,157 @@ describeCursor("cursorComposerCandidates", () => { expect(session?.edits).toEqual([]); }); }); + +interface CliStoreFixture { + readonly model: string; + readonly updatedAtMs: number; + readonly messages: ReadonlyArray<{ role: string; content: unknown }>; +} + +const CLI_ROOT_BLOB_ID = "f".repeat(64); + +// Build a Cursor CLI per-session store: hex-encoded `meta`, a protobuf-style +// manifest blob (`0x0a 0x20` + 32-byte id per message, in order), and one JSON +// message blob per entry — the shape `readCursorCliStore` parses. 
+const writeCliStore = (home: string, sessionId: string, fixture: CliStoreFixture): void => { + if (!sqlite) throw new Error("node:sqlite unavailable"); + const sessionDir = path.join(home, "chats", "workspace-hash", sessionId); + fs.mkdirSync(sessionDir, { recursive: true }); + fs.writeFileSync( + path.join(sessionDir, "meta.json"), + JSON.stringify({ schemaVersion: 1, updatedAtMs: fixture.updatedAtMs }), + ); + + const database = new sqlite.DatabaseSync(path.join(sessionDir, "store.db")); + database.exec("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT)"); + database.exec("CREATE TABLE blobs (id TEXT PRIMARY KEY, data BLOB)"); + + const messageIds = fixture.messages.map((_, index) => index.toString(16).padStart(64, "0")); + const manifest = Buffer.concat( + messageIds.map((id) => Buffer.concat([Buffer.from([0x0a, 0x20]), Buffer.from(id, "hex")])), + ); + + const insertMeta = database.prepare("INSERT INTO meta (key, value) VALUES (?, ?)"); + insertMeta.run( + "0", + Buffer.from( + JSON.stringify({ latestRootBlobId: CLI_ROOT_BLOB_ID, lastUsedModel: fixture.model }), + ).toString("hex"), + ); + + const insertBlob = database.prepare("INSERT INTO blobs (id, data) VALUES (?, ?)"); + insertBlob.run(CLI_ROOT_BLOB_ID, manifest); + fixture.messages.forEach((message, index) => { + insertBlob.run(messageIds[index], Buffer.from(JSON.stringify(message))); + }); + database.close(); +}; + +describeCursor("cursorCliCandidates", () => { + it("reconstructs model, ordered edits, and read bases from a CLI store", async () => { + const home = path.join(tempDir, "cursor-cli-home"); + writeCliStore(home, "session-1", { + model: "claude-opus-4-8", + updatedAtMs: 5_000, + messages: [ + { + role: "assistant", + content: [ + { + type: "tool-call", + toolName: "Write", + toolCallId: "w1", + args: { path: "/repo/a.tsx", contents: "export const A = () => null;\n" }, + }, + ], + }, + { + role: "assistant", + content: [ + { + type: "tool-call", + toolName: "ApplyPatch", + toolCallId: "p1", + 
args: "*** Begin Patch\n*** Add File: /repo/b.ts\n+export const b = 2;\n*** End Patch", + }, + ], + }, + { + role: "assistant", + content: [ + { + type: "tool-call", + toolName: "Read", + toolCallId: "r1", + args: { path: "/repo/c.tsx" }, + }, + ], + }, + { + role: "tool", + content: [{ type: "tool-result", toolCallId: "r1", result: "export const C = 1;\n" }], + }, + { + role: "assistant", + content: [ + { + type: "tool-call", + toolName: "StrReplace", + toolCallId: "s1", + args: { path: "/repo/c.tsx", old_string: "1", new_string: "2" }, + }, + ], + }, + { + role: "assistant", + content: [ + { + type: "tool-call", + toolName: "Delete", + toolCallId: "d1", + args: { path: "/repo/old.ts" }, + }, + ], + }, + { + role: "assistant", + content: [ + { type: "tool-call", toolName: "TodoWrite", toolCallId: "t1", args: { todos: [] } }, + ], + }, + ], + }); + + const candidates = cursorCliCandidates([home]); + expect(candidates).toHaveLength(1); + expect(candidates[0].modifiedMs).toBe(5_000); + + const session = await candidates[0].load(); + expect(session?.provider).toBe("cursor"); + expect(session?.model).toBe("claude-opus-4-8"); + + const write = session?.edits.find((edit) => edit.kind === "write"); + expect(write?.path).toBe("/repo/a.tsx"); + expect(write?.resultContent).toBe("export const A = () => null;\n"); + + const patch = session?.edits.find((edit) => edit.kind === "patch"); + expect(patch?.patch).toContain("Add File: /repo/b.ts"); + + const replace = session?.edits.find((edit) => edit.kind === "replace"); + expect(replace?.path).toBe("/repo/c.tsx"); + expect(replace?.oldString).toBe("1"); + expect(replace?.newString).toBe("2"); + + expect( + session?.edits.some((edit) => edit.kind === "delete" && edit.path === "/repo/old.ts"), + ).toBe(true); + // Write + ApplyPatch + StrReplace + Delete; the Read and the TodoWrite plan are not edits. + expect(session?.edits).toHaveLength(4); + // The Read result is captured as a base so the StrReplace reconstructs. 
+ expect(session?.reads).toEqual([{ path: "/repo/c.tsx", content: "export const C = 1;\n" }]); + }); + + it("returns no candidates when the CLI home has no chats", () => { + expect(cursorCliCandidates([path.join(tempDir, "missing-cli-home")])).toEqual([]); + }); +}); diff --git a/packages/react-doctor/tests/stats-aggregate.test.ts b/packages/react-doctor/tests/stats-aggregate.test.ts index 3de14e90e..ccfd9c36d 100644 --- a/packages/react-doctor/tests/stats-aggregate.test.ts +++ b/packages/react-doctor/tests/stats-aggregate.test.ts @@ -123,12 +123,12 @@ describe("aggregateStats", () => { ...Array.from({ length: 8 }, () => result("codex", "b", 0, [])), ]; const aggregated = await aggregateStats([...productive, ...padded], null, stubScore); - const a = aggregated.models.find((group) => group.key === "claude/a"); - const b = aggregated.models.find((group) => group.key === "codex/b"); - expect(a?.score).toBe(b?.score); - expect(a?.weightedScore).toBe(b?.weightedScore); + const productiveGroup = aggregated.models.find((group) => group.key === "claude/a"); + const paddedGroup = aggregated.models.find((group) => group.key === "codex/b"); + expect(productiveGroup?.score).toBe(paddedGroup?.score); + expect(productiveGroup?.weightedScore).toBe(paddedGroup?.weightedScore); // The reported session count still reflects every analyzed session. 
- expect(b?.sessions).toBe(10); + expect(paddedGroup?.sessions).toBe(10); }); it("leaves the score null when a group lacks enough files to rank fairly", async () => { From ac04d51fde95b86affedf13c5e83cb2b6c1b5f84 Mon Sep 17 00:00:00 2001 From: Rayhan Noufal Arayilakath Date: Mon, 22 Jun 2026 18:10:39 -0700 Subject: [PATCH 15/17] feat(stats): trace stats runs in Sentry (cli.stats + per-model leaderboard rows) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wrap `react-doctor stats` in a `cli.stats` root span with a discover/scan/ aggregate latency waterfall, and emit one queryable `stats.leaderboard_row` span per ranked model carrying its model, harness, confidence-weighted score, and files scored — the four leaderboard columns. Same gating as the scan path (no-op under --no-score, in tests, and for @react-doctor/api). - Extract the shared `modelLabel` helper (render + tracing) into one util. - Pure, exported `buildStatsRowAttributes` for testability, mirroring `buildRunEventAttributes`. - Fix `detectCommand`: `stats` runs were mis-tagged `command=inspect`. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/stats-agent-leaderboard.md | 1 + .../react-doctor/src/cli/commands/stats.ts | 92 +++++++++++-------- .../src/cli/utils/build-run-context.ts | 2 +- .../src/cli/utils/with-sentry-stats-span.ts | 85 +++++++++++++++++ .../react-doctor/src/stats/model-label.ts | 11 +++ .../react-doctor/src/stats/render-stats.ts | 6 +- .../react-doctor/tests/stats-trace.test.ts | 58 ++++++++++++ 7 files changed, 212 insertions(+), 43 deletions(-) create mode 100644 packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts create mode 100644 packages/react-doctor/src/stats/model-label.ts create mode 100644 packages/react-doctor/tests/stats-trace.test.ts diff --git a/.changeset/stats-agent-leaderboard.md b/.changeset/stats-agent-leaderboard.md index 2da732083..c6da718dc 100644 --- a/.changeset/stats-agent-leaderboard.md +++ b/.changeset/stats-agent-leaderboard.md @@ -12,3 +12,4 @@ Add a `react-doctor stats` subcommand — a per-model code-quality leaderboard b - Default scope is the current repository (sessions whose cwd or edits touch the repo root); `--global` ranks across every repo on the machine. `--since`, `--limit`, and `--provider` bound the work. - `--json` emits a structured leaderboard (`{ schemaVersion, scope, models, providers, best, worst, … }`); the terminal output shows the top models and per-tool tables with a single score bar (the confidence-weighted score) and a best/worst callout. - Coverage is honest about its limits: Codex shell-based edits are not faithfully reconstructable (surfaced as skipped), reading any Cursor database requires `node:sqlite` (Node 22.13+), and the score requires network access. 
+- Anonymized Sentry tracing (CLI only, same gating as the scan path — off under `--no-score`, in tests, and for the programmatic API): each run is one `cli.stats` trace with a discover/scan/aggregate latency waterfall, and every ranked model is a queryable `stats.leaderboard_row` span carrying its model, harness, confidence-weighted score, and files scored — so the leaderboard is sliceable in Sentry's Trace Explorer. diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts index acb1d7df7..cadf5f2dc 100644 --- a/packages/react-doctor/src/cli/commands/stats.ts +++ b/packages/react-doctor/src/cli/commands/stats.ts @@ -10,6 +10,11 @@ import { METRIC } from "../utils/constants.js"; import { enableJsonMode } from "../utils/json-mode.js"; import { recordCount } from "../utils/record-metric.js"; import { spinner } from "../utils/spinner.js"; +import { + recordStatsLeaderboard, + traceStatsPhase, + withSentryStatsSpan, +} from "../utils/with-sentry-stats-span.js"; export interface StatsFlags { global?: boolean; @@ -78,44 +83,57 @@ export const statsAction = async (flags: StatsFlags): Promise => { const { root, userConfig } = await resolveTarget(directory); // ora renders to stderr; suppress it in JSON mode so the run stays quiet. - const progress = flags.json ? null : spinner("Looking through your agent history…").start(); - let report: StatsReport; - try { - const sessions = await discoverSessions(root, scope, (foundCount) => - progress?.update(`Looking through your agent history… (${foundCount} found)`), - ); - progress?.update("Checking the code each agent wrote…"); - const results = await runStatsScan(sessions, scope.global ? 
null : root, { - onProgress: (completedCount, totalCount) => - progress?.update(`Checking the code each agent wrote… (${completedCount}/${totalCount})`), - }); - progress?.update("Scoring…"); - const aggregated = await aggregateStats(results, userConfig); + // The whole run is one Sentry trace: each phase below is a child span, and + // every ranked model becomes a queryable leaderboard-row span. + const report = await withSentryStatsSpan(async (rootSpan) => { + const progress = flags.json ? null : spinner("Looking through your agent history…").start(); + try { + const sessions = await traceStatsPhase("discover sessions", () => + discoverSessions(root, scope, (foundCount) => + progress?.update(`Looking through your agent history… (${foundCount} found)`), + ), + ); + progress?.update("Checking the code each agent wrote…"); + const results = await traceStatsPhase("scan sessions", () => + runStatsScan(sessions, scope.global ? null : root, { + onProgress: (completedCount, totalCount) => + progress?.update( + `Checking the code each agent wrote… (${completedCount}/${totalCount})`, + ), + }), + ); + progress?.update("Scoring…"); + const aggregated = await traceStatsPhase("aggregate + score", () => + aggregateStats(results, userConfig), + ); - report = { - scope: scope.global ? 
"global" : "repo", - directory: root, - models: aggregated.models, - providers: aggregated.providers, - best: aggregated.best, - worst: aggregated.worst, - sessionsAnalyzed: results.length, - sessionsRanked: results.filter((result) => result.filesScanned > 0).length, - sessionsNonReact: results.filter( - (result) => result.filesScanned === 0 && result.reconstructedFiles > 0, - ).length, - sessionsUnreconstructable: results.filter( - (result) => - result.filesScanned === 0 && - result.reconstructedFiles === 0 && - result.unreconstructable > 0, - ).length, - generatedAt: new Date().toISOString(), - }; - progress?.succeed("Done."); - } finally { - progress?.stop(); - } + const built: StatsReport = { + scope: scope.global ? "global" : "repo", + directory: root, + models: aggregated.models, + providers: aggregated.providers, + best: aggregated.best, + worst: aggregated.worst, + sessionsAnalyzed: results.length, + sessionsRanked: results.filter((result) => result.filesScanned > 0).length, + sessionsNonReact: results.filter( + (result) => result.filesScanned === 0 && result.reconstructedFiles > 0, + ).length, + sessionsUnreconstructable: results.filter( + (result) => + result.filesScanned === 0 && + result.reconstructedFiles === 0 && + result.unreconstructable > 0, + ).length, + generatedAt: new Date().toISOString(), + }; + recordStatsLeaderboard(built.models, rootSpan); + progress?.succeed("Done."); + return built; + } finally { + progress?.stop(); + } + }); recordCount(METRIC.statsRun, 1, { scope: report.scope, diff --git a/packages/react-doctor/src/cli/utils/build-run-context.ts b/packages/react-doctor/src/cli/utils/build-run-context.ts index d1a7815e5..4d702ee90 100644 --- a/packages/react-doctor/src/cli/utils/build-run-context.ts +++ b/packages/react-doctor/src/cli/utils/build-run-context.ts @@ -55,7 +55,7 @@ export interface RunContext { lintBatchOrdering: "cost" | "arrival"; } -const ROOT_SUBCOMMANDS = new Set(["install", "setup"]); +const ROOT_SUBCOMMANDS = new 
Set(["install", "setup", "stats"]); // `npm_config_user_agent` looks like "pnpm/9.1.0 npm/? node/v22.0.0 ..."; // the leading token names the package manager that spawned the process. diff --git a/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts b/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts new file mode 100644 index 000000000..00344ce7e --- /dev/null +++ b/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts @@ -0,0 +1,85 @@ +import * as Sentry from "@sentry/node"; +import { isSentryTracingEnabled } from "../../instrument.js"; +import { modelLabel } from "../../stats/model-label.js"; +import type { GroupStats } from "../../stats/types.js"; +import { buildSentryScope } from "./build-sentry-scope.js"; +import { toSpanAttributes } from "./to-span-attributes.js"; + +export type SentryStatsSpan = ReturnType | undefined; + +/** + * Runs a `react-doctor stats` invocation inside a Sentry root span so each + * leaderboard run is a first-class trace: the discover/scan/aggregate phases + * become a latency waterfall (see {@link traceStatsPhase}) and every ranked + * model is one queryable child span (see {@link recordStatsLeaderboard}). The + * run snapshot rides along as attributes, exactly like the inspect root span. + * + * A no-op pass-through when Sentry performance tracing is off (Sentry disabled, + * `--no-score`, tests, `SENTRY_TRACES_SAMPLE_RATE=0`): `run` receives `undefined` + * and no transaction is created, so there's no added exit latency. Unlike the + * inspect root span there's no active-run-trace handle to record — stats has no + * Effect pipeline whose spans need parenting and no in-scan crash path to link. 
+ */ +export const withSentryStatsSpan = <T>( + run: (rootSpan: SentryStatsSpan) => Promise<T>, +): Promise<T> => { + if (!isSentryTracingEnabled()) return run(undefined); + return Sentry.startSpan( + { + name: "react-doctor stats", + op: "cli.stats", + attributes: toSpanAttributes(buildSentryScope().tags), + }, + (rootSpan) => run(rootSpan), + ); +}; + +/** + * Wraps one phase of the stats pipeline in a child span so the trace shows where + * the wall-clock goes (the per-session oxlint scans dominate). A no-op + * pass-through when tracing is off; otherwise parents under the active stats + * root span. + */ +export const traceStatsPhase = <T>(name: string, thunk: () => Promise<T>): Promise<T> => { + if (!isSentryTracingEnabled()) return thunk(); + return Sentry.startSpan({ name, op: "stats.phase" }, () => thunk()); +}; + +/** + * The four leaderboard dimensions of one ranked model, projected to span + * attributes: the model name, its harness (the agent tool that ran it), the + * confidence-weighted 0-100 score (the column the board ranks on — `null` when + * undersampled, dropped rather than coerced), and the React files scored. Pure + * and exported so the projection is unit-testable without a live SDK, mirroring + * `build-run-event.ts`'s `buildRunEventAttributes`. + */ +export const buildStatsRowAttributes = ( + model: GroupStats, +): Record => + toSpanAttributes({ + "stats.model": modelLabel(model), + "stats.harness": model.provider, + "stats.score": model.weightedScore, + "stats.files": model.filesScanned, + }); + +/** + * Emits one zero-duration child span per ranked model so the leaderboard is + * queryable in Sentry's Trace Explorer / Spans dataset — filter or group by + * `stats.harness`, aggregate `stats.score` / `stats.files`. A no-op when the run + * isn't traced (`rootSpan` absent); otherwise the spans parent under it via the + * active scope.
+ */ +export const recordStatsLeaderboard = ( + models: ReadonlyArray<GroupStats>, + rootSpan: SentryStatsSpan, +): void => { + if (!rootSpan) return; + for (const model of models) { + Sentry.startInactiveSpan({ + name: model.key, + op: "stats.leaderboard_row", + attributes: buildStatsRowAttributes(model), + }).end(); + } +}; diff --git a/packages/react-doctor/src/stats/model-label.ts b/packages/react-doctor/src/stats/model-label.ts new file mode 100644 index 000000000..2fd82ef69 --- /dev/null +++ b/packages/react-doctor/src/stats/model-label.ts @@ -0,0 +1,11 @@ +import type { GroupStats } from "./types.js"; + +/** + * The bare model name for a leaderboard row — strips the `provider/` prefix that + * keys model groups, so `claude/claude-sonnet-4-5` reads as `claude-sonnet-4-5`. + * Provider groups (whose key is just the provider) pass through unchanged. + */ +export const modelLabel = (group: GroupStats): string => { + const slash = group.key.indexOf("/"); + return slash === -1 ? group.key : group.key.slice(slash + 1); +}; diff --git a/packages/react-doctor/src/stats/render-stats.ts b/packages/react-doctor/src/stats/render-stats.ts index 66c1cecf1..7e543a230 100644 --- a/packages/react-doctor/src/stats/render-stats.ts +++ b/packages/react-doctor/src/stats/render-stats.ts @@ -5,6 +5,7 @@ import { STATS_SCORE_COLOR_HIGH, STATS_SCORE_COLOR_MEDIUM, } from "./constants.js"; +import { modelLabel } from "./model-label.js"; import type { GroupStats, StatsReport } from "./types.js"; const colorForScore = (score: number): ((text: string) => string) => { @@ -36,11 +37,6 @@ const renderScore = (group: GroupStats): string => { return `${bar} ${paint(String(group.weightedScore).padStart(3))}`; }; -const modelLabel = (group: GroupStats): string => { - const slash = group.key.indexOf("/"); - return slash === -1 ?
group.key : group.key.slice(slash + 1); -}; - const ANSI_PATTERN = new RegExp(`${String.fromCharCode(27)}\\[[0-9;]*m`, "g"); const stripAnsi = (text: string): string => text.replace(ANSI_PATTERN, ""); diff --git a/packages/react-doctor/tests/stats-trace.test.ts b/packages/react-doctor/tests/stats-trace.test.ts new file mode 100644 index 000000000..7e7955f98 --- /dev/null +++ b/packages/react-doctor/tests/stats-trace.test.ts @@ -0,0 +1,58 @@ +import { describe, expect, it } from "vite-plus/test"; +import { + buildStatsRowAttributes, + recordStatsLeaderboard, +} from "../src/cli/utils/with-sentry-stats-span.js"; +import type { GroupStats } from "../src/stats/types.js"; + +const group = (overrides: Partial<GroupStats>): GroupStats => ({ + key: "claude/claude-sonnet-4-5", + provider: "claude", + sessions: 1, + filesScanned: 4, + unreconstructable: 0, + totalDiagnostics: 2, + errorCount: 0, + warningCount: 2, + diagnosticsPerFile: 0.5, + score: 90, + scoreLabel: "good", + weightedScore: 88, + topRules: [], + ...overrides, +}); + +describe("buildStatsRowAttributes", () => { + it("projects the four leaderboard columns, stripping the provider prefix from the model", () => { + expect(buildStatsRowAttributes(group({}))).toEqual({ + "stats.model": "claude-sonnet-4-5", + "stats.harness": "claude", + "stats.score": 88, + "stats.files": 4, + }); + }); + + it("ranks on the confidence-weighted score, not the raw score", () => { + expect(buildStatsRowAttributes(group({ score: 90, weightedScore: 72 }))["stats.score"]).toBe( + 72, + ); + }); + + it("drops an undersampled (null) score rather than coercing it to a string", () => { + expect(buildStatsRowAttributes(group({ weightedScore: null }))).not.toHaveProperty( + "stats.score", + ); + }); + + it("passes a provider-only key (no slash) through as the model name", () => { + expect(buildStatsRowAttributes(group({ key: "codex", provider: "codex" }))["stats.model"]).toBe( + "codex", + ); + }); +}); + +describe("recordStatsLeaderboard", () => { +
it("is a no-op when the run is not traced (no root span)", () => { + expect(recordStatsLeaderboard([group({})], undefined)).toBeUndefined(); + }); +}); From db52fc69af75333b230ae618a8cc0190094e13d0 Mon Sep 17 00:00:00 2001 From: Rayhan Noufal Arayilakath Date: Mon, 22 Jun 2026 19:43:50 -0700 Subject: [PATCH 16/17] feat(stats): report leaderboard rows to /api/stats + render the community board MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Send the same per-model rows the stats command puts on Sentry ({model, harness, score, files}) to our own /api/stats so we store them and get back the community leaderboard, shown beneath the local board. - stats/leaderboard-row.ts: one shared projection feeds BOTH the Sentry span attributes and the /api/stats payload, so they can't drift and both stay code-free (no source, paths, or identity ever leaves the machine). - stats/report-stats-run.ts: best-effort gzip POST (null on any failure), honoring an optional REACT_DOCTOR_STATS_API_URL override for local e2e. - stats command: honors --no-score/--no-telemetry — skips the score API (scores n/a, ranked by diagnostics-per-file) AND the /api/stats report, so a --no-telemetry run is fully local and less rich. - render-stats: appends a "Community leaderboard" table (with run counts) when one is returned. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/core/src/constants.ts | 6 + .../react-doctor/src/cli/commands/stats.ts | 47 +++++- .../src/cli/utils/with-sentry-stats-span.ts | 27 ++-- packages/react-doctor/src/stats/constants.ts | 4 + .../react-doctor/src/stats/leaderboard-row.ts | 26 ++++ .../react-doctor/src/stats/render-stats.ts | 36 ++++- .../src/stats/report-stats-run.ts | 88 ++++++++++++ packages/react-doctor/src/stats/types.ts | 20 +++ .../react-doctor/tests/stats-render.test.ts | 17 +++ .../tests/stats-report-run.test.ts | 134 ++++++++++++++++++ .../react-doctor/tests/stats-trace.test.ts | 26 ++++ 11 files changed, 409 insertions(+), 22 deletions(-) create mode 100644 packages/react-doctor/src/stats/leaderboard-row.ts create mode 100644 packages/react-doctor/src/stats/report-stats-run.ts create mode 100644 packages/react-doctor/tests/stats-report-run.test.ts diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 8b519dce1..e08f882fd 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -81,6 +81,12 @@ export const SCORE_BAR_WIDTH_CHARS = 50; export const SCORE_API_URL = "https://www.react.doctor/api/score"; +// Sink for the `react-doctor stats` leaderboard rows: the CLI POSTs the same +// code-free `{model, harness, score, files}` rows it reports to Sentry, and the +// endpoint stores them and returns the community leaderboard. Overridable for +// local e2e via `REACT_DOCTOR_STATS_API_URL` (read in the CLI client, not here). 
+export const STATS_API_URL = "https://www.react.doctor/api/stats"; + export const ENTERPRISE_CONTACT_URL = "https://react.doctor/enterprise"; export const SHARE_BASE_URL = "https://react.doctor/share"; diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts index cadf5f2dc..ddb0e8214 100644 --- a/packages/react-doctor/src/cli/commands/stats.ts +++ b/packages/react-doctor/src/cli/commands/stats.ts @@ -4,8 +4,14 @@ import { aggregateStats } from "../../stats/aggregate-stats.js"; import { STATS_DEFAULT_SESSION_LIMIT } from "../../stats/constants.js"; import { discoverSessions } from "../../stats/discover-sessions.js"; import { renderStatsReport } from "../../stats/render-stats.js"; +import { reportStatsRun } from "../../stats/report-stats-run.js"; import { runStatsScan } from "../../stats/run-stats-scan.js"; -import type { StatsProvider, StatsReport, StatsScopeOptions } from "../../stats/types.js"; +import type { + CommunityLeaderboard, + StatsProvider, + StatsReport, + StatsScopeOptions, +} from "../../stats/types.js"; import { METRIC } from "../utils/constants.js"; import { enableJsonMode } from "../utils/json-mode.js"; import { recordCount } from "../utils/record-metric.js"; @@ -23,6 +29,10 @@ export interface StatsFlags { provider?: string; json?: boolean; cwd?: string; + // Commander negations from the root program: `--no-score` → `score: false`, + // `--no-telemetry` → `telemetry: false`. Both opt out of the network. + score?: boolean; + telemetry?: boolean; } const VALID_PROVIDERS = new Set(["claude", "codex", "cursor"]); @@ -82,10 +92,23 @@ export const statsAction = async (flags: StatsFlags): Promise => { const { root, userConfig } = await resolveTarget(directory); + // `--no-score` / `--no-telemetry` (or `noScore` in config) opt out of the + // network entirely — same signal `resolve-cli-inspect-options` uses. 
When off, + // we skip the score API (scores show n/a, ranked by diagnostics-per-file) and + // the `/api/stats` report, so a `--no-telemetry` run is fully local. + const telemetryEnabled = !( + flags.score === false || + flags.telemetry === false || + Boolean(userConfig?.noScore) + ); + // ora renders to stderr; suppress it in JSON mode so the run stays quiet. // The whole run is one Sentry trace: each phase below is a child span, and // every ranked model becomes a queryable leaderboard-row span. - const report = await withSentryStatsSpan(async (rootSpan) => { + const { report, community } = await withSentryStatsSpan<{ + report: StatsReport; + community: CommunityLeaderboard | null; + }>(async (rootSpan) => { const progress = flags.json ? null : spinner("Looking through your agent history…").start(); try { const sessions = await traceStatsPhase("discover sessions", () => @@ -102,9 +125,15 @@ export const statsAction = async (flags: StatsFlags): Promise => { ), }), ); - progress?.update("Scoring…"); + progress?.update(telemetryEnabled ? "Scoring…" : "Ranking…"); const aggregated = await traceStatsPhase("aggregate + score", () => - aggregateStats(results, userConfig), + // Skip the score API when telemetry is off: a null scorer leaves every + // score null, and ranking falls back to diagnostics-per-file. + aggregateStats( + results, + userConfig, + telemetryEnabled ? undefined : () => Promise.resolve(null), + ), ); const built: StatsReport = { @@ -128,8 +157,14 @@ export const statsAction = async (flags: StatsFlags): Promise => { generatedAt: new Date().toISOString(), }; recordStatsLeaderboard(built.models, rootSpan); + // Send the same leaderboard rows to our own store and get the community + // board back. Best-effort and telemetry-gated; never blocks the result. + progress?.update("Comparing with the community…"); + const communityBoard = telemetryEnabled + ? 
await traceStatsPhase("report leaderboard", () => reportStatsRun(built)) + : null; progress?.succeed("Done."); - return built; + return { report: built, community: communityBoard }; } finally { progress?.stop(); } @@ -147,5 +182,5 @@ export const statsAction = async (flags: StatsFlags): Promise => { return; } - process.stdout.write(`${renderStatsReport(report)}\n`); + process.stdout.write(`${renderStatsReport(report, community)}\n`); }; diff --git a/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts b/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts index 00344ce7e..5254c8207 100644 --- a/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts +++ b/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts @@ -1,6 +1,6 @@ import * as Sentry from "@sentry/node"; import { isSentryTracingEnabled } from "../../instrument.js"; -import { modelLabel } from "../../stats/model-label.js"; +import { toLeaderboardRow } from "../../stats/leaderboard-row.js"; import type { GroupStats } from "../../stats/types.js"; import { buildSentryScope } from "./build-sentry-scope.js"; import { toSpanAttributes } from "./to-span-attributes.js"; @@ -46,22 +46,23 @@ export const traceStatsPhase = (name: string, thunk: () => Promise): Promi }; /** - * The four leaderboard dimensions of one ranked model, projected to span - * attributes: the model name, its harness (the agent tool that ran it), the - * confidence-weighted 0-100 score (the column the board ranks on — `null` when - * undersampled, dropped rather than coerced), and the React files scored. Pure - * and exported so the projection is unit-testable without a live SDK, mirroring - * `build-run-event.ts`'s `buildRunEventAttributes`. + * One ranked model's four leaderboard dimensions projected to span attributes, + * built from the shared {@link toLeaderboardRow} projection so the Sentry span and + * the `/api/stats` payload carry identical, code-free data. `null` score is + * dropped rather than coerced. 
Pure and exported so it's unit-testable without a + * live SDK, mirroring `build-run-event.ts`'s `buildRunEventAttributes`. */ export const buildStatsRowAttributes = ( model: GroupStats, -): Record => - toSpanAttributes({ - "stats.model": modelLabel(model), - "stats.harness": model.provider, - "stats.score": model.weightedScore, - "stats.files": model.filesScanned, +): Record => { + const row = toLeaderboardRow(model); + return toSpanAttributes({ + "stats.model": row.model, + "stats.harness": row.harness, + "stats.score": row.score, + "stats.files": row.files, }); +}; /** * Emits one zero-duration child span per ranked model so the leaderboard is diff --git a/packages/react-doctor/src/stats/constants.ts b/packages/react-doctor/src/stats/constants.ts index 780289126..9718f7e26 100644 --- a/packages/react-doctor/src/stats/constants.ts +++ b/packages/react-doctor/src/stats/constants.ts @@ -59,3 +59,7 @@ export const STATS_SCORE_BAR_WIDTH = 16; // above MEDIUM is yellow, below is red. export const STATS_SCORE_COLOR_HIGH = 80; export const STATS_SCORE_COLOR_MEDIUM = 50; + +// Wire-format version for the `/api/stats` request body. Bump when the shape of +// the reported leaderboard rows changes so the endpoint can branch on it. +export const STATS_REPORT_SCHEMA_VERSION = 1; diff --git a/packages/react-doctor/src/stats/leaderboard-row.ts b/packages/react-doctor/src/stats/leaderboard-row.ts new file mode 100644 index 000000000..ff9e63d45 --- /dev/null +++ b/packages/react-doctor/src/stats/leaderboard-row.ts @@ -0,0 +1,26 @@ +import { modelLabel } from "./model-label.js"; +import type { GroupStats } from "./types.js"; + +/** + * One leaderboard row reduced to its four shareable dimensions: the bare model + * name, the harness (the agent tool that ran it), the confidence-weighted 0-100 + * score (`null` when undersampled), and the React files scored. 
+ * + * This is the single source for everything the stats feature reports off the + * machine — the Sentry `stats.leaderboard_row` span attributes and the + * `/api/stats` payload both project from it, so the two sinks can never drift and + * both stay code-free: no source text, paths, or repo identity ever appears here. + */ +export interface LeaderboardRow { + readonly model: string; + readonly harness: string; + readonly score: number | null; + readonly files: number; +} + +export const toLeaderboardRow = (group: GroupStats): LeaderboardRow => ({ + model: modelLabel(group), + harness: group.provider, + score: group.weightedScore, + files: group.filesScanned, +}); diff --git a/packages/react-doctor/src/stats/render-stats.ts b/packages/react-doctor/src/stats/render-stats.ts index 7e543a230..0cc787a44 100644 --- a/packages/react-doctor/src/stats/render-stats.ts +++ b/packages/react-doctor/src/stats/render-stats.ts @@ -6,7 +6,7 @@ import { STATS_SCORE_COLOR_MEDIUM, } from "./constants.js"; import { modelLabel } from "./model-label.js"; -import type { GroupStats, StatsReport } from "./types.js"; +import type { CommunityLeaderboard, CommunityModel, GroupStats, StatsReport } from "./types.js"; const colorForScore = (score: number): ((text: string) => string) => { if (score >= STATS_SCORE_COLOR_HIGH) return highlighter.success; @@ -74,6 +74,21 @@ const renderProviderTable = (providers: ReadonlyArray): string => { return renderTable(["Tool", "Files", "Score"], rows); }; +const renderCommunityScore = (score: number | null): string => + score === null ? 
highlighter.dim("n/a") : colorForScore(score)(String(score).padStart(3)); + +const renderCommunityTable = (models: ReadonlyArray): string => { + const rows = models.map((model, index) => [ + String(index + 1), + highlighter.bold(model.model), + colorForProvider(model.harness)(model.harness), + renderCommunityScore(model.communityScore), + // Sample size beside the score so a thinly-sampled model isn't read as authoritative. + highlighter.dim(String(model.runs)), + ]); + return renderTable(["#", "Model", "Tool", "Score", "Runs"], rows); +}; + const calloutScore = (group: GroupStats): string => group.weightedScore !== null ? ` (${group.weightedScore})` : ""; @@ -95,8 +110,15 @@ const renderCallout = (report: StatsReport): string => { return lines.join("\n"); }; -/** Render the leaderboard to a string for the terminal. */ -export const renderStatsReport = (report: StatsReport): string => { +/** + * Render the leaderboard to a string for the terminal. When a `community` board is + * supplied (telemetry on, `/api/stats` reachable), append how these agents rank + * across everyone for context. + */ +export const renderStatsReport = ( + report: StatsReport, + community: CommunityLeaderboard | null = null, +): string => { const scopePhrase = report.scope === "global" ? 
"across all your projects" : "in this project"; const header = [ highlighter.bold("React Doctor leaderboard"), @@ -128,6 +150,14 @@ export const renderStatsReport = (report: StatsReport): string => { sections.push("", callout); } + if (community && community.models.length > 0) { + sections.push( + "", + highlighter.dim("Community leaderboard (all react-doctor users):"), + renderCommunityTable(community.models.slice(0, STATS_LEADERBOARD_TOP_N)), + ); + } + const notes: string[] = []; if (report.sessionsNonReact > 0) { notes.push(`Skipped ${report.sessionsNonReact} that changed only non-React files.`); diff --git a/packages/react-doctor/src/stats/report-stats-run.ts b/packages/react-doctor/src/stats/report-stats-run.ts new file mode 100644 index 000000000..f4eeb721b --- /dev/null +++ b/packages/react-doctor/src/stats/report-stats-run.ts @@ -0,0 +1,88 @@ +import { gzipSync } from "node:zlib"; +import { FETCH_TIMEOUT_MS, STATS_API_URL } from "@react-doctor/core"; +import * as Option from "effect/Option"; +import * as Schema from "effect/Schema"; +import { STATS_REPORT_SCHEMA_VERSION } from "./constants.js"; +import { toLeaderboardRow } from "./leaderboard-row.js"; +import type { CommunityLeaderboard, StatsReport } from "./types.js"; + +const CommunityModelSchema = Schema.Struct({ + model: Schema.String, + harness: Schema.String, + communityScore: Schema.NullOr(Schema.Number), + runs: Schema.Number, + files: Schema.Number, +}); + +// The endpoint stores the submitted rows and returns the community leaderboard. +// `Schema.Struct` ignores unknown fields, so extra keys (e.g. `stored`) are +// harmless; a missing/!ok/malformed `community` simply drops to `null`. 
+const StatsResponseSchema = Schema.Struct({ + community: Schema.optional( + Schema.Struct({ + generatedAt: Schema.String, + models: Schema.Array(CommunityModelSchema), + }), + ), +}); + +const parseCommunity = (value: unknown): CommunityLeaderboard | null => { + const decoded = Option.getOrNull(Schema.decodeUnknownOption(StatsResponseSchema)(value)); + return decoded?.community ?? null; +}; + +const isAbortError = (error: unknown): boolean => + error instanceof Error && (error.name === "AbortError" || error.name === "TimeoutError"); + +const describeFailure = (error: unknown): string => { + if (isAbortError(error)) return `timed out after ${FETCH_TIMEOUT_MS / 1000}s`; + if (error instanceof Error && error.message) return error.message; + return String(error); +}; + +// Local-only override so an e2e run can point at a dev server; production uses the +// hardcoded `STATS_API_URL`. Read here (the CLI layer) rather than in core, which +// routes ambient config through `Context.Reference`. +const resolveStatsApiUrl = (): string => + process.env.REACT_DOCTOR_STATS_API_URL?.trim() || STATS_API_URL; + +/** + * Sends the run's leaderboard rows — and only those: `{model, harness, score, + * files}` per model, identical to what the Sentry `stats.leaderboard_row` spans + * carry — to `/api/stats`, which stores them and returns the community + * leaderboard. Best-effort: any failure (offline, timeout, non-2xx, malformed + * body) resolves to `null` so reporting never breaks the rendered board. The + * caller gates this on telemetry being enabled, so it never runs under + * `--no-telemetry` / `--no-score`. 
+ */ +export const reportStatsRun = async (report: StatsReport): Promise<CommunityLeaderboard | null> => { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); + try { + const requestBody = JSON.stringify({ + schemaVersion: STATS_REPORT_SCHEMA_VERSION, + models: report.models.map(toLeaderboardRow), + }); + const response = await fetch(resolveStatsApiUrl(), { + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Encoding": "gzip", + }, + body: gzipSync(requestBody), + signal: controller.signal, + }); + + if (!response.ok) { + console.warn(`[react-doctor] Stats API returned ${response.status} ${response.statusText}`); + return null; + } + + return parseCommunity(await response.json()); + } catch (error) { + console.warn(`[react-doctor] Stats API unreachable (${describeFailure(error)})`); + return null; + } finally { + clearTimeout(timeoutId); + } +}; diff --git a/packages/react-doctor/src/stats/types.ts b/packages/react-doctor/src/stats/types.ts index 12b44c128..9b0548626 100644 --- a/packages/react-doctor/src/stats/types.ts +++ b/packages/react-doctor/src/stats/types.ts @@ -133,3 +133,23 @@ export interface StatsScopeOptions { readonly limit: number; readonly provider?: StatsProvider; } + +/** One model's standing across every `react-doctor stats` run (the community). */ +export interface CommunityModel { + readonly model: string; + readonly harness: string; + /** Files-weighted mean score across all runs (null if undersampled globally). */ + readonly communityScore: number | null; + /** Distinct runs that contributed this model — the sample size behind the score. */ + readonly runs: number; + readonly files: number; +} + +/** + * The global agent leaderboard returned by `/api/stats` in exchange for a run's + * rows — how these agents rank across everyone, so a local board reads in context. 
+ */ +export interface CommunityLeaderboard { + readonly generatedAt: string; + readonly models: ReadonlyArray<CommunityModel>; +} diff --git a/packages/react-doctor/tests/stats-render.test.ts b/packages/react-doctor/tests/stats-render.test.ts index 199433232..97dc17aa2 100644 --- a/packages/react-doctor/tests/stats-render.test.ts +++ b/packages/react-doctor/tests/stats-render.test.ts @@ -77,4 +77,21 @@ describe("renderStatsReport", () => { expect(output).toContain("Skipped 3 that changed only non-React files"); expect(output).toContain("Skipped 1 that used edits we could not replay"); }); + + it("appends the community leaderboard with sample sizes when one is supplied", () => { + const only = group({ key: "claude/opus" }); + const output = renderStatsReport(report({ models: [only], providers: [only], best: only }), { + generatedAt: "2026-06-22T00:00:00.000Z", + models: [{ model: "opus", harness: "claude", communityScore: 81, runs: 42, files: 900 }], + }); + expect(output).toContain("Community leaderboard (all react-doctor users)"); + expect(output).toContain("81"); + expect(output).toContain("42"); + }); + + it("omits the community section when no board is supplied (offline / --no-telemetry)", () => { + const only = group({}); + const output = renderStatsReport(report({ models: [only], providers: [only], best: only })); + expect(output).not.toContain("Community leaderboard"); + }); }); diff --git a/packages/react-doctor/tests/stats-report-run.test.ts b/packages/react-doctor/tests/stats-report-run.test.ts new file mode 100644 index 000000000..0f11e6cd1 --- /dev/null +++ b/packages/react-doctor/tests/stats-report-run.test.ts @@ -0,0 +1,134 @@ +import { gunzipSync } from "node:zlib"; +import { afterEach, describe, expect, it, vi } from "vite-plus/test"; +import { reportStatsRun } from "../src/stats/report-stats-run.js"; +import type { GroupStats, StatsReport } from "../src/stats/types.js"; + +const group = (overrides: Partial<GroupStats>): GroupStats => ({ + key: "claude/claude-sonnet-4-5", +
provider: "claude", + sessions: 2, + filesScanned: 8, + unreconstructable: 0, + totalDiagnostics: 3, + errorCount: 1, + warningCount: 2, + diagnosticsPerFile: 0.375, + score: 90, + scoreLabel: "Great", + weightedScore: 84, + // topRules carries rule messages — must NEVER reach the wire payload. + topRules: [{ rule: "react-doctor/no-array-index-key", count: 3 }], + ...overrides, +}); + +const report = (models: GroupStats[]): StatsReport => ({ + scope: "repo", + directory: "/repo", + models, + providers: [group({ key: "claude", provider: "claude" })], + best: models[0] ?? null, + worst: null, + sessionsAnalyzed: 4, + sessionsRanked: 2, + sessionsNonReact: 1, + sessionsUnreconstructable: 0, + generatedAt: "2026-06-22T00:00:00.000Z", +}); + +const stubFetch = (impl: typeof fetch): void => { + vi.stubGlobal("fetch", vi.fn(impl)); +}; + +const decodeBody = (body: BodyInit | null | undefined): unknown => + JSON.parse(gunzipSync(body as Uint8Array).toString("utf8")); + +describe("reportStatsRun", () => { + afterEach(() => { + vi.unstubAllGlobals(); + vi.restoreAllMocks(); + }); + + it("sends only the four code-free leaderboard fields per row — no source, paths, or identity", async () => { + let captured: unknown; + stubFetch(async (_url, init) => { + captured = decodeBody(init?.body); + return new Response(JSON.stringify({ stored: true }), { status: 200 }); + }); + + await reportStatsRun(report([group({})])); + + expect(captured).toEqual({ + schemaVersion: 1, + models: [{ model: "claude-sonnet-4-5", harness: "claude", score: 84, files: 8 }], + }); + // Belt-and-suspenders: the serialized body must carry none of the leaky fields. 
+ const serialized = JSON.stringify(captured); + for (const leak of [ + "topRules", + "message", + "help", + "filePath", + "repo", + "sha", + "directory", + "no-array-index-key", + ]) { + expect(serialized).not.toContain(leak); + } + }); + + it("gzips the body and returns the parsed community leaderboard", async () => { + let encoding: string | undefined; + stubFetch(async (_url, init) => { + encoding = new Headers(init?.headers).get("content-encoding") ?? undefined; + return new Response( + JSON.stringify({ + stored: true, + community: { + generatedAt: "2026-06-22T00:00:00.000Z", + models: [ + { + model: "claude-sonnet-4-5", + harness: "claude", + communityScore: 81, + runs: 42, + files: 900, + }, + ], + }, + }), + { status: 200, headers: { "Content-Type": "application/json" } }, + ); + }); + + const community = await reportStatsRun(report([group({})])); + + expect(encoding).toBe("gzip"); + expect(community?.models[0]).toEqual({ + model: "claude-sonnet-4-5", + harness: "claude", + communityScore: 81, + runs: 42, + files: 900, + }); + }); + + it("returns null (never throws) when the API is unreachable", async () => { + vi.spyOn(console, "warn").mockImplementation(() => {}); + stubFetch(async () => { + throw new Error("network unavailable"); + }); + expect(await reportStatsRun(report([group({})]))).toBeNull(); + }); + + it("returns null on a non-2xx response", async () => { + vi.spyOn(console, "warn").mockImplementation(() => {}); + stubFetch(async () => new Response("boom", { status: 500, statusText: "Server Error" })); + expect(await reportStatsRun(report([group({})]))).toBeNull(); + }); + + it("returns null when the response omits a community board", async () => { + stubFetch(async () => new Response(JSON.stringify({ stored: true }), { status: 200 })); + expect(await reportStatsRun(report([group({})]))).toBeNull(); + }); +}); diff --git a/packages/react-doctor/tests/stats-trace.test.ts b/packages/react-doctor/tests/stats-trace.test.ts index 7e7955f98..7e93377d9 
100644 --- a/packages/react-doctor/tests/stats-trace.test.ts +++ b/packages/react-doctor/tests/stats-trace.test.ts @@ -3,6 +3,7 @@ import { buildStatsRowAttributes, recordStatsLeaderboard, } from "../src/cli/utils/with-sentry-stats-span.js"; +import { toLeaderboardRow } from "../src/stats/leaderboard-row.js"; import type { GroupStats } from "../src/stats/types.js"; const group = (overrides: Partial<GroupStats>): GroupStats => ({ @@ -49,6 +50,31 @@ describe("buildStatsRowAttributes", () => { "codex", ); }); + + it("emits only the four leaderboard attribute keys — never code, paths, or identity", () => { + expect(Object.keys(buildStatsRowAttributes(group({}))).sort()).toEqual([ + "stats.files", + "stats.harness", + "stats.model", + "stats.score", + ]); + }); + + it("derives the span attributes from the same projection the /api/stats payload uses (no drift)", () => { + const sample = group({ + key: "cursor/composer-2.5", + provider: "cursor", + weightedScore: 67, + filesScanned: 12, + }); + const row = toLeaderboardRow(sample); + expect(buildStatsRowAttributes(sample)).toEqual({ + "stats.model": row.model, + "stats.harness": row.harness, + "stats.score": row.score, + "stats.files": row.files, + }); + }); }); describe("recordStatsLeaderboard", () => { From 83a92102a0a2710b14c44adbbe04d3c0e563e42b Mon Sep 17 00:00:00 2001 From: Rayhan Noufal Arayilakath Date: Mon, 22 Jun 2026 20:02:41 -0700 Subject: [PATCH 17/17] refactor(stats): send /api/stats payload as plain JSON, not gzip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The leaderboard payload is a handful of tiny rows ({model, harness, score, files}), so gzip cost more than it saved — it was cargo-culted from the diagnostics-heavy score API. Send plain JSON and drop the Content-Encoding header. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../react-doctor/src/stats/report-stats-run.ts | 18 ++++++++---------- .../tests/stats-report-run.test.ts | 8 +++----- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/packages/react-doctor/src/stats/report-stats-run.ts b/packages/react-doctor/src/stats/report-stats-run.ts index f4eeb721b..9341de7d9 100644 --- a/packages/react-doctor/src/stats/report-stats-run.ts +++ b/packages/react-doctor/src/stats/report-stats-run.ts @@ -1,4 +1,3 @@ -import { gzipSync } from "node:zlib"; import { FETCH_TIMEOUT_MS, STATS_API_URL } from "@react-doctor/core"; import * as Option from "effect/Option"; import * as Schema from "effect/Schema"; @@ -59,17 +58,16 @@ export const reportStatsRun = async (report: StatsReport): Promise<CommunityLead const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); try { - const requestBody = JSON.stringify({ - schemaVersion: STATS_REPORT_SCHEMA_VERSION, - models: report.models.map(toLeaderboardRow), - }); const response = await fetch(resolveStatsApiUrl(), { method: "POST", - headers: { - "Content-Type": "application/json", - "Content-Encoding": "gzip", - }, - body: gzipSync(requestBody), + headers: { "Content-Type": "application/json" }, + // Plain JSON, not gzip: the payload is a handful of tiny rows, so + // compression would cost more than it saves (unlike the score API's large + // diagnostics body). 
+ body: JSON.stringify({ + schemaVersion: STATS_REPORT_SCHEMA_VERSION, + models: report.models.map(toLeaderboardRow), + }), signal: controller.signal, }); diff --git a/packages/react-doctor/tests/stats-report-run.test.ts b/packages/react-doctor/tests/stats-report-run.test.ts index 0f11e6cd1..50d84112e 100644 --- a/packages/react-doctor/tests/stats-report-run.test.ts +++ b/packages/react-doctor/tests/stats-report-run.test.ts @@ -1,4 +1,3 @@ -import { gunzipSync } from "node:zlib"; import { afterEach, describe, expect, it, vi } from "vite-plus/test"; import { reportStatsRun } from "../src/stats/report-stats-run.js"; import type { GroupStats, StatsReport } from "../src/stats/types.js"; @@ -39,8 +38,7 @@ const stubFetch = (impl: typeof fetch): void => { vi.stubGlobal("fetch", vi.fn(impl)); }; -const decodeBody = (body: BodyInit | null | undefined): unknown => - JSON.parse(gunzipSync(body as Uint8Array).toString("utf8")); +const decodeBody = (body: BodyInit | null | undefined): unknown => JSON.parse(body as string); describe("reportStatsRun", () => { afterEach(() => { @@ -77,7 +75,7 @@ describe("reportStatsRun", () => { } }); - it("gzips the body and returns the parsed community leaderboard", async () => { + it("sends plain JSON (no gzip) and returns the parsed community leaderboard", async () => { let encoding: string | undefined; stubFetch(async (_url, init) => { encoding = new Headers(init?.headers).get("content-encoding") ?? undefined; @@ -103,7 +101,7 @@ describe("reportStatsRun", () => { const community = await reportStatsRun(report([group({})])); - expect(encoding).toBe("gzip"); + expect(encoding).toBeUndefined(); expect(community?.models[0]).toEqual({ model: "claude-sonnet-4-5", harness: "claude",