diff --git a/.changeset/stats-agent-leaderboard.md b/.changeset/stats-agent-leaderboard.md new file mode 100644 index 000000000..c6da718dc --- /dev/null +++ b/.changeset/stats-agent-leaderboard.md @@ -0,0 +1,15 @@ +--- +"react-doctor": patch +--- + +Add a `react-doctor stats` subcommand — a per-model code-quality leaderboard built from local AI agent chat history. + +`stats` reads local agent history — Claude Code (`~/.claude`) and Codex (`~/.codex`) transcripts, plus Cursor's GUI composer databases and CLI agent stores (`~/.cursor`, `~/.cursor-nightly`) — reconstructs the file content each model actually wrote (Claude post-edit snapshots, Cursor full post-edit file snapshots, Codex `apply_patch` envelopes), lints that content with the existing engine, and ranks models and providers by their React Doctor score and diagnostics-per-file. The job: answer "which agent/model writes the cleanest React code in my repo". + +- Only the React code each model wrote is scored. Reconstructed files are filtered to actual React (JSX/TSX, `use client`/`use server` directives, or a React-ecosystem import) before linting, so a model's plain backend/util/config files don't pad its file count or dilute its diagnostics-per-file. A scan that errors, is skipped, or whose lint phase fails is dropped rather than counted as zero-diagnostic "clean" code, so un-lintable output can't inflate a model's score. +- Ranking is by a confidence-weighted score, not the raw score: each group's score is regressed toward the global mean by its evidence, so a model with a handful of clean files can't top the board on a tiny sample. Files are the dominant signal; sessions only lightly discount the file weight (many files from one session are one correlated sample) and never below a floor. +- Cursor is read from every place it stores chats: the GUI composer database (`state.vscdb`) for both the stable and Nightly builds, and the CLI agent's per-session stores under `~/.cursor` and `~/.cursor-nightly`. 
Each session carries its real model (e.g. `claude-opus-4-8`, `gpt-5.5`, `composer-2.5`) and a faithful reconstruction of every edited file (full GUI post-edit snapshots; CLI `Write`/`ApplyPatch`/`StrReplace`/`Delete` tool calls replayed against captured reads). A database a running editor holds locked is read via SQLite's `immutable` mode rather than skipped. Attribution falls back to `unknown` only for GUI chats left on the "Auto" model. +- Default scope is the current repository (sessions whose cwd or edits touch the repo root); `--global` ranks across every repo on the machine. `--since`, `--limit`, and `--provider` bound the work. +- `--json` emits a structured leaderboard (`{ schemaVersion, scope, models, providers, best, worst, … }`); the terminal output shows the top models and per-tool tables with a single score bar (the confidence-weighted score) and a best/worst callout. +- Coverage is honest about its limits: Codex shell-based edits are not faithfully reconstructable (surfaced as skipped), reading any Cursor database requires `node:sqlite` (Node 22.13+), and the score requires network access. +- Anonymized Sentry tracing (CLI only, same gating as the scan path — off under `--no-score`, in tests, and for the programmatic API): each run is one `cli.stats` trace with a discover/scan/aggregate latency waterfall, and every ranked model is a queryable `stats.leaderboard_row` span carrying its model, harness, confidence-weighted score, and files scored — so the leaderboard is sliceable in Sentry's Trace Explorer. 
diff --git a/.github/workflows/publish-any-commit.yml b/.github/workflows/publish-any-commit.yml index f31673c2b..ad9b6d4b4 100644 --- a/.github/workflows/publish-any-commit.yml +++ b/.github/workflows/publish-any-commit.yml @@ -40,7 +40,8 @@ jobs: if pnpm dlx pkg-pr-new publish \ ./packages/react-doctor \ ./packages/oxlint-plugin-react-doctor \ - ./packages/eslint-plugin-react-doctor; then + ./packages/eslint-plugin-react-doctor \ + ./packages/deslop-js; then exit 0 fi diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 8b519dce1..e08f882fd 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -81,6 +81,12 @@ export const SCORE_BAR_WIDTH_CHARS = 50; export const SCORE_API_URL = "https://www.react.doctor/api/score"; +// Sink for the `react-doctor stats` leaderboard rows: the CLI POSTs the same +// code-free `{model, harness, score, files}` rows it reports to Sentry, and the +// endpoint stores them and returns the community leaderboard. Overridable for +// local e2e via `REACT_DOCTOR_STATS_API_URL` (read in the CLI client, not here). +export const STATS_API_URL = "https://www.react.doctor/api/stats"; + export const ENTERPRISE_CONTACT_URL = "https://react.doctor/enterprise"; export const SHARE_BASE_URL = "https://react.doctor/share"; diff --git a/packages/core/src/highlighter.ts b/packages/core/src/highlighter.ts index 2beee1325..c216d3728 100644 --- a/packages/core/src/highlighter.ts +++ b/packages/core/src/highlighter.ts @@ -1,5 +1,13 @@ import pc from "picocolors"; +// picocolors only ships the 16-color palette, so orange (Claude's brand) is a +// 256-color escape built by hand. Honors color-disabled by returning the input. +const ORANGE_ANSI_CODE = 208; +const makeOrange = + (enabled: boolean): ((input: string | number) => string) => + (input) => + enabled ? 
`\u001b[38;5;${ORANGE_ANSI_CODE}m${input}\u001b[39m` : String(input); + export const highlighter = { error: pc.red, warn: pc.yellow, @@ -7,6 +15,7 @@ export const highlighter = { success: pc.green, dim: pc.dim, gray: pc.gray, + orange: makeOrange(pc.isColorSupported), bold: pc.bold, }; @@ -27,5 +36,6 @@ export const setColorEnabled = (enabled: boolean): void => { highlighter.success = colors.green; highlighter.dim = colors.dim; highlighter.gray = colors.gray; + highlighter.orange = makeOrange(enabled); highlighter.bold = colors.bold; }; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 244ed742d..f8ce93af2 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -88,6 +88,7 @@ export * from "./utils/define-config.js"; export * from "./utils/group-by.js"; export * from "./utils/has-published-fix-recipe.js"; export * from "./utils/is-large-minified-file.js"; +export * from "./utils/is-path-inside.js"; export * from "./utils/list-source-files.js"; export * from "./utils/map-with-concurrency.js"; export * from "./utils/match-glob-pattern.js"; diff --git a/packages/core/src/materialize-source-tree.ts b/packages/core/src/materialize-source-tree.ts index 7d8eb2819..9851608ce 100644 --- a/packages/core/src/materialize-source-tree.ts +++ b/packages/core/src/materialize-source-tree.ts @@ -3,6 +3,7 @@ import fs from "node:fs"; import path from "node:path"; import { STAGED_FILES_PROJECT_CONFIG_FILENAMES } from "./constants.js"; import type { ReactDoctorError } from "./errors.js"; +import { isPathInside } from "./utils/is-path-inside.js"; export interface MaterializedTree { readonly tempDirectory: string; @@ -10,17 +11,6 @@ export interface MaterializedTree { readonly cleanup: () => void; } -/** - * Zip-Slip defense: relative paths come from git (`diff --name-only`), which - * normalizes during ordinary adds, but a crafted index/pack/symlinked tree can - * smuggle `..` segments that escape the temp root. 
Resolve against the temp dir - * and reject anything that lands outside before writing. - */ -const isPathInsideDirectory = (childAbsolutePath: string, parentAbsolutePath: string): boolean => { - const relative = path.relative(parentAbsolutePath, childAbsolutePath); - return Boolean(relative) && !relative.startsWith("..") && !path.isAbsolute(relative); -}; - /** * Writes a set of source files (supplied by `readContent` — e.g. * `git show :` for a baseline tree, or `git show :` for the @@ -44,7 +34,7 @@ export const materializeSourceTree = (input: { const content = yield* input.readContent(relativePath).pipe(Effect.orElseSucceed(() => null)); if (content === null) continue; const candidateTargetPath = path.resolve(resolvedTempDirectory, relativePath); - if (!isPathInsideDirectory(candidateTargetPath, resolvedTempDirectory)) continue; + if (!isPathInside(candidateTargetPath, resolvedTempDirectory)) continue; yield* Effect.sync(() => { fs.mkdirSync(path.dirname(candidateTargetPath), { recursive: true }); fs.writeFileSync(candidateTargetPath, content); diff --git a/packages/core/src/utils/is-path-inside.ts b/packages/core/src/utils/is-path-inside.ts new file mode 100644 index 000000000..0c6f4f468 --- /dev/null +++ b/packages/core/src/utils/is-path-inside.ts @@ -0,0 +1,28 @@ +import * as path from "node:path"; + +export interface IsPathInsideOptions { + /** When `true`, `childPath` equal to `parentPath` counts as inside. */ + readonly allowSame?: boolean; +} + +/** + * `true` when `childPath` resolves within `parentPath`. By default the parent + * directory itself does not count (the strict zip-slip guard); pass + * `allowSame: true` to treat an exact match as inside (scope membership). + * + * Zip-Slip defense: relative paths can arrive from untrusted sources — a + * crafted git index/pack/symlinked tree, or a reconstructed agent transcript — + * and smuggle `..` segments that escape a temp root. 
Resolve against the parent + * and reject anything that lands outside before writing: only `..` itself or a + * `..`-plus-separator prefix escapes, so a name like `..cache` stays inside. This + * is the one audited copy of that guard, shared by the scan paths and stats tree. + */ +export const isPathInside = ( + childPath: string, + parentPath: string, + options: IsPathInsideOptions = {}, +): boolean => { + const relative = path.relative(parentPath, childPath); + if (!relative) return Boolean(options.allowSame); + return relative !== ".." && !relative.startsWith(`..${path.sep}`) && !path.isAbsolute(relative); +}; diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts new file mode 100644 index 000000000..ddb0e8214 --- /dev/null +++ b/packages/react-doctor/src/cli/commands/stats.ts @@ -0,0 +1,186 @@ +import * as path from "node:path"; +import { resolveScanTarget, type ReactDoctorConfig } from "@react-doctor/core"; +import { aggregateStats } from "../../stats/aggregate-stats.js"; +import { STATS_DEFAULT_SESSION_LIMIT } from "../../stats/constants.js"; +import { discoverSessions } from "../../stats/discover-sessions.js"; +import { renderStatsReport } from "../../stats/render-stats.js"; +import { reportStatsRun } from "../../stats/report-stats-run.js"; +import { runStatsScan } from "../../stats/run-stats-scan.js"; +import type { + CommunityLeaderboard, + StatsProvider, + StatsReport, + StatsScopeOptions, +} from "../../stats/types.js"; +import { METRIC } from "../utils/constants.js"; +import { enableJsonMode } from "../utils/json-mode.js"; +import { recordCount } from "../utils/record-metric.js"; +import { spinner } from "../utils/spinner.js"; +import { + recordStatsLeaderboard, + traceStatsPhase, + withSentryStatsSpan, +} from "../utils/with-sentry-stats-span.js"; + +export interface StatsFlags { + global?: boolean; + since?: string; + limit?: string; + provider?: string; + json?: boolean; + cwd?: string; + // Commander negations from the root program: `--no-score` → `score: 
false`, + // `--no-telemetry` → `telemetry: false`. Both opt out of the network. + score?: boolean; + telemetry?: boolean; +} + +const VALID_PROVIDERS = new Set(["claude", "codex", "cursor"]); + +const isStatsProvider = (value: string): value is StatsProvider => VALID_PROVIDERS.has(value); + +const parseProvider = (value: string | undefined): StatsProvider | undefined => { + if (value === undefined) return undefined; + if (!isStatsProvider(value)) { + throw new Error(`Unknown provider "${value}". Expected one of: claude, codex, cursor.`); + } + return value; +}; + +const parseSince = (value: string | undefined): Date | undefined => { + if (value === undefined) return undefined; + const parsed = new Date(value); + if (Number.isNaN(parsed.getTime())) { + throw new Error(`Invalid --since date "${value}". Use e.g. 2026-06-01.`); + } + return parsed; +}; + +const parseLimit = (value: string | undefined): number => { + if (value === undefined) return STATS_DEFAULT_SESSION_LIMIT; + const parsed = /^\d+$/.test(value.trim()) ? Number.parseInt(value, 10) : Number.NaN; + if (!Number.isSafeInteger(parsed) || parsed <= 0) { + throw new Error(`Invalid --limit "${value}". Use a positive integer, e.g. 200.`); + } + return parsed; +}; + +const resolveTarget = async ( + directory: string, +): Promise<{ root: string; userConfig: ReactDoctorConfig | null }> => { + try { + const target = await resolveScanTarget(directory); + return { root: target.resolvedDirectory, userConfig: target.userConfig }; + } catch { + return { root: path.resolve(directory), userConfig: null }; + } +}; + +export const statsAction = async (flags: StatsFlags): Promise => { + const directory = flags.cwd ?? process.cwd(); + // Register JSON mode up front so any throw (flag parsing, scan, or score API + // failure) is emitted as a structured JSON error by the top-level handler + // instead of plain text — and so incidental logs (e.g. a score-API warning) + // never corrupt the report on stdout. 
+ if (flags.json) enableJsonMode({ compact: false, directory }); + const scope: StatsScopeOptions = { + global: flags.global ?? false, + since: parseSince(flags.since), + limit: parseLimit(flags.limit), + provider: parseProvider(flags.provider), + }; + + const { root, userConfig } = await resolveTarget(directory); + + // `--no-score` / `--no-telemetry` (or `noScore` in config) opt out of the + // network entirely — same signal `resolve-cli-inspect-options` uses. When off, + // we skip the score API (scores show n/a, ranked by diagnostics-per-file) and + // the `/api/stats` report, so a `--no-telemetry` run is fully local. + const telemetryEnabled = !( + flags.score === false || + flags.telemetry === false || + Boolean(userConfig?.noScore) + ); + + // ora renders to stderr; suppress it in JSON mode so the run stays quiet. + // The whole run is one Sentry trace: each phase below is a child span, and + // every ranked model becomes a queryable leaderboard-row span. + const { report, community } = await withSentryStatsSpan<{ + report: StatsReport; + community: CommunityLeaderboard | null; + }>(async (rootSpan) => { + const progress = flags.json ? null : spinner("Looking through your agent history…").start(); + try { + const sessions = await traceStatsPhase("discover sessions", () => + discoverSessions(root, scope, (foundCount) => + progress?.update(`Looking through your agent history… (${foundCount} found)`), + ), + ); + progress?.update("Checking the code each agent wrote…"); + const results = await traceStatsPhase("scan sessions", () => + runStatsScan(sessions, scope.global ? null : root, { + onProgress: (completedCount, totalCount) => + progress?.update( + `Checking the code each agent wrote… (${completedCount}/${totalCount})`, + ), + }), + ); + progress?.update(telemetryEnabled ? 
"Scoring…" : "Ranking…"); + const aggregated = await traceStatsPhase("aggregate + score", () => + // Skip the score API when telemetry is off: a null scorer leaves every + // score null, and ranking falls back to diagnostics-per-file. + aggregateStats( + results, + userConfig, + telemetryEnabled ? undefined : () => Promise.resolve(null), + ), + ); + + const built: StatsReport = { + scope: scope.global ? "global" : "repo", + directory: root, + models: aggregated.models, + providers: aggregated.providers, + best: aggregated.best, + worst: aggregated.worst, + sessionsAnalyzed: results.length, + sessionsRanked: results.filter((result) => result.filesScanned > 0).length, + sessionsNonReact: results.filter( + (result) => result.filesScanned === 0 && result.reconstructedFiles > 0, + ).length, + sessionsUnreconstructable: results.filter( + (result) => + result.filesScanned === 0 && + result.reconstructedFiles === 0 && + result.unreconstructable > 0, + ).length, + generatedAt: new Date().toISOString(), + }; + recordStatsLeaderboard(built.models, rootSpan); + // Send the same leaderboard rows to our own store and get the community + // board back. Best-effort and telemetry-gated; never blocks the result. + progress?.update("Comparing with the community…"); + const communityBoard = telemetryEnabled + ? await traceStatsPhase("report leaderboard", () => reportStatsRun(built)) + : null; + progress?.succeed("Done."); + return { report: built, community: communityBoard }; + } finally { + progress?.stop(); + } + }); + + recordCount(METRIC.statsRun, 1, { + scope: report.scope, + sessions: report.sessionsAnalyzed, + providers: report.providers.length, + provider: scope.provider ?? 
"all", + }); + + if (flags.json) { + process.stdout.write(`${JSON.stringify({ schemaVersion: 1, ...report }, null, 2)}\n`); + return; + } + + process.stdout.write(`${renderStatsReport(report, community)}\n`); +}; diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts index f8425d28e..19429e97a 100644 --- a/packages/react-doctor/src/cli/index.ts +++ b/packages/react-doctor/src/cli/index.ts @@ -13,6 +13,7 @@ import { rulesSetAction, rulesUnignoreTagAction, } from "./commands/rules.js"; +import { statsAction } from "./commands/stats.js"; import { versionAction } from "./commands/version.js"; import { whyAction } from "./commands/why.js"; import { applyColorPreference } from "./utils/apply-color-preference.js"; @@ -80,8 +81,12 @@ ${formatExampleLines([ ])} ${highlighter.dim("Configuration:")} - Add a ${highlighter.info("doctor.config.ts")} (or .js/.mjs/.json — or a ${highlighter.info('"reactDoctor"')} key in your package.json) in the project root. - Use ${highlighter.info("react-doctor rules")} to list, explain, and configure rules. CLI flags always override config values. + Add a ${highlighter.info("doctor.config.ts")} (or .js/.mjs/.json — or a ${highlighter.info( + '"reactDoctor"', + )} key in your package.json) in the project root. + Use ${highlighter.info( + "react-doctor rules", + )} to list, explain, and configure rules. CLI flags always override config values. 
${highlighter.dim("Feedback & bug reports:")} ${highlighter.info(`${CANONICAL_GITHUB_URL}/issues`)} @@ -103,6 +108,31 @@ ${highlighter.dim("Learn more:")} ${highlighter.info(CANONICAL_GITHUB_URL)} `; +const renderStatsHelpEpilog = (): string => ` +${highlighter.dim("Examples:")} +${formatExampleLines([ + ["react-doctor stats", "rank agents on sessions that touched this repo"], + ["react-doctor stats --global", "rank across every repository on this machine"], + ["react-doctor stats --provider claude", "only Claude Code sessions"], + ["react-doctor stats --since 2026-06-01", "only recent sessions"], + ["react-doctor stats --json", "machine-readable leaderboard"], +])} + +${highlighter.dim("How it works:")} + Reads local agent history (Claude Code + Codex transcripts, Cursor's composer + databases and CLI agent stores), reconstructs the code each model wrote, lints + it, and ranks models + providers by score. + +${highlighter.dim("Caveats:")} + Codex shell-based edits aren't reconstructable (partial coverage). Reading any + Cursor database requires node:sqlite (Node 22.13+), and attribution falls back + to "unknown" only for Cursor GUI chats left on "Auto". The score requires + network access. + +${highlighter.dim("Learn more:")} + ${highlighter.info(CANONICAL_GITHUB_URL)} +`; + +const collectCategoryOption = (value: string, previousValues: string[] | undefined): string[] => [ ...(previousValues ?? 
[]), value, @@ -205,7 +235,7 @@ program .option("-c, --cwd ", "working directory", process.cwd()) .option("--color", "force colored output") .option("--no-color", "disable colored output (also honors NO_COLOR)") - .action((location, options) => whyAction(location, options)); + .action(whyAction); program .command("install") @@ -227,6 +257,24 @@ program .option("--no-color", "disable colored output (also honors NO_COLOR)") .action(versionAction); +program + .command("stats") + .description("Rank agents/models by the React Doctor health of the code they wrote") + .option("--global", "include sessions from every repository (default: this repo only)") + .option("--since ", "only sessions modified on or after this date (e.g. 2026-06-01)") + .option("--limit ", "max sessions to analyze, newest first (default: 200)") + .option("--provider ", "only one source: claude, codex, or cursor") + .option("--json", "output a structured JSON leaderboard") + .option("-c, --cwd ", "working directory", process.cwd()) + .option("--color", "force colored output") + .option("--no-color", "disable colored output (also honors NO_COLOR)") + .addHelpText("after", renderStatsHelpEpilog) + // stats redeclares --json/--cwd/--color, but the root program also exposes + // them as globals (e.g. --json for the default inspect command). Merge via + // optsWithGlobals() so a flag works whether it lands before or after the + // subcommand. 
+ .action((_options, command) => statsAction(command.optsWithGlobals())); + const rules = program .command("rules") .description("List, explain, and configure which React Doctor rules run"); diff --git a/packages/react-doctor/src/cli/utils/build-run-context.ts b/packages/react-doctor/src/cli/utils/build-run-context.ts index d1a7815e5..4d702ee90 100644 --- a/packages/react-doctor/src/cli/utils/build-run-context.ts +++ b/packages/react-doctor/src/cli/utils/build-run-context.ts @@ -55,7 +55,7 @@ export interface RunContext { lintBatchOrdering: "cost" | "arrival"; } -const ROOT_SUBCOMMANDS = new Set(["install", "setup"]); +const ROOT_SUBCOMMANDS = new Set(["install", "setup", "stats"]); // `npm_config_user_agent` looks like "pnpm/9.1.0 npm/? node/v22.0.0 ..."; // the leading token names the package manager that spawned the process. diff --git a/packages/react-doctor/src/cli/utils/constants.ts b/packages/react-doctor/src/cli/utils/constants.ts index 7210b6230..508668180 100644 --- a/packages/react-doctor/src/cli/utils/constants.ts +++ b/packages/react-doctor/src/cli/utils/constants.ts @@ -181,6 +181,9 @@ export const METRIC = { installDependency: "install.dependency", rulesChanged: "rules.changed", rulesQueried: "rules.queried", + // `react-doctor stats`: one counter per run (adoption), with the providers + // discovered and the number of agent sessions scored as attributes. + statsRun: "stats.run", // Editor language server (`react-doctor experimental-lsp`). Each workspace // scan burst is one wide-event span (op `lsp.scan`) plus these metrics. 
lspSessionStarted: "lsp.session.started", diff --git a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts index 88502c907..84cd5d06d 100644 --- a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts +++ b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts @@ -99,12 +99,22 @@ const WHY_FLAG_SPEC: CliFlagSpec = { shortOptionsWithRequiredValues: new Set(["-c"]), }; +// `stats` takes no positionals — just the scope/output options. +const STATS_FLAG_SPEC: CliFlagSpec = { + longOptionsWithoutValues: new Set(["--color", "--global", "--help", "--json", "--no-color"]), + longOptionsWithRequiredValues: new Set(["--cwd", "--limit", "--provider", "--since"]), + longOptionsWithOptionalValues: new Set(), + shortOptionsWithoutValues: new Set(["-h"]), + shortOptionsWithRequiredValues: new Set(["-c"]), +}; + const COMMAND_FLAG_SPECS = new Map([ ["install", INSTALL_FLAG_SPEC], ["setup", INSTALL_FLAG_SPEC], ["version", VERSION_FLAG_SPEC], ["rules", RULES_FLAG_SPEC], ["why", WHY_FLAG_SPEC], + ["stats", STATS_FLAG_SPEC], ]); const isFlagLike = (argument: string): boolean => argument.startsWith("-") && argument !== "-"; diff --git a/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts b/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts new file mode 100644 index 000000000..5254c8207 --- /dev/null +++ b/packages/react-doctor/src/cli/utils/with-sentry-stats-span.ts @@ -0,0 +1,86 @@ +import * as Sentry from "@sentry/node"; +import { isSentryTracingEnabled } from "../../instrument.js"; +import { toLeaderboardRow } from "../../stats/leaderboard-row.js"; +import type { GroupStats } from "../../stats/types.js"; +import { buildSentryScope } from "./build-sentry-scope.js"; +import { toSpanAttributes } from "./to-span-attributes.js"; + +export type SentryStatsSpan = ReturnType | undefined; + +/** + * Runs a `react-doctor stats` invocation inside a Sentry root span so each + * 
leaderboard run is a first-class trace: the discover/scan/aggregate phases + * become a latency waterfall (see {@link traceStatsPhase}) and every ranked + * model is one queryable child span (see {@link recordStatsLeaderboard}). The + * run snapshot rides along as attributes, exactly like the inspect root span. + * + * A no-op pass-through when Sentry performance tracing is off (Sentry disabled, + * `--no-score`, tests, `SENTRY_TRACES_SAMPLE_RATE=0`): `run` receives `undefined` + * and no transaction is created, so there's no added exit latency. Unlike the + * inspect root span there's no active-run-trace handle to record — stats has no + * Effect pipeline whose spans need parenting and no in-scan crash path to link. + */ +export const withSentryStatsSpan = ( + run: (rootSpan: SentryStatsSpan) => Promise, +): Promise => { + if (!isSentryTracingEnabled()) return run(undefined); + return Sentry.startSpan( + { + name: "react-doctor stats", + op: "cli.stats", + attributes: toSpanAttributes(buildSentryScope().tags), + }, + (rootSpan) => run(rootSpan), + ); +}; + +/** + * Wraps one phase of the stats pipeline in a child span so the trace shows where + * the wall-clock goes (the per-session oxlint scans dominate). A no-op + * pass-through when tracing is off; otherwise parents under the active stats + * root span. + */ +export const traceStatsPhase = (name: string, thunk: () => Promise): Promise => { + if (!isSentryTracingEnabled()) return thunk(); + return Sentry.startSpan({ name, op: "stats.phase" }, () => thunk()); +}; + +/** + * One ranked model's four leaderboard dimensions projected to span attributes, + * built from the shared {@link toLeaderboardRow} projection so the Sentry span and + * the `/api/stats` payload carry identical, code-free data. `null` score is + * dropped rather than coerced. Pure and exported so it's unit-testable without a + * live SDK, mirroring `build-run-event.ts`'s `buildRunEventAttributes`. 
+ */ +export const buildStatsRowAttributes = ( + model: GroupStats, +): Record => { + const row = toLeaderboardRow(model); + return toSpanAttributes({ + "stats.model": row.model, + "stats.harness": row.harness, + "stats.score": row.score, + "stats.files": row.files, + }); +}; + +/** + * Emits one zero-duration child span per ranked model so the leaderboard is + * queryable in Sentry's Trace Explorer / Spans dataset — filter or group by + * `stats.harness`, aggregate `stats.score` / `stats.files`. A no-op when the run + * isn't traced (`rootSpan` absent); otherwise the spans parent under it via the + * active scope. + */ +export const recordStatsLeaderboard = ( + models: ReadonlyArray, + rootSpan: SentryStatsSpan, +): void => { + if (!rootSpan) return; + for (const model of models) { + Sentry.startInactiveSpan({ + name: model.key, + op: "stats.leaderboard_row", + attributes: buildStatsRowAttributes(model), + }).end(); + } +}; diff --git a/packages/react-doctor/src/stats/aggregate-stats.ts b/packages/react-doctor/src/stats/aggregate-stats.ts new file mode 100644 index 000000000..d89fa207d --- /dev/null +++ b/packages/react-doctor/src/stats/aggregate-stats.ts @@ -0,0 +1,228 @@ +import { + calculateScore, + filterDiagnosticsForSurface, + type Diagnostic, + type ReactDoctorConfig, + type ScoreResult, +} from "@react-doctor/core"; +import { + STATS_MIN_FILES_FOR_SCORE, + STATS_SCORE_PRIOR_FILES, + STATS_SCORE_SESSION_FLOOR, + STATS_SCORE_SESSION_PRIOR, + STATS_TOP_RULES_PER_GROUP, +} from "./constants.js"; +import type { GroupStats, SessionScanResult, StatsProvider } from "./types.js"; + +/** Computes a 0-100 score for a diagnostic set. Injectable for tests. 
*/ +export type ScoreComputer = ( + diagnostics: Diagnostic[], + sourceFileCount: number, +) => Promise; + +const defaultScoreComputer: ScoreComputer = (diagnostics, sourceFileCount) => + calculateScore(diagnostics, { metadata: { sourceFileCount } }); + +interface Accumulator { + readonly key: string; + readonly provider: StatsProvider | "mixed"; + sessions: number; + /** Sessions that contributed at least one scanned file — the weighting unit. */ + scoredSessions: number; + filesScanned: number; + unreconstructable: number; + diagnostics: Diagnostic[]; +} + +const upsert = ( + groups: Map, + key: string, + provider: StatsProvider | "mixed", + result: SessionScanResult, +): void => { + let group = groups.get(key); + if (!group) { + group = { + key, + provider, + sessions: 0, + scoredSessions: 0, + filesScanned: 0, + unreconstructable: 0, + diagnostics: [], + }; + groups.set(key, group); + } + group.sessions += 1; + if (result.filesScanned > 0) group.scoredSessions += 1; + group.filesScanned += result.filesScanned; + group.unreconstructable += result.unreconstructable; + group.diagnostics.push(...result.diagnostics); +}; + +/** + * Confidence-weight a raw score with a Bayesian average: pull it toward the + * global mean (`priorScore`) by the group's evidence. Files are the dominant + * sample unit; sessions only lightly discount the file weight (many files from + * one session are one correlated sample), bounded below by a floor so a + * file-rich, session-poor group still counts. Low-evidence groups regress to the + * mean; high-evidence groups keep their raw score. Returns the raw score when + * there's no prior. 
/**
 * Confidence-weighted score: the group's raw score regressed toward
 * `priorScore` by how much evidence the group has.
 *
 * - Returns `null` when `rawScore` is `null` (nothing to weight).
 * - Returns `rawScore` unchanged when `priorScore` is `null` (nothing to
 *   regress toward).
 * - Otherwise returns the rounded weighted average of the prior (weighted by
 *   `STATS_SCORE_PRIOR_FILES`) and the raw score (weighted by the group's
 *   effective file count, i.e. `filesScanned` scaled by session reliability).
 *
 * @param rawScore - The group's own score, or `null` when it has none.
 * @param priorScore - The score every group is pulled toward, or `null`.
 * @param filesScanned - Files scanned for the group.
 * @param scoredSessions - Sessions contributing to the group's score.
 */
const confidenceWeightedScore = (
  rawScore: number | null,
  priorScore: number | null,
  filesScanned: number,
  scoredSessions: number,
): number | null => {
  if (rawScore === null) return null;
  if (priorScore === null) return rawScore;
  // reliability = FLOOR + (1 - FLOOR) * sessions / (sessions + PRIOR)
  const sessionReliability =
    STATS_SCORE_SESSION_FLOOR +
    (1 - STATS_SCORE_SESSION_FLOOR) *
      (scoredSessions / (scoredSessions + STATS_SCORE_SESSION_PRIOR));
  const effectiveFiles = filesScanned * sessionReliability;
  return Math.round(
    (priorScore * STATS_SCORE_PRIOR_FILES + rawScore * effectiveFiles) /
      (STATS_SCORE_PRIOR_FILES + effectiveFiles),
  );
};

/**
 * Count diagnostics per `plugin/rule` key and return the most frequent ones
 * (at most `STATS_TOP_RULES_PER_GROUP`), highest count first. Ties keep the
 * order in which the rules were first seen (the sort is stable).
 */
const topRules = (
  // NOTE(review): the element type argument was missing in this copy; it is
  // derived from the only call site (`accumulator.diagnostics`) — confirm
  // against the original declaration.
  diagnostics: ReadonlyArray<Accumulator["diagnostics"][number]>,
): GroupStats["topRules"] => {
  const counts = new Map<string, number>();
  for (const diagnostic of diagnostics) {
    const ruleKey = `${diagnostic.plugin}/${diagnostic.rule}`;
    counts.set(ruleKey, (counts.get(ruleKey) ?? 0) + 1);
  }
  return [...counts.entries()]
    .sort((left, right) => right[1] - left[1])
    .slice(0, STATS_TOP_RULES_PER_GROUP)
    .map(([rule, count]) => ({ rule, count }));
};

/**
 * Turn one accumulator into its reported `GroupStats` row.
 *
 * The score is computed (one `computeScore` call) only when the group has at
 * least `STATS_MIN_FILES_FOR_SCORE` scanned files; otherwise `score`,
 * `scoreLabel` and `weightedScore` are `null`. Every non-error diagnostic is
 * counted as a warning.
 */
const toGroupStats = async (
  accumulator: Accumulator,
  userConfig: ReactDoctorConfig | null,
  computeScore: ScoreComputer,
  priorScore: number | null,
): Promise<GroupStats> => {
  const errorCount = accumulator.diagnostics.filter(
    (diagnostic) => diagnostic.severity === "error",
  ).length;
  const scoreEligible = accumulator.filesScanned >= STATS_MIN_FILES_FOR_SCORE;
  const score = scoreEligible
    ? await computeScore(
        filterDiagnosticsForSurface(accumulator.diagnostics, "score", userConfig),
        accumulator.filesScanned,
      )
    : null;
  const rawScore = score?.score ?? null;

  return {
    key: accumulator.key,
    provider: accumulator.provider,
    sessions: accumulator.sessions,
    filesScanned: accumulator.filesScanned,
    unreconstructable: accumulator.unreconstructable,
    totalDiagnostics: accumulator.diagnostics.length,
    errorCount,
    warningCount: accumulator.diagnostics.length - errorCount,
    diagnosticsPerFile:
      accumulator.filesScanned > 0 ? accumulator.diagnostics.length / accumulator.filesScanned : 0,
    score: rawScore,
    scoreLabel: score?.label ?? null,
    weightedScore: confidenceWeightedScore(
      rawScore,
      priorScore,
      accumulator.filesScanned,
      accumulator.scoredSessions,
    ),
    topRules: topRules(accumulator.diagnostics),
  };
};

/**
 * Rank groups best-first by the confidence-weighted score; ties (and score-less
 * groups) break on fewer diagnostics-per-file. Only groups with enough scanned
 * files to be ranked fairly are returned. The input array is not mutated.
 */
const rankGroups = (groups: ReadonlyArray<GroupStats>): GroupStats[] =>
  [...groups]
    .filter((group) => group.filesScanned >= STATS_MIN_FILES_FOR_SCORE)
    .sort((left, right) => {
      if (
        left.weightedScore !== null &&
        right.weightedScore !== null &&
        left.weightedScore !== right.weightedScore
      ) {
        return right.weightedScore - left.weightedScore;
      }
      // A scored group always outranks a score-less one.
      if (left.weightedScore !== null && right.weightedScore === null) return -1;
      if (left.weightedScore === null && right.weightedScore !== null) return 1;
      return left.diagnosticsPerFile - right.diagnosticsPerFile;
    });

/** The ranked leaderboard: per-model and per-provider rows plus the extremes. */
export interface AggregatedStats {
  readonly models: GroupStats[];
  readonly providers: GroupStats[];
  /** Top-ranked model, or `null` when no model was ranked. */
  readonly best: GroupStats | null;
  /** Bottom-ranked model, or `null` when fewer than two models were ranked. */
  readonly worst: GroupStats | null;
}

/**
 * Group scan results by model and by provider, compute a 0-100 score per group
 * (one Score API call each), and rank them into a leaderboard.
 *
 * @param results - Per-session scan results to aggregate.
 * @param userConfig - User config forwarded to the score-surface diagnostic filter.
 * @param computeScore - Score backend; defaults to `defaultScoreComputer`.
 */
export const aggregateStats = async (
  // NOTE(review): the element type argument was missing in this copy; it is
  // derived from what `upsert` accepts as its 4th argument — confirm against
  // the original declaration.
  results: ReadonlyArray<Parameters<typeof upsert>[3]>,
  userConfig: ReactDoctorConfig | null,
  computeScore: ScoreComputer = defaultScoreComputer,
): Promise<AggregatedStats> => {
  const modelGroups = new Map<string, Accumulator>();
  const providerGroups = new Map<string, Accumulator>();
  for (const result of results) {
    upsert(
      modelGroups,
      `${result.session.provider}/${result.session.model}`,
      result.session.provider,
      result,
    );
    upsert(providerGroups, result.session.provider, result.session.provider, result);
  }

  // Global mean across every scanned file — the prior every group regresses
  // toward, so a small sample can't top the board on a lucky run.
  const totalFiles = results.reduce((sum, result) => sum + result.filesScanned, 0);
  const priorScore =
    totalFiles >= STATS_MIN_FILES_FOR_SCORE
      ? ((
          await computeScore(
            filterDiagnosticsForSurface(
              results.flatMap((result) => result.diagnostics),
              "score",
              userConfig,
            ),
            totalFiles,
          )
        )?.score ?? null)
      : null;

  const models = await Promise.all(
    [...modelGroups.values()].map((group) =>
      toGroupStats(group, userConfig, computeScore, priorScore),
    ),
  );
  const providers = await Promise.all(
    [...providerGroups.values()].map((group) =>
      toGroupStats(group, userConfig, computeScore, priorScore),
    ),
  );

  const rankedModels = rankGroups(models);
  return {
    models: rankedModels,
    providers: rankGroups(providers),
    best: rankedModels[0] ?? null,
    worst: rankedModels.length > 1 ? rankedModels[rankedModels.length - 1] : null,
  };
};

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/coerce.ts b/packages/react-doctor/src/stats/coerce.ts
// new file mode 100644
// index 000000000..b621bf418
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/coerce.ts
// @@ -0,0 +1,30 @@

// Defensive coercion for untrusted transcript JSON. Every agent source parses
// data the user didn't write by hand, so values are narrowed before use rather
// than trusted. Shared by the Claude/Codex/Cursor adapters.

/** Narrow an unknown to a non-empty string, else undefined. */
export const asString = (value: unknown): string | undefined =>
  typeof value === "string" && value.length > 0 ? value : undefined;

/** Narrow an unknown to a string, preserving the empty string (unlike `asString`). */
export const asNullableString = (value: unknown): string | null =>
  typeof value === "string" ? value : null;

/**
 * Narrow an unknown to a plain object record, else undefined. Arrays, `null`
 * and primitives all yield undefined.
 */
export const asRecord = (value: unknown): Record<string, unknown> | undefined =>
  value && typeof value === "object" && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : undefined;

/** Narrow an unknown to an array, else an empty array. */
export const asArray = (value: unknown): unknown[] => (Array.isArray(value) ? value : []);

/** Parse a JSON string, returning undefined on non-strings or parse errors. */
export const parseJson = (raw: string | null | undefined): unknown => {
  if (typeof raw !== "string") return undefined;
  try {
    return JSON.parse(raw);
  } catch {
    // Malformed transcript JSON is expected input, not an error condition.
    return undefined;
  }
};

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/constants.ts b/packages/react-doctor/src/stats/constants.ts
// new file mode 100644
// index 000000000..9718f7e26
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/constants.ts
// @@ -0,0 +1,65 @@

// Source file extensions React Doctor can lint. Reconstructed files outside
// this allowlist are dropped before scanning (assets, notebooks, markdown).
export const STATS_LINTABLE_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"] as const;

// Default cap on agent sessions scanned in one `stats` run. Each session is one
// oxlint subprocess, so an unbounded run over a machine's whole history could
// spawn thousands. Bounded by default; raise with `--limit`.
export const STATS_DEFAULT_SESSION_LIMIT = 200;

// Concurrent per-session lint scans. Each scan pins oxlint to a single worker,
// so this is the real fan-out across CPU cores.
+export const STATS_SCAN_CONCURRENCY = 6; + +// Temp-dir prefix for a per-session reconstructed source tree. +export const STATS_TEMP_DIR_PREFIX = "react-doctor-stats-"; + +// Discovery loads each candidate session from disk/SQLite synchronously. Yield +// to the event loop after this many loads so the spinner keeps animating instead +// of looking frozen during the initial history walk. +export const STATS_DISCOVERY_YIELD_INTERVAL = 10; + +// A group (model/provider) needs at least this many scanned files before its +// score is shown; below it the sample is too small to rank fairly. +export const STATS_MIN_FILES_FOR_SCORE = 3; + +// Confidence weighting for the leaderboard. A group's raw 0-100 score is pulled +// toward the global mean by a Bayesian average so a model can't top the board on +// a handful of files. The prior carries this many "average" effective files of +// weight; a group needs more effective files than this before its own score +// dominates the prior. +export const STATS_SCORE_PRIOR_FILES = 25; + +// Sessions discount the file weight (many files from a single session are one +// correlated sample), but only mildly — files are the heavier signal. Session +// reliability ramps from the floor below toward 1 as sessions grow: +// reliability = FLOOR + (1 - FLOOR) * sessions / (sessions + PRIOR) +export const STATS_SCORE_SESSION_PRIOR = 2; + +// Floor on session reliability: a group keeps at least this fraction of its file +// weight no matter how few sessions it has, so sessions can only shave off the +// remaining (1 - FLOOR). Closer to 1 = files dominate even harder. +export const STATS_SCORE_SESSION_FLOOR = 0.6; + +// Models shown in the terminal leaderboard. The full ranking is always in the +// `--json` report; the table stays short so it reads at a glance. +export const STATS_LEADERBOARD_TOP_N = 5; + +// Most-fired rules shown per group in the report. 
+export const STATS_TOP_RULES_PER_GROUP = 3; + +// Label used when a session does not expose a stable model id (e.g. a Cursor +// composer left on the "Auto" default with no per-bubble model recorded). +export const STATS_UNKNOWN_MODEL = "unknown"; + +// Width (in cells) of the unicode score bar drawn next to each leaderboard score. +export const STATS_SCORE_BAR_WIDTH = 16; + +// Score thresholds that pick the bar color: at or above HIGH is green, at or +// above MEDIUM is yellow, below is red. +export const STATS_SCORE_COLOR_HIGH = 80; +export const STATS_SCORE_COLOR_MEDIUM = 50; + +// Wire-format version for the `/api/stats` request body. Bump when the shape of +// the reported leaderboard rows changes so the endpoint can branch on it. +export const STATS_REPORT_SCHEMA_VERSION = 1; diff --git a/packages/react-doctor/src/stats/cursor-cli-store.ts b/packages/react-doctor/src/stats/cursor-cli-store.ts new file mode 100644 index 000000000..cfcbd43b5 --- /dev/null +++ b/packages/react-doctor/src/stats/cursor-cli-store.ts @@ -0,0 +1,115 @@ +import { asRecord } from "./coerce.js"; +import { openReadOnlySqlite } from "./open-sqlite.js"; + +// The Cursor CLI agent (`~/.cursor` / `~/.cursor-nightly`) stores each chat as +// its own content-addressed SQLite store, distinct from the GUI's single +// `state.vscdb`. The `meta` table holds one row whose `value` is hex-encoded +// JSON (the latest root blob id + last-used model); the `blobs` table maps a +// sha256 id to either a message (JSON: `{ role, content }`) or the binary root +// manifest. The manifest is a protobuf-style flat list of `0x0a 0x20` followed +// by a 32-byte blob id, giving the conversation's messages in order. 

/** One conversation message as stored in a Cursor CLI blob (`{ role, content }`). */
export interface CursorCliMessage {
  readonly role: string;
  readonly content: unknown;
}

/** What `readCursorCliStore` extracts from one per-session store. */
export interface CursorCliStore {
  readonly lastUsedModel: string | null;
  readonly messages: CursorCliMessage[];
}

// A manifest record is `[tag, id length, <id bytes>]`.
const MANIFEST_RECORD_TAG = 0x0a;
const MANIFEST_ID_LENGTH = 0x20;
const MANIFEST_RECORD_LENGTH = 2 + MANIFEST_ID_LENGTH;

/**
 * The conversation's message blob ids, in order, read from the leading run of
 * `[0x0a, 0x20, <32-byte id>]` records. Trailing protobuf fields after the run
 * are ignored; a manifest that doesn't start with the run yields `[]`.
 */
const parseManifestBlobIds = (manifest: Buffer): string[] => {
  const ids: string[] = [];
  let offset = 0;
  while (
    offset + MANIFEST_RECORD_LENGTH <= manifest.length &&
    manifest[offset] === MANIFEST_RECORD_TAG &&
    manifest[offset + 1] === MANIFEST_ID_LENGTH
  ) {
    // Skip the 2-byte record header; the rest of the record is the id.
    ids.push(manifest.subarray(offset + 2, offset + MANIFEST_RECORD_LENGTH).toString("hex"));
    offset += MANIFEST_RECORD_LENGTH;
  }
  return ids;
};

/** blobs.data is a BLOB (Uint8Array); meta.value is hex-encoded TEXT. */
const toBuffer = (value: unknown): Buffer | null => {
  if (value instanceof Uint8Array) return Buffer.from(value);
  if (typeof value === "string") return Buffer.from(value, "hex");
  return null;
};

/**
 * Read a Cursor CLI per-session `store.db`: the last-used model and every
 * conversation message in order. Returns `null` when the store can't be opened
 * (older Node without `node:sqlite`, or an unreadable/locked file) or has no
 * usable `meta` row; the messages array is empty when the manifest is missing.
 *
 * @param storeDbPath - Path to the per-session SQLite store.
 */
export const readCursorCliStore = (storeDbPath: string): CursorCliStore | null => {
  const database = openReadOnlySqlite(storeDbPath);
  if (!database) return null;
  try {
    const metaRow = asRecord(database.prepare("SELECT value FROM meta LIMIT 1").get());
    const metaValue = metaRow && typeof metaRow.value === "string" ? metaRow.value : null;
    if (!metaValue) return null;
    let meta: Record<string, unknown> | undefined;
    try {
      // `meta.value` is hex-encoded JSON.
      meta = asRecord(JSON.parse(Buffer.from(metaValue, "hex").toString("utf8")));
    } catch {
      return null;
    }
    if (!meta) return null;

    const lastUsedModel = typeof meta.lastUsedModel === "string" ? meta.lastUsedModel : null;
    const latestRootBlobId =
      typeof meta.latestRootBlobId === "string" ? meta.latestRootBlobId : null;
    if (!latestRootBlobId) return { lastUsedModel, messages: [] };

    const blobStatement = database.prepare("SELECT data FROM blobs WHERE id = ?");
    const blobBuffer = (id: string): Buffer | null => {
      const row = asRecord(blobStatement.get(id));
      return row ? toBuffer(row.data) : null;
    };

    const manifest = blobBuffer(latestRootBlobId);
    if (!manifest) return { lastUsedModel, messages: [] };

    const messages: CursorCliMessage[] = [];
    for (const blobId of parseManifestBlobIds(manifest)) {
      const raw = blobBuffer(blobId);
      if (!raw) continue;
      const text = raw.toString("utf8");
      // Only JSON-object blobs are messages; anything else is skipped.
      if (!text.startsWith("{")) continue;
      let message: Record<string, unknown> | undefined;
      try {
        message = asRecord(JSON.parse(text));
      } catch {
        continue;
      }
      if (message && typeof message.role === "string") {
        messages.push({ role: message.role, content: message.content });
      }
    }
    return { lastUsedModel, messages };
  } catch {
    // A locked or unreadable store can throw mid-read; skip it rather than
    // sinking the whole stats run.
    return null;
  } finally {
    try {
      database.close();
    } catch {
      // Already closed or never fully opened — nothing to release.
    }
  }
};

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/cursor-db.ts b/packages/react-doctor/src/stats/cursor-db.ts
// new file mode 100644
// index 000000000..6ed3e2e2f
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/cursor-db.ts
// @@ -0,0 +1,207 @@
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import { asNullableString } from "./coerce.js";
import { openReadOnlySqlite } from "./open-sqlite.js";

// Cursor persists chat state in a single SQLite file. The GUI agent's model
// selection, tool calls (edits), and full post-edit file snapshots all live in
// here — the agent-transcript JSONL files do not record the model at all.
const CURSOR_DB_RELATIVE_PATH = path.join("User", "globalStorage", "state.vscdb");
const COMPOSER_DATA_PREFIX = "composerData:";
const BUBBLE_PREFIX = "bubbleId:";
const COMPOSER_HEADERS_KEY = "composer.composerHeaders";

/** One chat in the composer index, with its newest-activity timestamp. */
interface CursorComposerHeader {
  readonly composerId: string;
  readonly modifiedMs: number;
}

/** Read-only accessor over the Cursor composer database. */
interface CursorDbHandle {
  composerHeaders(): CursorComposerHeader[];
  composerValue(composerId: string): string | null;
  bubbleValues(composerId: string): string[];
  contentValue(contentId: string): string | null;
}

export type { CursorComposerHeader, CursorDbHandle };

/** Narrow an unknown to a plain object record, else `null` (arrays excluded). */
const asRecord = (value: unknown): Record<string, unknown> | null =>
  value && typeof value === "object" && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : null;

// Cursor ships a stable build and a "Nightly" build; each keeps its own
// application-support tree, so both are scanned.
const CURSOR_APP_DIR_NAMES = ["Cursor", "Cursor Nightly"];

/**
 * Per-platform application-support directories for each Cursor build:
 * `~/Library/Application Support` on macOS, `%APPDATA%` (falling back to
 * `~/AppData/Roaming`) on Windows, and `$XDG_CONFIG_HOME` (falling back to
 * `~/.config`) everywhere else.
 */
const cursorAppDirs = (): string[] => {
  if (process.platform === "darwin") {
    const base = path.join(os.homedir(), "Library", "Application Support");
    return CURSOR_APP_DIR_NAMES.map((name) => path.join(base, name));
  }
  if (process.platform === "win32") {
    const appData = process.env.APPDATA ?? path.join(os.homedir(), "AppData", "Roaming");
    return CURSOR_APP_DIR_NAMES.map((name) => path.join(appData, name));
  }
  const configHome = process.env.XDG_CONFIG_HOME ?? path.join(os.homedir(), ".config");
  return CURSOR_APP_DIR_NAMES.map((name) => path.join(configHome, name));
};

/**
 * Absolute paths to every readable Cursor composer database — the stable and
 * Nightly builds each keep their own. A `REACT_DOCTOR_CURSOR_DB` override pins
 * a single database (used by tests). Returns `[]` when none exist.
 */
export const resolveCursorDbPaths = (): string[] => {
  const override = process.env.REACT_DOCTOR_CURSOR_DB;
  const candidates = override
    ? [override]
    : cursorAppDirs().map((directory) => path.join(directory, CURSOR_DB_RELATIVE_PATH));
  return candidates.filter((candidate) => fs.existsSync(candidate));
};

/**
 * A composer header's activity timestamp: numeric `lastUpdatedAt`, else numeric
 * `createdAt`, else `0` when neither is a number.
 */
const modifiedMsFromHeader = (head: Record<string, unknown>): number => {
  const lastUpdatedAt = head.lastUpdatedAt;
  if (typeof lastUpdatedAt === "number") return lastUpdatedAt;
  const createdAt = head.createdAt;
  if (typeof createdAt === "number") return createdAt;
  return 0;
};

/**
 * Parse the composer-headers JSON into `{ composerId, modifiedMs }` entries.
 * Accepts either a bare array or an object with an `allComposers` array;
 * anything else (including malformed JSON) yields `[]`. Entries without a
 * non-empty string `composerId` are dropped.
 */
const parseComposerHeaders = (raw: string): CursorComposerHeader[] => {
  let decoded: unknown;
  try {
    decoded = JSON.parse(raw);
  } catch {
    return [];
  }
  const record = asRecord(decoded);
  let list: unknown[] = [];
  if (Array.isArray(decoded)) {
    list = decoded;
  } else if (record && Array.isArray(record.allComposers)) {
    list = record.allComposers;
  }
  const headers: CursorComposerHeader[] = [];
  for (const entry of list) {
    const head = asRecord(entry);
    const composerId = head && asNullableString(head.composerId);
    if (head && composerId) {
      headers.push({ composerId, modifiedMs: modifiedMsFromHeader(head) });
    }
  }
  return headers;
};

// node:sqlite returns each row as an object keyed by column name.
const rowValueString = (row: unknown): string | null => {
  const record = asRecord(row);
  return record ? asNullableString(record.value) : null;
};

// The exclusive upper bound for a key prefix: the prefix with its last byte
// incremented. A `key >= prefix AND key < upper` range always uses the primary
// key index, unlike `LIKE 'prefix%'`, which a BINARY-collated index can't serve
// (and so falls back to a full scan of the multi-GB database).
// NOTE(review): the increment is done on the last UTF-16 code unit of a
// non-empty prefix; every visible caller passes a prefix ending in ":".
const prefixUpperBound = (prefix: string): string =>
  prefix.slice(0, -1) + String.fromCharCode(prefix.charCodeAt(prefix.length - 1) + 1);

/** An opened composer database: its read accessor plus a best-effort closer. */
interface OpenDb {
  readonly handle: CursorDbHandle;
  readonly close: () => void;
}

/**
 * Open `dbPath` read-only and wrap it in a `CursorDbHandle`. Returns `null`
 * when the database can't be opened or its statements can't be prepared. Every
 * accessor swallows read errors and returns its empty value (`[]` / `null`).
 */
const makeHandle = (dbPath: string): OpenDb | null => {
  // `node:sqlite` is built in on Node 22.13+/24+; absent on older Node, where
  // opening returns null and Cursor stats degrade to "no sessions found".
  const database = openReadOnlySqlite(dbPath);
  if (!database) return null;
  const close = (): void => {
    try {
      database.close();
    } catch {
      // Already closed or never fully opened — nothing to release.
    }
  };

  try {
    const headersStatement = database.prepare(`SELECT value FROM ItemTable WHERE key = ?`);
    const composerStatement = database.prepare(`SELECT value FROM cursorDiskKV WHERE key = ?`);
    const bubbleStatement = database.prepare(
      `SELECT value FROM cursorDiskKV WHERE key >= ? AND key < ?`,
    );

    const handle: CursorDbHandle = {
      composerHeaders(): CursorComposerHeader[] {
        try {
          const raw = rowValueString(headersStatement.get(COMPOSER_HEADERS_KEY));
          return raw ? parseComposerHeaders(raw) : [];
        } catch {
          return [];
        }
      },
      composerValue(composerId: string): string | null {
        try {
          return rowValueString(composerStatement.get(`${COMPOSER_DATA_PREFIX}${composerId}`));
        } catch {
          return null;
        }
      },
      bubbleValues(composerId: string): string[] {
        try {
          // Range scan over every key that starts with `bubbleId:<composerId>:`.
          const prefix = `${BUBBLE_PREFIX}${composerId}:`;
          const rows = bubbleStatement.all(prefix, prefixUpperBound(prefix));
          const values: string[] = [];
          for (const row of rows) {
            const value = rowValueString(row);
            if (value) values.push(value);
          }
          return values;
        } catch {
          return [];
        }
      },
      contentValue(contentId: string): string | null {
        try {
          // Content rows live in the same key/value table; the id is the full key.
          return rowValueString(composerStatement.get(contentId));
        } catch {
          return null;
        }
      },
    };

    return { handle, close };
  } catch {
    // A locked or unreadable database can throw when statements are prepared;
    // skip it rather than sinking the whole stats run.
    close();
    return null;
  }
};

// One open handle per database path — opening is cheap (SQLite memory-maps
// lazily), but reopening per composer during a scan would thrash. The stable
// and Nightly databases can both be open at once, so they're memoized by path.
// `closeCursorDb` closes them for tests (so Windows can unlink the fixture
// file); the CLI relies on process exit.
// NOTE(review): the Map's type arguments were mangled in this copy; they are
// reconstructed from the `entry` literal stored by `openCursorDb`.
const openDatabases = new Map<string, { handle: CursorDbHandle | null; close: () => void }>();

/**
 * Open (and memoize) a composer database by path, or `null` when unavailable.
 * A failed open is memoized too, so an unreadable database is not retried.
 */
export const openCursorDb = (dbPath: string | null): CursorDbHandle | null => {
  if (!dbPath) return null;
  const cached = openDatabases.get(dbPath);
  if (cached) return cached.handle;
  const opened = makeHandle(dbPath);
  const entry = { handle: opened?.handle ?? null, close: opened?.close ?? (() => {}) };
  openDatabases.set(dbPath, entry);
  return entry.handle;
};

/** Close and drop every memoized database (tests open fresh fixture databases). */
export const closeCursorDb = (): void => {
  for (const database of openDatabases.values()) database.close();
  openDatabases.clear();
};

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/discover-sessions.ts b/packages/react-doctor/src/stats/discover-sessions.ts
// new file mode 100644
// index 000000000..312494c4b
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/discover-sessions.ts
// @@ -0,0 +1,65 @@
import { isPathInside } from "@react-doctor/core";
import { STATS_DISCOVERY_YIELD_INTERVAL } from "./constants.js";
import { STATS_SOURCES } from "./sources/index.js";
import { resolveEditPaths } from "./reconstruct-files.js";
import type { AgentSession, StatsScopeOptions } from "./types.js";

/** Reports discovery progress: sessions kept so far, candidates scanned so far. */
export type DiscoveryProgress = (foundCount: number, scannedCount: number) => void;

/** True when the session's cwd, or any path it edited, is inside (or is) `repoRoot`. */
const sessionTouchesRepo = (session: AgentSession, repoRoot: string): boolean => {
  if (session.cwd && isPathInside(session.cwd, repoRoot, { allowSame: true })) return true;
  return resolveEditPaths(session).some((editPath) =>
    isPathInside(editPath, repoRoot, { allowSame: true }),
  );
};

/**
 * Enumerate, load, and scope-filter agent sessions. By default only sessions
 * that touched `repoRoot` are kept; `--global` lifts that. `--since` and
 * `--limit` bound cost (candidates are loaded newest-first, and loading is lazy
 * so capped runs never touch the whole history). Sessions with no edits are
 * dropped. Loading is synchronous per candidate, so the loop yields to the event
 * loop periodically (and reports progress) to keep the spinner responsive.
 *
 * @param repoRoot - Repository root used for the default (non-global) scope.
 * @param scope - Provider / since / limit / global filters.
 * @param onProgress - Optional callback invoked every
 *   `STATS_DISCOVERY_YIELD_INTERVAL` loaded candidates.
 */
export const discoverSessions = async (
  repoRoot: string,
  scope: StatsScopeOptions,
  onProgress?: DiscoveryProgress,
): Promise<AgentSession[]> => {
  const candidates = STATS_SOURCES.filter(
    (source) => !scope.provider || source.name === scope.provider,
  ).flatMap((source) => source.candidates());
  candidates.sort((left, right) => right.modifiedMs - left.modifiedMs);

  const sinceMs = scope.since ? scope.since.getTime() : null;
  const sessions: AgentSession[] = [];
  let scannedCount = 0;
  for (const candidate of candidates) {
    // With `--since`, a candidate whose timestamp is unknown (`modifiedMs <= 0`)
    // can't be proven on-or-after the cutoff, so it's excluded rather than
    // ambiguously kept. Dated candidates are sorted newest-first, so the first
    // one older than the cutoff ends the walk.
    if (sinceMs !== null) {
      if (candidate.modifiedMs <= 0) continue;
      if (candidate.modifiedMs < sinceMs) break;
    }

    const session = await candidate.load();
    scannedCount += 1;
    if (
      session &&
      session.edits.length > 0 &&
      (scope.global || sessionTouchesRepo(session, repoRoot))
    ) {
      sessions.push(session);
    }

    if (scannedCount % STATS_DISCOVERY_YIELD_INTERVAL === 0) {
      onProgress?.(sessions.length, scannedCount);
      // Yield so timers/spinner frames can run between synchronous loads.
      await new Promise((resolve) => setImmediate(resolve));
    }
    if (sessions.length >= scope.limit) break;
  }
  return sessions;
};

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/is-react-source.ts b/packages/react-doctor/src/stats/is-react-source.ts
// new file mode 100644
// index 000000000..f220eeb89
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/is-react-source.ts
// @@ -0,0 +1,42 @@

// JSX-bearing extensions imply a React (or React-like) component in this
// product's universe — `.ts`/`.js` cannot hold JSX, so they need a content
// signal instead.
const JSX_EXTENSION_PATTERN = /\.(tsx|jsx)$/;

// `"use client"` / `"use server"` directives mark React Server Component
// boundaries and server actions — React code even without a `react` import.
const REACT_DIRECTIVE_PATTERN = /^\s*['"]use (?:client|server)['"]/m;

// Every `from "…"`, `require("…")`, and `import("…")` specifier in a file.
const MODULE_SPECIFIER_PATTERN = /(?:\bfrom\s*|\brequire\(\s*|\bimport\(\s*)['"]([^'"]+)['"]/g;

// React framework packages that don't carry "react" in their name.
const REACT_FRAMEWORK_ROOTS = ["next", "expo", "gatsby", "@remix-run", "@shopify/hydrogen"];

/** Whether a module specifier (compared case-insensitively) belongs to the React ecosystem. */
const isReactModuleSpecifier = (specifier: string): boolean => {
  const lower = specifier.toLowerCase();
  if (lower === "react" || lower.startsWith("react/") || lower.startsWith("react-")) return true;
  // Scoped/nested React packages: `@tanstack/react-query`, `@react-navigation/native`, …
  if (lower.includes("/react-") || lower.endsWith("/react") || lower.startsWith("@react-")) {
    return true;
  }
  if (lower === "preact" || lower.startsWith("preact/")) return true;
  return REACT_FRAMEWORK_ROOTS.some((root) => lower === root || lower.startsWith(`${root}/`));
};

/**
 * Whether a reconstructed file is actually React code worth ranking. React
 * Doctor's rules are React-specific, so a model's plain backend/util/config
 * files would otherwise pad its file count and dilute its diagnostics-per-file
 * — skewing the leaderboard toward whoever wrote the most non-React code. A
 * file qualifies when it has a JSX extension, a `use client`/`use server`
 * directive, or imports from the React ecosystem.
 */
export const isReactSourceFile = (filePath: string, content: string): boolean => {
  if (JSX_EXTENSION_PATTERN.test(filePath)) return true;
  if (REACT_DIRECTIVE_PATTERN.test(content)) return true;
  for (const match of content.matchAll(MODULE_SPECIFIER_PATTERN)) {
    if (isReactModuleSpecifier(match[1])) return true;
  }
  return false;
};

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/leaderboard-row.ts b/packages/react-doctor/src/stats/leaderboard-row.ts
// new file mode 100644
// index 000000000..ff9e63d45
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/leaderboard-row.ts
// @@ -0,0 +1,26 @@
import { modelLabel } from "./model-label.js";
import type { GroupStats } from "./types.js";

/**
 * One leaderboard row reduced to its four shareable dimensions: the bare model
 * name, the harness (the agent tool that ran it), the confidence-weighted 0-100
 * score (`null` when undersampled), and the React files scored.
 *
 * This is the single source for everything the stats feature reports off the
 * machine — the Sentry `stats.leaderboard_row` span attributes and the
 * `/api/stats` payload both project from it, so the two sinks can never drift and
 * both stay code-free: no source text, paths, or repo identity ever appears here.
 */
export interface LeaderboardRow {
  readonly model: string;
  readonly harness: string;
  readonly score: number | null;
  readonly files: number;
}

/** Project a `GroupStats` row down to its shareable `LeaderboardRow`. */
export const toLeaderboardRow = (group: GroupStats): LeaderboardRow => ({
  model: modelLabel(group),
  harness: group.provider,
  score: group.weightedScore,
  files: group.filesScanned,
});

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts b/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts
// new file mode 100644
// index 000000000..7dbcc9397
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/materialize-reconstructed-tree.ts
// @@ -0,0 +1,66 @@
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import { isPathInside, STAGED_FILES_PROJECT_CONFIG_FILENAMES } from "@react-doctor/core";
import { STATS_TEMP_DIR_PREFIX } from "./constants.js";
import type { ReconstructedFile } from "./types.js";

export interface MaterializedReconstruction {
  readonly tempDirectory: string;
  /** `realpath` of `tempDirectory` (macOS symlinks `/var` → `/private/var`). */
  readonly realTempDirectory: string;
  readonly relativePaths: string[];
  readonly cleanup: () => void;
}

/**
 * Write reconstructed file content into a fresh temp tree mirroring the scan
 * layout, copying the project-config files (`tsconfig` / `package.json` /
 * `doctor.config` / oxlintrc) from `scanRoot` so oxlint resolves the same
 * config it would in the real project. In-memory sibling of core's
 * `materializeSourceTree` (which reads from git); the zip-slip guard mirrors it.
 *
 * @param scanRoot - Project directory the config files are copied from.
 * @param files - Reconstructed files to write; entries resolving outside the
 *   temp tree are skipped.
 * @returns The temp tree's paths, the relative paths actually written, and a
 *   best-effort `cleanup` that removes the tree.
 */
export const materializeReconstructedTree = (
  scanRoot: string,
  files: ReadonlyArray<ReconstructedFile>,
): MaterializedReconstruction => {
  const tempDirectory = fs.mkdtempSync(path.join(os.tmpdir(), STATS_TEMP_DIR_PREFIX));
  const resolvedTempDirectory = path.resolve(tempDirectory);
  const relativePaths: string[] = [];

  for (const file of files) {
    const targetPath = path.resolve(resolvedTempDirectory, file.relativePath);
    // Zip-slip guard: never write outside the temp tree.
    if (!isPathInside(targetPath, resolvedTempDirectory)) continue;
    fs.mkdirSync(path.dirname(targetPath), { recursive: true });
    fs.writeFileSync(targetPath, file.content);
    relativePaths.push(file.relativePath);
  }

  for (const configFilename of STAGED_FILES_PROJECT_CONFIG_FILENAMES) {
    const sourcePath = path.join(scanRoot, configFilename);
    const targetPath = path.join(resolvedTempDirectory, configFilename);
    // A reconstructed file at the same path wins over the project's copy.
    if (fs.existsSync(sourcePath) && !fs.existsSync(targetPath)) {
      fs.cpSync(sourcePath, targetPath, { recursive: true });
    }
  }

  let realTempDirectory = resolvedTempDirectory;
  try {
    realTempDirectory = fs.realpathSync(resolvedTempDirectory);
  } catch {
    // realpath unavailable (broken symlink, permission); keep the resolved path.
  }

  return {
    tempDirectory: resolvedTempDirectory,
    realTempDirectory,
    relativePaths,
    cleanup: () => {
      try {
        fs.rmSync(resolvedTempDirectory, { recursive: true, force: true });
      } catch {
        // Best-effort; the OS tempdir reaper eventually runs.
      }
    },
  };
};

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/model-label.ts b/packages/react-doctor/src/stats/model-label.ts
// new file mode 100644
// index 000000000..2fd82ef69
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/model-label.ts
// @@ -0,0 +1,11 @@
import type { GroupStats } from "./types.js";

/**
 * The bare model name for a leaderboard row — strips the `provider/` prefix that
 * keys model groups, so `claude/claude-sonnet-4-5` reads as `claude-sonnet-4-5`.
 * Provider groups (whose key is just the provider) pass through unchanged.
 */
export const modelLabel = (group: GroupStats): string => {
  // Only the first slash separates provider from model; later ones are kept.
  const slash = group.key.indexOf("/");
  return slash === -1 ? group.key : group.key.slice(slash + 1);
};

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/most-common-key.ts b/packages/react-doctor/src/stats/most-common-key.ts
// new file mode 100644
// index 000000000..7319e6781
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/most-common-key.ts
// @@ -0,0 +1,15 @@
/**
 * The map key with the highest count, or undefined when the map is empty. Used
 * to pick a session's dominant model from per-message model tallies. On a tie
 * the key iterated first wins; keys whose count is not above 0 are never
 * returned.
 */
export const mostCommonKey = (counts: ReadonlyMap<string, number>): string | undefined => {
  let bestKey: string | undefined;
  let bestCount = 0;
  for (const [key, count] of counts) {
    if (count > bestCount) {
      bestKey = key;
      bestCount = count;
    }
  }
  return bestKey;
};

// ---- file boundary from the collapsed diff (kept verbatim as a comment) ----
// diff --git a/packages/react-doctor/src/stats/open-sqlite.ts b/packages/react-doctor/src/stats/open-sqlite.ts
// new file mode 100644
// index 000000000..f8abd1cb6
// --- /dev/null
// +++ b/packages/react-doctor/src/stats/open-sqlite.ts
// @@ -0,0 +1,66 @@
import { createRequire } from "node:module";

const nodeRequire = createRequire(import.meta.url);

/** Minimal read surface over a `node:sqlite` database (prepared statements). */
export interface ReadOnlySqliteDatabase {
  prepare(sql: string): {
    get(...params: unknown[]): unknown;
    all(...params: unknown[]): unknown[];
  };
  close(): void;
}

// A read-only `file:` URI with `immutable=1`. A running editor (Cursor/VSCode)
// holds its SQLite store locked, so a plain read-only connection opens but then
// throws "database is locked" on first read; `immutable=1` tells SQLite the file
// won't change and skips all locking, reading the live database (it ignores any
// uncommitted WAL, which is fine for a history scan). Forward-slashed + encoded
// for the URI grammar so paths with spaces (e.g.
"Cursor Nightly") parse. +const toImmutableFileUri = (databasePath: string): string => { + const forwardSlashed = databasePath.replace(/\\/g, "/"); + const absolute = forwardSlashed.startsWith("/") ? forwardSlashed : `/${forwardSlashed}`; + // Encode each segment so reserved URI characters (`?`, `#`, …) inside a path + // can't be parsed as the query/fragment delimiter; the `/` separators stay. + const encoded = absolute.split("/").map(encodeURIComponent).join("/"); + return `file:${encoded}?immutable=1`; +}; + +/** + * Open a SQLite database read-only via the built-in `node:sqlite`, or `null` + * when it is unavailable (Node < 22.13, where the require throws) or the file + * cannot be read. A plain read-only open is tried first (it sees the WAL, so + * historical stores read accurately); if its probe trips the lock a running + * editor holds, an `immutable` open takes over. Shared by the Cursor GUI + * composer database and the Cursor CLI per-session store. + */ +export const openReadOnlySqlite = (databasePath: string): ReadOnlySqliteDatabase | null => { + let DatabaseSync: new ( + location: string, + options: { readOnly: boolean }, + ) => ReadOnlySqliteDatabase; + try { + ({ DatabaseSync } = nodeRequire("node:sqlite")); + } catch { + return null; + } + const locations = [databasePath, toImmutableFileUri(databasePath)]; + for (const location of locations) { + let database: ReadOnlySqliteDatabase | undefined; + try { + database = new DatabaseSync(location, { readOnly: true }); + // A read-only open succeeds even against a locked database; the lock only + // surfaces on the first page read. Probe with a schema read so a locked + // store falls through to the immutable strategy instead of throwing later. + database.prepare("SELECT name FROM sqlite_master LIMIT 1").get(); + return database; + } catch { + try { + database?.close(); + } catch { + // Nothing to release. 
export type PatchOpType = "add" | "update" | "delete";

/** One per-file operation parsed out of an `apply_patch` envelope. */
export interface PatchOp {
  readonly type: PatchOpType;
  readonly path: string;
  /** For `add`: the full file content lines (without the leading `+`). */
  readonly addedLines?: string[];
  /** For `update`: the raw hunk body lines (` `/`+`/`-`/`@@`). */
  readonly hunkLines?: string[];
  /** For `update` with a `*** Move to:` directive. */
  readonly movePath?: string;
}

/** A file operation whose body lines are still being collected. */
interface PendingPatchOp {
  type: PatchOpType;
  path: string;
  movePath?: string;
  body: string[];
}

const FILE_HEADER = /^\*\*\* (Add|Update|Delete) File: (.+)$/;
const MOVE_HEADER = /^\*\*\* Move to: (.+)$/;
const END_OF_FILE_MARKER = "*** End of File";
const KIND_TO_OP_TYPE: Record<string, PatchOpType> = {
  add: "add",
  update: "update",
  delete: "delete",
};

/**
 * Parse a Codex / Cursor `apply_patch` envelope (`*** Begin Patch` …
 * `*** End Patch`) into per-file operations. The format carries no line
 * numbers, so `update` ops keep their raw hunk body for a fuzzy line-search
 * apply at reconstruction time.
 *
 * @param patchText The raw envelope text.
 * @returns One op per `*** Add|Update|Delete File:` header, in order; `[]`
 *   when no file header is found.
 */
export const parseApplyPatch = (patchText: string): PatchOp[] => {
  const ops: PatchOp[] = [];
  let current: PendingPatchOp | null = null;

  // Convert the op being accumulated (if any) into its final shape.
  const flush = (): void => {
    const pending = current;
    current = null;
    if (!pending) return;
    if (pending.type === "add") {
      ops.push({
        type: "add",
        path: pending.path,
        addedLines: pending.body
          .filter((line) => line.startsWith("+"))
          .map((line) => line.slice(1)),
      });
    } else if (pending.type === "update") {
      ops.push({
        type: "update",
        path: pending.path,
        hunkLines: pending.body,
        ...(pending.movePath ? { movePath: pending.movePath } : {}),
      });
    } else {
      ops.push({ type: "delete", path: pending.path });
    }
  };

  for (const line of patchText.split("\n")) {
    // Directive lines are matched without a trailing CR so a CRLF-encoded
    // envelope still parses; body lines are stored exactly as they appear.
    const marker = line.endsWith("\r") ? line.slice(0, -1) : line;
    if (marker.startsWith("*** Begin Patch")) continue;
    if (marker.startsWith("*** End Patch")) {
      // Close the op here so text after the envelope (typically the empty
      // string produced by a trailing newline) is not taken as hunk content.
      flush();
      continue;
    }
    const header = FILE_HEADER.exec(marker);
    if (header) {
      flush();
      current = {
        type: KIND_TO_OP_TYPE[header[1].toLowerCase()],
        path: header[2].trim(),
        body: [],
      };
      continue;
    }
    if (!current) continue;
    const move = MOVE_HEADER.exec(marker);
    if (move && current.type === "update") {
      current.movePath = move[1].trim();
      continue;
    }
    current.body.push(line);
  }
  flush();
  return ops;
};

/**
 * Apply an `update` hunk body to `baseContent` using a forward line search
 * (the apply_patch format omits line numbers).
 *
 * Line handling:
 * - `@@ <text>`: when `<text>` is non-empty it is a context anchor. The search
 *   position jumps to just after the first following base line that equals it
 *   (exactly, else ignoring surrounding whitespace). An anchor that cannot be
 *   found is ignored.
 * - `*** End of File`: a position marker, not file content, so it is skipped.
 * - ` text` and bare empty lines: unchanged context; must be found.
 * - `-text`: a removed line; must be found.
 * - `+text`: an inserted line.
 *
 * @param baseContent The file content before the edit.
 * @param hunkLines The raw hunk body from a parsed `update` op.
 * @returns The new content, or `null` when a context / removed line can't be
 *   located — the caller then treats the file as unreconstructable rather than
 *   linting wrong content.
 */
export const applyUpdateHunks = (baseContent: string, hunkLines: string[]): string | null => {
  const baseLines = baseContent.split("\n");
  const result: string[] = [];
  let cursor = 0;

  // Copy base lines [cursor, end) into the result and move the cursor to `end`.
  const copyThrough = (end: number): void => {
    for (let copy = cursor; copy < end; copy += 1) result.push(baseLines[copy]);
    cursor = end;
  };

  // Find `text` at or after the cursor, keep everything before it, and step
  // past it. The matched line itself is NOT copied; the caller decides.
  const consumeUntil = (text: string): boolean => {
    const index = baseLines.indexOf(text, cursor);
    if (index === -1) return false;
    copyThrough(index);
    cursor = index + 1;
    return true;
  };

  // Move past an `@@` anchor line, keeping it (anchors are unchanged content).
  const seekAnchor = (anchor: string): void => {
    let index = baseLines.indexOf(anchor, cursor);
    if (index === -1) {
      const trimmed = anchor.trim();
      index = baseLines.findIndex(
        (candidate, position) => position >= cursor && candidate.trim() === trimmed,
      );
    }
    // Stay lenient: an anchor that cannot be located leaves the cursor alone.
    if (index !== -1) copyThrough(index + 1);
  };

  for (const line of hunkLines) {
    if (line.startsWith("@@")) {
      const rest = line.slice(2);
      const anchor = rest.startsWith(" ") ? rest.slice(1) : rest;
      if (anchor.trim() !== "") seekAnchor(anchor);
      continue;
    }
    if (line.startsWith(END_OF_FILE_MARKER)) continue;
    if (line === "") {
      // A bare blank line in a hunk is an unchanged empty context line.
      if (!consumeUntil("")) return null;
      result.push("");
      continue;
    }
    const tag = line[0];
    const text = line.slice(1);
    if (tag === " ") {
      if (!consumeUntil(text)) return null;
      result.push(text);
    } else if (tag === "-") {
      if (!consumeUntil(text)) return null;
    } else if (tag === "+") {
      result.push(text);
    } else {
      // Unknown prefix — treat as context to stay lenient.
      if (!consumeUntil(line)) return null;
      result.push(line);
    }
  }

  // Whatever follows the last matched line is unchanged.
  copyThrough(baseLines.length);
  return result.join("\n");
};
/**
 * Apply a string-replace edit, or return `null` when it can't be applied
 * faithfully (the `oldString` isn't in our buffer, so we're out of sync with
 * what the model actually edited). An empty `oldString` is a no-op rather than
 * a failure.
 *
 * `newString` is always inserted literally. `String.prototype.replace` with a
 * string replacement expands `$&`, `$$`, `` $` `` and `$'`, which would corrupt
 * source text that happens to contain those sequences (e.g. `'$'`), so the
 * single-replacement case is done by slicing instead.
 *
 * @param source Current buffer content.
 * @param oldString Text to find.
 * @param newString Literal replacement text.
 * @param replaceAll Replace every occurrence instead of only the first.
 * @returns The edited content, `source` unchanged for an empty `oldString`,
 *   or `null` when `oldString` does not occur in `source`.
 */
const applyStringReplace = (
  source: string,
  oldString: string,
  newString: string,
  replaceAll: boolean,
): string | null => {
  if (oldString === "") return source;
  const firstIndex = source.indexOf(oldString);
  if (firstIndex === -1) return null;
  if (replaceAll) return source.split(oldString).join(newString);
  return (
    source.slice(0, firstIndex) + newString + source.slice(firstIndex + oldString.length)
  );
};
+ const buffers = new Map(); + const touchedLintable = new Set(); + + for (const read of session.reads) { + const resolved = resolveAgainstCwd(read.path, session.cwd); + if (resolved) buffers.set(resolved, read.content); + } + + const applyPatchOps = (patchText: string): void => { + for (const operation of parseApplyPatch(patchText)) { + const resolved = resolveAgainstCwd(operation.path, session.cwd); + if (!resolved) continue; + if (isLintablePath(resolved)) touchedLintable.add(resolved); + if (operation.type === "add") { + const lines = operation.addedLines ?? []; + buffers.set(resolved, lines.length > 0 ? `${lines.join("\n")}\n` : ""); + } else if (operation.type === "delete") { + buffers.set(resolved, null); + } else { + const base = buffers.get(resolved); + if (typeof base !== "string") continue; + const applied = applyUpdateHunks(base, operation.hunkLines ?? []); + if (applied === null) { + // The hunk didn't match our base, so our buffer is out of sync with + // what the model actually edited. Drop it to "no faithful base" rather + // than emit stale content as if it were the reconstructed result. + buffers.delete(resolved); + continue; + } + const movedTo = operation.movePath && resolveAgainstCwd(operation.movePath, session.cwd); + if (movedTo) { + buffers.set(resolved, null); + buffers.set(movedTo, applied); + if (isLintablePath(movedTo)) touchedLintable.add(movedTo); + } else { + buffers.set(resolved, applied); + } + } + } + }; + + for (const edit of session.edits) { + if (edit.kind === "patch") { + applyPatchOps(edit.patch ?? ""); + continue; + } + const resolved = resolveAgainstCwd(edit.path, session.cwd); + if (!resolved) continue; + if (isLintablePath(resolved)) touchedLintable.add(resolved); + if (edit.kind === "write") { + buffers.set(resolved, edit.content ?? edit.resultContent ?? 
""); + } else if (edit.kind === "delete") { + buffers.set(resolved, null); + } else { + const base = buffers.get(resolved); + if (typeof base !== "string") continue; + const applied = applyStringReplace( + base, + edit.oldString ?? "", + edit.newString ?? "", + edit.replaceAll ?? false, + ); + // The oldString wasn't in our buffer, so it's out of sync with what the + // model actually edited. Drop to "no faithful base" rather than lint stale + // content — mirrors the apply_patch hunk-mismatch handling above. + if (applied === null) { + buffers.delete(resolved); + continue; + } + buffers.set(resolved, applied); + } + } + + const files: ReconstructedContent[] = []; + const unreconstructable: string[] = []; + for (const absolutePath of touchedLintable) { + const content = buffers.get(absolutePath); + if (typeof content === "string") { + files.push({ absolutePath, content }); + } else if (content === undefined) { + // Edited but never had a faithful base (e.g. a replace on unread content, + // or a Codex shell edit we couldn't capture). Deleted files (null) are + // intentional removals, not coverage gaps. 
// Matches one SGR escape sequence: ESC, `[`, digits/semicolons, then `m`.
const ANSI_PATTERN = new RegExp(String.fromCharCode(27) + "\\[[0-9;]*m", "g");

/**
 * Remove every escape sequence matched by `ANSI_PATTERN` from `text`, leaving
 * only the characters that take up width in the terminal.
 */
const stripAnsi = (text: string): string => text.split(ANSI_PATTERN).join("");
=> + Math.max(header.length, ...rows.map((row) => stripAnsi(row[columnIndex] ?? "").length)), + ); + const pad = (cell: string, columnIndex: number): string => { + const visibleLength = stripAnsi(cell).length; + return cell + " ".repeat(Math.max(0, widths[columnIndex] - visibleLength)); + }; + const headerLine = headers.map((header, index) => highlighter.dim(pad(header, index))).join(" "); + const bodyLines = rows.map((row) => row.map((cell, index) => pad(cell, index)).join(" ")); + return [headerLine, ...bodyLines].join("\n"); +}; + +const renderModelTable = (models: ReadonlyArray): string => { + const rows = models.map((group, index) => [ + String(index + 1), + highlighter.bold(modelLabel(group)), + colorForProvider(group.provider)(group.provider), + String(group.filesScanned), + renderScore(group), + ]); + return renderTable(["#", "Model", "Tool", "Files", "Score"], rows); +}; + +const renderProviderTable = (providers: ReadonlyArray): string => { + const rows = providers.map((group) => [ + highlighter.bold(colorForProvider(group.provider)(group.provider)), + String(group.filesScanned), + renderScore(group), + ]); + return renderTable(["Tool", "Files", "Score"], rows); +}; + +const renderCommunityScore = (score: number | null): string => + score === null ? highlighter.dim("n/a") : colorForScore(score)(String(score).padStart(3)); + +const renderCommunityTable = (models: ReadonlyArray): string => { + const rows = models.map((model, index) => [ + String(index + 1), + highlighter.bold(model.model), + colorForProvider(model.harness)(model.harness), + renderCommunityScore(model.communityScore), + // Sample size beside the score so a thinly-sampled model isn't read as authoritative. + highlighter.dim(String(model.runs)), + ]); + return renderTable(["#", "Model", "Tool", "Score", "Runs"], rows); +}; + +const calloutScore = (group: GroupStats): string => + group.weightedScore !== null ? 
` (${group.weightedScore})` : ""; + +const renderCallout = (report: StatsReport): string => { + if (!report.best) return ""; + const lines: string[] = []; + lines.push( + `${highlighter.success("Best")}: ${highlighter.bold( + modelLabel(report.best), + )}${calloutScore(report.best)}`, + ); + if (report.worst && report.worst.key !== report.best.key) { + lines.push( + `${highlighter.error("Worst")}: ${highlighter.bold( + modelLabel(report.worst), + )}${calloutScore(report.worst)}`, + ); + } + return lines.join("\n"); +}; + +/** + * Render the leaderboard to a string for the terminal. When a `community` board is + * supplied (telemetry on, `/api/stats` reachable), append how these agents rank + * across everyone for context. + */ +export const renderStatsReport = ( + report: StatsReport, + community: CommunityLeaderboard | null = null, +): string => { + const scopePhrase = report.scope === "global" ? "across all your projects" : "in this project"; + const header = [ + highlighter.bold("React Doctor leaderboard"), + highlighter.dim( + `Which agent writes the cleanest React code ${scopePhrase}. Higher is better, 0 to 100.`, + ), + ].join("\n"); + + if (report.models.length === 0) { + return [ + header, + "", + highlighter.dim( + "Nothing to rank yet. 
The edits touched only non-React files, were too few, or could not be replayed.", + ), + ].join("\n"); + } + + const shownModels = report.models.slice(0, STATS_LEADERBOARD_TOP_N); + const hiddenCount = report.models.length - shownModels.length; + const sections = [header, "", renderModelTable(shownModels)]; + if (hiddenCount > 0) { + sections.push(highlighter.dim(`+ ${hiddenCount} more (see --json for the full ranking).`)); + } + sections.push("", highlighter.dim("By tool:"), renderProviderTable(report.providers)); + + const callout = renderCallout(report); + if (callout) { + sections.push("", callout); + } + + if (community && community.models.length > 0) { + sections.push( + "", + highlighter.dim("Community leaderboard (all react-doctor users):"), + renderCommunityTable(community.models.slice(0, STATS_LEADERBOARD_TOP_N)), + ); + } + + const notes: string[] = []; + if (report.sessionsNonReact > 0) { + notes.push(`Skipped ${report.sessionsNonReact} that changed only non-React files.`); + } + if (report.sessionsUnreconstructable > 0) { + notes.push(`Skipped ${report.sessionsUnreconstructable} that used edits we could not replay.`); + } + if (notes.length > 0) { + sections.push("", ...notes.map((note) => highlighter.dim(note))); + } + + return sections.join("\n"); +}; diff --git a/packages/react-doctor/src/stats/report-stats-run.ts b/packages/react-doctor/src/stats/report-stats-run.ts new file mode 100644 index 000000000..9341de7d9 --- /dev/null +++ b/packages/react-doctor/src/stats/report-stats-run.ts @@ -0,0 +1,86 @@ +import { FETCH_TIMEOUT_MS, STATS_API_URL } from "@react-doctor/core"; +import * as Option from "effect/Option"; +import * as Schema from "effect/Schema"; +import { STATS_REPORT_SCHEMA_VERSION } from "./constants.js"; +import { toLeaderboardRow } from "./leaderboard-row.js"; +import type { CommunityLeaderboard, StatsReport } from "./types.js"; + +const CommunityModelSchema = Schema.Struct({ + model: Schema.String, + harness: Schema.String, + 
/**
 * Whether `error` is an `Error` instance named `AbortError` or `TimeoutError`.
 * Any other value, including non-`Error` throwables, yields `false`.
 */
const isAbortError = (error: unknown): boolean => {
  if (!(error instanceof Error)) return false;
  return ["AbortError", "TimeoutError"].includes(error.name);
};
+ */ +export const reportStatsRun = async (report: StatsReport): Promise => { + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); + try { + const response = await fetch(resolveStatsApiUrl(), { + method: "POST", + headers: { "Content-Type": "application/json" }, + // Plain JSON, not gzip: the payload is a handful of tiny rows, so + // compression would cost more than it saves (unlike the score API's large + // diagnostics body). + body: JSON.stringify({ + schemaVersion: STATS_REPORT_SCHEMA_VERSION, + models: report.models.map(toLeaderboardRow), + }), + signal: controller.signal, + }); + + if (!response.ok) { + console.warn(`[react-doctor] Stats API returned ${response.status} ${response.statusText}`); + return null; + } + + return parseCommunity(await response.json()); + } catch (error) { + console.warn(`[react-doctor] Stats API unreachable (${describeFailure(error)})`); + return null; + } finally { + clearTimeout(timeoutId); + } +}; diff --git a/packages/react-doctor/src/stats/run-stats-scan.ts b/packages/react-doctor/src/stats/run-stats-scan.ts new file mode 100644 index 000000000..1328b8eed --- /dev/null +++ b/packages/react-doctor/src/stats/run-stats-scan.ts @@ -0,0 +1,165 @@ +import * as path from "node:path"; +import { + isPathInside, + mapWithConcurrency, + runEditorScan, + type Diagnostic, +} from "@react-doctor/core"; +import { STATS_SCAN_CONCURRENCY } from "./constants.js"; +import { isReactSourceFile } from "./is-react-source.js"; +import { materializeReconstructedTree } from "./materialize-reconstructed-tree.js"; +import { reconstructSession } from "./reconstruct-files.js"; +import type { AgentSession, ReconstructedFile, SessionScanResult } from "./types.js"; + +const toPosix = (filePath: string): string => filePath.split(path.sep).join("/"); + +/** Longest shared directory of a set of absolute paths, or `null`. 
/**
 * Longest shared directory of a set of absolute paths, or `null`.
 *
 * Each path is reduced to its directory (`path.dirname`) and split on
 * `path.sep`; the result is the longest leading run of segments common to all
 * of them, re-joined with `path.sep`.
 *
 * @param absolutePaths File paths to compare.
 * @returns The shared directory, or `null` when the list is empty or the
 *   shared prefix joins to an empty string (e.g. POSIX paths that have only
 *   the root in common).
 */
const commonAncestorDirectory = (absolutePaths: ReadonlyArray<string>): string | null => {
  if (absolutePaths.length === 0) return null;
  const splitPaths = absolutePaths.map((absolutePath) =>
    path.dirname(absolutePath).split(path.sep),
  );
  let shared = splitPaths[0];
  for (const segments of splitPaths.slice(1)) {
    // Count how many leading segments this path has in common with the
    // running prefix, then trim the prefix down to that length.
    let index = 0;
    while (index < shared.length && index < segments.length && shared[index] === segments[index]) {
      index += 1;
    }
    shared = shared.slice(0, index);
  }
  const joined = shared.join(path.sep);
  return joined.length > 0 ? joined : null;
};
+ */ +const resolveScanRoot = ( + session: AgentSession, + fileAbsolutePaths: ReadonlyArray, + repoRoot: string | null, +): string | null => { + if (repoRoot) return repoRoot; + if (session.cwd) return session.cwd; + return commonAncestorDirectory(fileAbsolutePaths); +}; + +const scanSession = async ( + session: AgentSession, + repoRoot: string | null, +): Promise => { + const reconstruction = reconstructSession(session); + const empty: SessionScanResult = { + session, + diagnostics: [], + filesScanned: 0, + reconstructedFiles: reconstruction.files.length, + unreconstructable: reconstruction.unreconstructable.length, + }; + // React Doctor only scores React code; ranking a model on the plain + // backend/util/config files it also wrote would dilute its diagnostics-per- + // file and skew the leaderboard toward whoever wrote the most non-React code. + const reactFiles = reconstruction.files.filter((file) => + isReactSourceFile(file.absolutePath, file.content), + ); + if (reactFiles.length === 0) return empty; + + const scanRoot = resolveScanRoot( + session, + reactFiles.map((file) => file.absolutePath), + repoRoot, + ); + if (!scanRoot) return empty; + + const files: ReconstructedFile[] = []; + for (const file of reactFiles) { + if (!isPathInside(file.absolutePath, scanRoot)) continue; + files.push({ ...file, relativePath: toPosix(path.relative(scanRoot, file.absolutePath)) }); + } + if (files.length === 0) return empty; + + const tree = materializeReconstructedTree(scanRoot, files); + try { + const result = await runEditorScan({ + directory: tree.tempDirectory, + includePaths: tree.relativePaths, + lint: true, + runDeadCode: false, + // The node running the CLI can load oxlint's native binding. + nodeBinaryPath: process.execPath, + }); + // A scan that errored, was skipped (unanalyzable project), or whose lint + // phase failed yields zero diagnostics for reasons unrelated to code + // quality. 
Counting its files as clean would reward un-lintable code and + // inflate the leaderboard, so it joins the empty bucket instead. + if (!result.ok || result.skipped || result.didLintFail) return empty; + const diagnostics: Diagnostic[] = result.diagnostics.map((diagnostic) => ({ + ...diagnostic, + filePath: remapDiagnosticPath( + diagnostic.filePath, + tree.tempDirectory, + tree.realTempDirectory, + scanRoot, + ), + })); + return { + session, + diagnostics, + filesScanned: tree.relativePaths.length, + reconstructedFiles: reconstruction.files.length, + unreconstructable: reconstruction.unreconstructable.length, + }; + } finally { + tree.cleanup(); + } +}; + +export interface RunStatsScanOptions { + /** Reports `(completedCount, totalCount)` as each session finishes. */ + readonly onProgress?: (completedCount: number, totalCount: number) => void; +} + +/** + * Reconstruct and lint every session with bounded concurrency. `repoRoot` pins + * the scan root for repo-scoped runs; pass `null` for global runs (per-session + * root inferred from cwd / edited files). Each session that yields content + * spawns one oxlint subprocess, so progress is reported per session. 
+ */ +export const runStatsScan = ( + sessions: ReadonlyArray, + repoRoot: string | null, + options: RunStatsScanOptions = {}, +): Promise => { + let completedCount = 0; + return mapWithConcurrency(sessions, STATS_SCAN_CONCURRENCY, async (session) => { + const result = await scanSession(session, repoRoot); + completedCount += 1; + options.onProgress?.(completedCount, sessions.length); + return result; + }); +}; diff --git a/packages/react-doctor/src/stats/sources/claude.ts b/packages/react-doctor/src/stats/sources/claude.ts new file mode 100644 index 000000000..1a5fa11f7 --- /dev/null +++ b/packages/react-doctor/src/stats/sources/claude.ts @@ -0,0 +1,124 @@ +import * as os from "node:os"; +import * as path from "node:path"; +import { asArray, asRecord, asString } from "../coerce.js"; +import { mostCommonKey } from "../most-common-key.js"; +import { fileSessionCandidates, findJsonlFiles, readJsonlEntries } from "../walk-transcripts.js"; +import { STATS_UNKNOWN_MODEL } from "../constants.js"; +import type { AgentSession, FileEdit, FileRead, SourceDef } from "./index.js"; + +const EDIT_TOOL_NAMES = new Set(["Write", "Edit", "MultiEdit"]); + +const editsFromToolUse = (name: string, input: Record): FileEdit[] => { + const filePath = asString(input.file_path); + if (!filePath) return []; + if (name === "Write") { + return [{ kind: "write", path: filePath, content: asString(input.content) ?? "" }]; + } + if (name === "Edit") { + return [ + { + kind: "replace", + path: filePath, + oldString: asString(input.old_string) ?? "", + newString: asString(input.new_string) ?? "", + replaceAll: input.replace_all === true, + }, + ]; + } + // MultiEdit: a sequence of replacements applied in order. + return asArray(input.edits).flatMap((rawEdit) => { + const edit = asRecord(rawEdit); + if (!edit) return []; + return [ + { + kind: "replace" as const, + path: filePath, + oldString: asString(edit.old_string) ?? "", + newString: asString(edit.new_string) ?? 
"", + replaceAll: edit.replace_all === true, + }, + ]; + }); +}; + +export const parseClaudeSession = async (transcriptPath: string): Promise => { + const edits: FileEdit[] = []; + const reads: FileRead[] = []; + const modelCounts = new Map(); + let cwd: string | null = null; + let sawAnything = false; + + await readJsonlEntries(transcriptPath, (entry) => { + sawAnything = true; + if (!cwd) cwd = asString(entry.cwd) ?? null; + + // Post-edit / read snapshots ride a top-level `toolUseResult` on the + // following user/tool line — the most faithful reconstruction source. + const toolResult = asRecord(entry.toolUseResult); + if (toolResult) { + const resultFilePath = asString(toolResult.filePath); + if (resultFilePath && typeof toolResult.content === "string") { + edits.push({ kind: "write", path: resultFilePath, resultContent: toolResult.content }); + } + const readFile = asRecord(toolResult.file); + const readPath = readFile && asString(readFile.filePath); + if (readFile && readPath && typeof readFile.content === "string") { + reads.push({ path: readPath, content: readFile.content }); + } + } + + if (entry.type !== "assistant") return; + const message = asRecord(entry.message); + if (!message) return; + const model = asString(message.model); + if (model && model !== "") { + modelCounts.set(model, (modelCounts.get(model) ?? 0) + 1); + } + for (const rawBlock of asArray(message.content)) { + const block = asRecord(rawBlock); + if (!block || block.type !== "tool_use") continue; + const name = asString(block.name); + const input = asRecord(block.input); + if (!name || !input || !EDIT_TOOL_NAMES.has(name)) continue; + edits.push(...editsFromToolUse(name, input)); + } + }); + + if (!sawAnything) return null; + + return { + provider: "claude", + sessionId: path.basename(transcriptPath, ".jsonl"), + transcriptPath, + model: mostCommonKey(modelCounts) ?? 
/**
 * Directories that may hold Claude Code transcripts: the `projects` folder of
 * each candidate config directory.
 *
 * `CLAUDE_CONFIG_DIR`, when set to a non-empty value, is a comma-separated
 * list of config directories and replaces the defaults. Otherwise the defaults
 * are the `claude` directory under the XDG config home, then `~/.claude`.
 *
 * `XDG_CONFIG_HOME` is honoured only when it is a non-empty absolute path. The
 * XDG Base Directory spec treats an empty value as unset and a relative one as
 * invalid; using either verbatim would resolve `claude/projects` against the
 * current working directory.
 */
const claudeRoots = (): string[] => {
  const fromEnv = process.env.CLAUDE_CONFIG_DIR;
  if (fromEnv) {
    return fromEnv
      .split(",")
      .map((value) => value.trim())
      .filter(Boolean)
      .map((dir) => path.join(dir, "projects"));
  }
  const xdgFromEnv = process.env.XDG_CONFIG_HOME?.trim() ?? "";
  const xdgConfigHome = path.isAbsolute(xdgFromEnv)
    ? xdgFromEnv
    : path.join(os.homedir(), ".config");
  return [path.join(xdgConfigHome, "claude"), path.join(os.homedir(), ".claude")].map((dir) =>
    path.join(dir, "projects"),
  );
};
/**
 * Directories that may hold Codex transcripts: `sessions` and
 * `archived_sessions` under the Codex home.
 *
 * The home is `CODEX_HOME` when that variable holds a non-blank value,
 * otherwise `~/.codex`. A blank value is treated as unset; joining an empty
 * home would produce `sessions` / `archived_sessions` relative to the current
 * working directory.
 */
const codexRoots = (): string[] => {
  const home = process.env.CODEX_HOME?.trim() || path.join(os.homedir(), ".codex");
  return [path.join(home, "sessions"), path.join(home, "archived_sessions")];
};
/**
 * Home directories to search for Cursor CLI chats.
 *
 * `REACT_DOCTOR_CURSOR_CLI_HOME`, when non-empty, replaces the defaults; it is
 * split on the platform path delimiter and empty segments are discarded.
 * Otherwise both install channels are returned: `~/.cursor` (stable) and
 * `~/.cursor-nightly`.
 */
const cursorCliHomes = (): string[] => {
  const configured = process.env.REACT_DOCTOR_CURSOR_CLI_HOME;
  if (!configured) {
    const userHome = os.homedir();
    return [".cursor", ".cursor-nightly"].map((channelDir) => path.join(userHome, channelDir));
  }
  return configured.split(path.delimiter).filter((segment) => segment.length > 0);
};
/**
 * Rebuild one Cursor CLI chat (`store.db`) as an `AgentSession`.
 *
 * Walks every content block of every stored message in order:
 * - a `tool-call` that `editFromToolCall` recognises becomes a `FileEdit`;
 * - any other `tool-call` from a read tool on a lintable path is remembered by
 *   its `toolCallId`;
 * - a `tool-result` whose `toolCallId` matches a remembered read, and whose
 *   `result` is a string, becomes a `FileRead` (first read per path only).
 *
 * @param storeDbPath Path of the chat's `store.db`; also reported as `transcriptPath`.
 * @param sessionId Identifier reported on the session.
 * @returns The session, or `null` when the store cannot be read or holds no edits.
 */
const buildCliSession = (storeDbPath: string, sessionId: string): AgentSession | null => {
  const store = readCursorCliStore(storeDbPath);
  if (!store) return null;

  const edits: FileEdit[] = [];
  const reads: FileRead[] = [];
  // Explicit type arguments: a bare `new Set()` / `new Map()` infers
  // `Set<unknown>` / `Map<any, any>`, which lets `any` flow into `reads`.
  const capturedReadPaths = new Set<string>();
  // tool-call → tool-result are separate messages, so a Read's path is recorded
  // when its call is seen, then paired with the content in its later result.
  const pendingReadPaths = new Map<string, string>();

  for (const message of store.messages) {
    for (const rawBlock of asArray(message.content)) {
      const block = asRecord(rawBlock);
      if (!block) continue;
      if (block.type === "tool-call") {
        const toolName = asString(block.toolName);
        if (!toolName) continue;
        const edit = editFromToolCall(toolName, block.args);
        if (edit) {
          edits.push(edit);
          continue;
        }
        const toolCallId = asString(block.toolCallId);
        const readRecord = asRecord(block.args);
        const readPath = readRecord && asString(readRecord.path);
        if (READ_TOOL_NAMES.has(toolName) && toolCallId && readPath && isLintablePath(readPath)) {
          pendingReadPaths.set(toolCallId, readPath);
        }
      } else if (block.type === "tool-result") {
        const toolCallId = asString(block.toolCallId);
        const readPath = toolCallId ? pendingReadPaths.get(toolCallId) : undefined;
        // Keep the first read of a path (the pre-edit base); a later post-edit
        // read would otherwise overwrite it and desync replace/patch replay.
        if (readPath && !capturedReadPaths.has(readPath) && typeof block.result === "string") {
          reads.push({ path: readPath, content: block.result });
          capturedReadPaths.add(readPath);
        }
      }
    }
  }

  // A chat that edited nothing contributes no code to score.
  if (edits.length === 0) return null;

  return {
    provider: "cursor",
    sessionId,
    transcriptPath: storeDbPath,
    model: store.lastUsedModel ?? STATS_UNKNOWN_MODEL,
    cwd: null,
    edits,
    reads,
  };
};
/**
 * Enumerate the Cursor CLI chats stored under one home directory.
 *
 * Layout scanned: `<home>/chats/<workspace>/<session>/store.db`. Every session
 * directory that contains a `store.db` yields one lazy candidate; unreadable
 * directories and non-directory entries are skipped.
 */
const discoverCliSessions = (home: string): SessionCandidate[] => {
  // Names of the immediate sub-directories of `directory`; empty when it cannot be listed.
  const childDirectoryNames = (directory: string): string[] => {
    try {
      return fs
        .readdirSync(directory, { withFileTypes: true })
        .filter((entry) => entry.isDirectory())
        .map((entry) => entry.name);
    } catch {
      return [];
    }
  };

  const chatsRoot = path.join(home, "chats");
  return childDirectoryNames(chatsRoot).flatMap((workspaceName) => {
    const workspaceDir = path.join(chatsRoot, workspaceName);
    return childDirectoryNames(workspaceDir).flatMap((sessionName): SessionCandidate[] => {
      const sessionDir = path.join(workspaceDir, sessionName);
      const storeDbPath = path.join(sessionDir, "store.db");
      if (!fs.existsSync(storeDbPath)) return [];
      return [
        {
          provider: "cursor",
          modifiedMs: sessionModifiedMs(sessionDir, storeDbPath),
          load: async () => buildCliSession(storeDbPath, sessionName),
        },
      ];
    });
  });
};
/**
 * Translate one Cursor composer tool call into a `FileEdit`.
 *
 * Only completed calls on lintable paths are considered:
 * - `delete_file` becomes a `delete` edit;
 * - `edit_file_v2` becomes a `write` edit whose content is the blob behind
 *   `result.afterContentId`, or the inline `params.streamingContent` when that
 *   blob is unavailable.
 *
 * @param toolData The bubble's `toolFormerData` record (`params`/`result` are JSON strings).
 * @param db Open composer database, used to resolve content ids.
 * @returns The edit, or `null` for any other tool, an incomplete call, a
 *   non-lintable path, or an edit with no recoverable content.
 */
const editFromToolCall = (
  toolData: Record<string, unknown>,
  db: CursorDbHandle,
): FileEdit | null => {
  if (toolData.status !== "completed") return null;
  const name = asString(toolData.name);
  if (!name) return null;
  const params = asRecord(parseJson(asString(toolData.params)));
  const filePath = params && asString(params.relativeWorkspacePath);
  if (!filePath || !isLintablePath(filePath)) return null;

  if (name === "delete_file") {
    return { kind: "delete", path: filePath };
  }
  if (name !== "edit_file_v2") return null;

  const result = asRecord(parseJson(asString(toolData.result)));
  const afterContentId = result && asString(result.afterContentId);
  const content = afterContentId ? db.contentValue(afterContentId) : null;
  const resultContent = content ?? asString(params?.streamingContent);
  // Checking the type (rather than `=== undefined`) rejects every "no content"
  // sentinel, whichever one the helpers use.
  if (typeof resultContent !== "string") return null;
  return { kind: "write", path: filePath, resultContent };
};
/** Model id recorded on a single bubble; the "default" (Auto) sentinel counts as none. */
const bubbleModelName = (bubble: Record<string, unknown>): string | undefined => {
  const modelInfo = asRecord(bubble.modelInfo);
  const modelName = modelInfo && asString(modelInfo.modelName);
  return modelName && modelName !== "default" ? modelName : undefined;
};

/** An edit paired with its bubble's creation time, so edits can be replayed in order. */
interface OrderedEdit {
  readonly createdAt: number;
  readonly edit: FileEdit;
}

/**
 * Rebuild one Cursor GUI composer chat as an `AgentSession`.
 *
 * Every bubble of the composer is parsed; its model (if any) is tallied and its
 * `toolFormerData` (if any) is turned into an edit via `editFromToolCall`.
 * Edits are ordered by the bubble's numeric `createdAt` (0 when absent).
 *
 * The session's model is the composer-level selection when concrete, else the
 * model most bubbles used, else `STATS_UNKNOWN_MODEL`.
 *
 * @param db Open composer database.
 * @param composerId Composer (chat) to load; also used as the session id.
 * @returns The session; `edits` may be empty when the chat changed no lintable file.
 */
const buildCursorSession = (db: CursorDbHandle, composerId: string): AgentSession | null => {
  const composer = asRecord(parseJson(db.composerValue(composerId)));
  const orderedEdits: OrderedEdit[] = [];
  // Typed explicitly: a bare `new Map()` is `Map<any, any>` and hides key/count mistakes.
  const bubbleModelCounts = new Map<string, number>();

  for (const rawBubble of db.bubbleValues(composerId)) {
    const bubble = asRecord(parseJson(rawBubble));
    if (!bubble) continue;
    const model = bubbleModelName(bubble);
    if (model) bubbleModelCounts.set(model, (bubbleModelCounts.get(model) ?? 0) + 1);
    const toolData = asRecord(bubble.toolFormerData);
    if (!toolData) continue;
    const edit = editFromToolCall(toolData, db);
    if (edit) {
      const createdAt = typeof bubble.createdAt === "number" ? bubble.createdAt : 0;
      orderedEdits.push({ createdAt, edit });
    }
  }

  // Apply edits in chronological order so the last write to a file wins.
  orderedEdits.sort((left, right) => left.createdAt - right.createdAt);

  return {
    provider: "cursor",
    sessionId: composerId,
    transcriptPath: `cursor-composer:${composerId}`,
    model: composerModelName(composer) ?? mostCommonKey(bubbleModelCounts) ?? STATS_UNKNOWN_MODEL,
    cwd: null,
    edits: orderedEdits.map((entry) => entry.edit),
    reads: [],
  };
};
/**
 * List every composer in a Cursor GUI database as a lazy session candidate.
 *
 * Only the header index is read here; the bubble/content walk for a composer is
 * deferred until that candidate's `load()` is invoked. Yields `[]` when the
 * database cannot be opened.
 */
export const cursorComposerCandidates = (dbPath: string | null): SessionCandidate[] => {
  const database = openCursorDb(dbPath);
  const candidates: SessionCandidate[] = [];
  if (database) {
    for (const header of database.composerHeaders()) {
      candidates.push({
        provider: "cursor",
        modifiedMs: header.modifiedMs,
        load: async () => buildCursorSession(database, header.composerId),
      });
    }
  }
  return candidates;
};
/**
 * One edit operation an agent performed on a file, normalized across
 * providers. `replace` carries `oldString`/`newString`; `patch` carries a raw
 * apply-patch envelope; `write` carries full `content`. `resultContent` is the
 * post-edit full file content when the transcript records it directly (Claude
 * tool results), which short-circuits replay reconstruction.
 *
 * NOTE(review): the Cursor adapters (GUI composer and CLI) build their `write`
 * edits with `resultContent` only and never set `content` — confirm that
 * consumers treat `resultContent` as the authoritative content for `write`.
 */
export interface FileEdit {
  /** Operation type; determines which optional fields below are meaningful. */
  readonly kind: FileEditKind;
  /**
   * Path the edit targets. The Codex and Cursor CLI adapters emit `patch`
   * edits with `path: ""` — presumably the envelope itself names the affected
   * files (e.g. `*** Add File: …`); confirm against the patch replayer.
   */
  readonly path: string;
  /** Full file content for a `write`. */
  readonly content?: string;
  /** Text to find, for a `replace`. */
  readonly oldString?: string;
  /** Replacement text, for a `replace`. */
  readonly newString?: string;
  // NOTE(review): presumably "replace every occurrence" for a `replace`; no
  // adapter visible here sets it — confirm.
  readonly replaceAll?: boolean;
  /** Raw apply-patch envelope, for a `patch`. */
  readonly patch?: string;
  /** Post-edit full file content, when the source recorded it directly. */
  readonly resultContent?: string;
}
/**
 * A discovered-but-not-yet-parsed session. Sources enumerate these cheaply so
 * scope/`--since`/`--limit` can be applied before the expensive `load()` runs
 * (a file read for transcript sources, a DB walk for the Cursor composer
 * source). `modifiedMs` is the sort + `--since` key (0 when unknown).
 */
export interface SessionCandidate {
  /** Agent tool this session came from. */
  readonly provider: StatsProvider;
  /** Last-modified time in ms since the epoch; 0 when it could not be determined. */
  readonly modifiedMs: number;
  /**
   * Parse the session. Resolves to `null` when the source holds nothing usable
   * (e.g. an empty transcript or a chat without edits).
   */
  load(): Promise<AgentSession | null>;
}
*/ +export interface GroupStats { + readonly key: string; + readonly provider: StatsProvider | "mixed"; + readonly sessions: number; + readonly filesScanned: number; + readonly unreconstructable: number; + readonly totalDiagnostics: number; + readonly errorCount: number; + readonly warningCount: number; + readonly diagnosticsPerFile: number; + /** Raw 0-100 React Doctor score for this group's code (null if undersampled). */ + readonly score: number | null; + readonly scoreLabel: string | null; + /** + * Confidence-weighted score: the raw score regressed toward the global mean by + * the group's evidence (files discounted by sessions). This is what the + * leaderboard ranks on, so small samples can't dominate. + */ + readonly weightedScore: number | null; + readonly topRules: ReadonlyArray<{ readonly rule: string; readonly count: number }>; +} + +export interface StatsReport { + readonly scope: "repo" | "global"; + readonly directory: string; + readonly models: GroupStats[]; + readonly providers: GroupStats[]; + readonly best: GroupStats | null; + readonly worst: GroupStats | null; + /** Sessions with edits that were reconstructed and considered. */ + readonly sessionsAnalyzed: number; + /** Sessions that contributed at least one React file to the ranking. */ + readonly sessionsRanked: number; + /** Sessions reconstructed successfully but whose files were all non-React. */ + readonly sessionsNonReact: number; + /** Sessions whose edits could not be faithfully reconstructed. */ + readonly sessionsUnreconstructable: number; + readonly generatedAt: string; +} + +export interface StatsScopeOptions { + readonly global: boolean; + readonly since?: Date; + readonly limit: number; + readonly provider?: StatsProvider; +} + +/** One model's standing across every `react-doctor stats` run (the community). */ +export interface CommunityModel { + readonly model: string; + readonly harness: string; + /** Files-weighted mean score across all runs (null if undersampled globally). 
/** File modification time in ms, or 0 when the file is missing/unreadable. */
export const statMtimeMs = (filePath: string): number => {
  try {
    return fs.statSync(filePath).mtimeMs;
  } catch {
    return 0;
  }
};

/**
 * Turn a transcript-file-based provider into lazy `SessionCandidate`s: one per
 * file that `discover` reports under each root, in root order then discovery
 * order. Nothing is parsed here — `parse` runs only when a candidate's `load()`
 * is called. The file's mtime is the sort + `--since` key.
 *
 * @param provider Provider recorded on every candidate.
 * @param roots Directories to search.
 * @param discover Lists the transcript files under one root.
 * @param parse Parses one transcript; may resolve to `null`.
 * @returns One candidate per discovered transcript.
 */
export const fileSessionCandidates = (
  provider: StatsProvider,
  roots: ReadonlyArray<string>,
  discover: (root: string) => string[],
  parse: (transcriptPath: string) => Promise<AgentSession | null>,
): SessionCandidate[] =>
  roots.flatMap((root) =>
    discover(root).map((transcriptPath) => ({
      provider,
      modifiedMs: statMtimeMs(transcriptPath),
      load: () => parse(transcriptPath),
    })),
  );
/**
 * Stream a JSONL file line-by-line through `node:readline`, invoking `onEntry`
 * with each decoded value that is a non-null object. Streaming keeps memory
 * flat on large transcripts (no whole-file read).
 *
 * Blank lines, lines that are not valid JSON, and JSON scalars/`null` are
 * skipped. A stream error stops the read silently; entries already delivered
 * stay delivered, so one corrupt transcript never sinks a whole run.
 *
 * @param filePath Transcript to read.
 * @param onEntry Called synchronously for each decoded object, in file order.
 * @returns A promise that resolves once the file has been consumed (or reading stopped).
 */
export const readJsonlEntries = async (
  filePath: string,
  onEntry: (entry: Record<string, unknown>) => void,
): Promise<void> => {
  const lines = readline.createInterface({
    input: fs.createReadStream(filePath, { encoding: "utf8" }),
    // Treat every "\r\n" as one line break, however far apart the two bytes arrive.
    crlfDelay: Infinity,
  });
  try {
    for await (const line of lines) {
      if (!line.trim()) continue;
      let entry: unknown;
      try {
        entry = JSON.parse(line);
      } catch {
        continue;
      }
      if (entry && typeof entry === "object") onEntry(entry as Record<string, unknown>);
    }
  } catch {
    // Unreadable file / stream error: stop silently, keep partial entries.
  } finally {
    lines.close();
  }
};
+const loadSqlite = (): SqliteModule | null => { + try { + return createRequire(import.meta.url)("node:sqlite"); + } catch { + return null; + } +}; +const sqlite = loadSqlite(); + +const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "stats-adapters-")); + +const writeTranscript = (name: string, lines: unknown[]): string => { + const filePath = path.join(tempDir, name); + fs.writeFileSync(filePath, lines.map((line) => JSON.stringify(line)).join("\n")); + return filePath; +}; + +afterAll(() => { + closeCursorDb(); + fs.rmSync(tempDir, { recursive: true, force: true }); +}); + +describe("parseClaudeSession", () => { + it("extracts model, cwd, edits, and post-edit result content", async () => { + const filePath = writeTranscript("claude.jsonl", [ + { + type: "assistant", + cwd: "/repo", + timestamp: "2026-06-20T00:00:00Z", + message: { + model: "claude-x", + content: [ + { + type: "tool_use", + name: "Write", + id: "t1", + input: { file_path: "/repo/src/a.ts", content: "export const a=1;" }, + }, + ], + }, + }, + { + type: "user", + toolUseResult: { filePath: "/repo/src/a.ts", content: "export const a = 1;\n" }, + }, + ]); + const session = await parseClaudeSession(filePath); + expect(session?.model).toBe("claude-x"); + expect(session?.cwd).toBe("/repo"); + expect(session?.edits.some((edit) => edit.resultContent === "export const a = 1;\n")).toBe( + true, + ); + }); +}); + +describe("parseCodexSession", () => { + it("extracts model from turn_context, cwd from session_meta, and apply_patch edits", async () => { + const filePath = writeTranscript("codex.jsonl", [ + { type: "session_meta", payload: { cwd: "/repo" } }, + { type: "turn_context", payload: { model: "gpt-5.5" } }, + { + type: "response_item", + payload: { + type: "custom_tool_call", + name: "apply_patch", + input: "*** Begin Patch\n*** Add File: /repo/d.ts\n+x\n*** End Patch", + }, + }, + ]); + const session = await parseCodexSession(filePath); + expect(session?.model).toBe("gpt-5.5"); + 
expect(session?.cwd).toBe("/repo"); + expect(session?.edits).toHaveLength(1); + expect(session?.edits[0].kind).toBe("patch"); + }); +}); + +interface ComposerFixture { + readonly composerId: string; + readonly modelName: string | null; + readonly bubbles: ReadonlyArray>; + readonly content?: Record; +} + +const writeComposerDb = (name: string, composers: ReadonlyArray): string => { + if (!sqlite) throw new Error("node:sqlite unavailable"); + const dbPath = path.join(tempDir, name); + const database = new sqlite.DatabaseSync(dbPath); + database.exec("CREATE TABLE ItemTable (key TEXT PRIMARY KEY, value TEXT)"); + database.exec("CREATE TABLE cursorDiskKV (key TEXT PRIMARY KEY, value TEXT)"); + + const headers = composers.map((composer, index) => ({ + composerId: composer.composerId, + lastUpdatedAt: 1_000 + index, + })); + const insertItem = database.prepare("INSERT INTO ItemTable (key, value) VALUES (?, ?)"); + insertItem.run("composer.composerHeaders", JSON.stringify({ allComposers: headers })); + + const insertKv = database.prepare("INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)"); + for (const composer of composers) { + insertKv.run( + `composerData:${composer.composerId}`, + JSON.stringify(composer.modelName ? { modelConfig: { modelName: composer.modelName } } : {}), + ); + composer.bubbles.forEach((bubble, index) => { + insertKv.run(`bubbleId:${composer.composerId}:b${index}`, JSON.stringify(bubble)); + }); + for (const [contentId, body] of Object.entries(composer.content ?? {})) { + insertKv.run(contentId, body); + } + } + database.close(); + return dbPath; +}; + +const describeCursor = sqlite ? 
describe : describe.skip; + +describeCursor("cursorComposerCandidates", () => { + beforeEach(() => closeCursorDb()); + + it("attributes the composer model and reconstructs exact content via afterContentId", async () => { + const dbPath = writeComposerDb("cursor-model.vscdb", [ + { + composerId: "comp-1", + modelName: "claude-opus-4-8", + content: { "composer.content.hash1": "export const x = 1;\n" }, + bubbles: [ + { + createdAt: 10, + toolFormerData: { + name: "edit_file_v2", + status: "completed", + params: JSON.stringify({ + relativeWorkspacePath: "/repo/b.ts", + streamingContent: "export const x=1;", + }), + result: JSON.stringify({ afterContentId: "composer.content.hash1" }), + }, + }, + { + createdAt: 20, + toolFormerData: { + name: "delete_file", + status: "completed", + params: JSON.stringify({ relativeWorkspacePath: "/repo/old.ts" }), + }, + }, + ], + }, + ]); + + const candidates = cursorComposerCandidates(dbPath); + expect(candidates).toHaveLength(1); + const session = await candidates[0].load(); + expect(session?.provider).toBe("cursor"); + expect(session?.model).toBe("claude-opus-4-8"); + expect(session?.edits).toHaveLength(2); + const write = session?.edits.find((edit) => edit.kind === "write"); + expect(write?.path).toBe("/repo/b.ts"); + expect(write?.resultContent).toBe("export const x = 1;\n"); + expect( + session?.edits.some((edit) => edit.kind === "delete" && edit.path === "/repo/old.ts"), + ).toBe(true); + }); + + it("falls back to the dominant bubble model when the composer is on Auto", async () => { + const dbPath = writeComposerDb("cursor-auto.vscdb", [ + { + composerId: "comp-2", + modelName: null, + content: { "composer.content.hash2": "export const y = 2;\n" }, + bubbles: [ + { modelInfo: { modelName: "gpt-5.5" } }, + { + createdAt: 5, + modelInfo: { modelName: "gpt-5.5" }, + toolFormerData: { + name: "edit_file_v2", + status: "completed", + params: JSON.stringify({ relativeWorkspacePath: "/repo/c.ts" }), + result: JSON.stringify({ 
afterContentId: "composer.content.hash2" }), + }, + }, + ], + }, + ]); + + const session = await cursorComposerCandidates(dbPath)[0]?.load(); + expect(session?.model).toBe("gpt-5.5"); + expect(session?.edits[0]?.resultContent).toBe("export const y = 2;\n"); + }); + + it("ignores non-lintable edits and skips when the database is absent", async () => { + expect(cursorComposerCandidates(null)).toEqual([]); + + const dbPath = writeComposerDb("cursor-nonlintable.vscdb", [ + { + composerId: "comp-3", + modelName: "claude-opus-4-8", + content: { "composer.content.hash3": "# readme" }, + bubbles: [ + { + createdAt: 1, + toolFormerData: { + name: "edit_file_v2", + status: "completed", + params: JSON.stringify({ relativeWorkspacePath: "/repo/README.md" }), + result: JSON.stringify({ afterContentId: "composer.content.hash3" }), + }, + }, + ], + }, + ]); + const session = await cursorComposerCandidates(dbPath)[0]?.load(); + expect(session?.edits).toEqual([]); + }); +}); + +interface CliStoreFixture { + readonly model: string; + readonly updatedAtMs: number; + readonly messages: ReadonlyArray<{ role: string; content: unknown }>; +} + +const CLI_ROOT_BLOB_ID = "f".repeat(64); + +// Build a Cursor CLI per-session store: hex-encoded `meta`, a protobuf-style +// manifest blob (`0x0a 0x20` + 32-byte id per message, in order), and one JSON +// message blob per entry — the shape `readCursorCliStore` parses. 
+const writeCliStore = (home: string, sessionId: string, fixture: CliStoreFixture): void => { // Test helper: writes one Cursor CLI session fixture (meta.json + store.db) under `home`.
+  if (!sqlite) throw new Error("node:sqlite unavailable"); // store.db is built with sqlite.DatabaseSync below
+  const sessionDir = path.join(home, "chats", "workspace-hash", sessionId);
+  fs.mkdirSync(sessionDir, { recursive: true }); // layout: home/chats/workspace-hash/sessionId/
+  fs.writeFileSync(
+    path.join(sessionDir, "meta.json"),
+    JSON.stringify({ schemaVersion: 1, updatedAtMs: fixture.updatedAtMs }),
+  );
+
+  const database = new sqlite.DatabaseSync(path.join(sessionDir, "store.db"));
+  database.exec("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT)");
+  database.exec("CREATE TABLE blobs (id TEXT PRIMARY KEY, data BLOB)");
+
+  const messageIds = fixture.messages.map((_, index) => index.toString(16).padStart(64, "0")); // 64 hex chars per id
+  const manifest = Buffer.concat( // per message: bytes 0x0a 0x20, then the 32 raw bytes decoded from its hex id
+    messageIds.map((id) => Buffer.concat([Buffer.from([0x0a, 0x20]), Buffer.from(id, "hex")])),
+  );
+
+  const insertMeta = database.prepare("INSERT INTO meta (key, value) VALUES (?, ?)");
+  insertMeta.run(
+    "0", // meta key; the value is hex-encoded JSON naming the root blob and the model
+    Buffer.from(
+      JSON.stringify({ latestRootBlobId: CLI_ROOT_BLOB_ID, lastUsedModel: fixture.model }),
+    ).toString("hex"),
+  );
+
+  const insertBlob = database.prepare("INSERT INTO blobs (id, data) VALUES (?, ?)");
+  insertBlob.run(CLI_ROOT_BLOB_ID, manifest); // the root blob stores the manifest built above
+  fixture.messages.forEach((message, index) => { // each message becomes a JSON blob stored under its id
+    insertBlob.run(messageIds[index], Buffer.from(JSON.stringify(message)));
+  });
+  database.close();
+};
+
+describeCursor("cursorCliCandidates", () => {
+  it("reconstructs model, ordered edits, and read bases from a CLI store", async () => {
+    const home = path.join(tempDir, "cursor-cli-home");
+    writeCliStore(home, "session-1", {
+      model: "claude-opus-4-8",
+      updatedAtMs: 5_000, // written to meta.json; asserted below as candidates[0].modifiedMs
+      messages: [
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "tool-call",
+              toolName: "Write",
+              toolCallId: "w1",
+              args: { path: "/repo/a.tsx", contents: "export const A = () => null;\n" },
+            },
+          ],
+        },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "tool-call",
+              toolName: "ApplyPatch",
+              toolCallId: "p1",
+              args: "*** Begin Patch\n*** Add File: /repo/b.ts\n+export const b = 2;\n*** End Patch",
+            },
+          ],
+        },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "tool-call",
+              toolName: "Read", // not an edit; its tool-result (next message) is asserted in session.reads
+              toolCallId: "r1",
+              args: { path: "/repo/c.tsx" },
+            },
+          ],
+        },
+        {
+          role: "tool",
+          content: [{ type: "tool-result", toolCallId: "r1", result: "export const C = 1;\n" }],
+        },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "tool-call",
+              toolName: "StrReplace",
+              toolCallId: "s1",
+              args: { path: "/repo/c.tsx", old_string: "1", new_string: "2" },
+            },
+          ],
+        },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "tool-call",
+              toolName: "Delete",
+              toolCallId: "d1",
+              args: { path: "/repo/old.ts" },
+            },
+          ],
+        },
+        {
+          role: "assistant",
+          content: [
+            { type: "tool-call", toolName: "TodoWrite", toolCallId: "t1", args: { todos: [] } },
+          ],
+        },
+      ],
+    });
+
+    const candidates = cursorCliCandidates([home]);
+    expect(candidates).toHaveLength(1);
+    expect(candidates[0].modifiedMs).toBe(5_000);
+
+    const session = await candidates[0].load();
+    expect(session?.provider).toBe("cursor");
+    expect(session?.model).toBe("claude-opus-4-8");
+
+    const write = session?.edits.find((edit) => edit.kind === "write");
+    expect(write?.path).toBe("/repo/a.tsx");
+    expect(write?.resultContent).toBe("export const A = () => null;\n");
+
+    const patch = session?.edits.find((edit) => edit.kind === "patch");
+    expect(patch?.patch).toContain("Add File: /repo/b.ts");
+
+    const replace = session?.edits.find((edit) => edit.kind === "replace");
+    expect(replace?.path).toBe("/repo/c.tsx");
+    expect(replace?.oldString).toBe("1"); // the fixture's old_string / new_string args surface as camelCase fields
+    expect(replace?.newString).toBe("2");
+
+    expect(
+      session?.edits.some((edit) => edit.kind === "delete" && edit.path === "/repo/old.ts"),
+    ).toBe(true);
+    // Write + ApplyPatch + StrReplace + Delete; the Read and the TodoWrite plan are not edits.
+    expect(session?.edits).toHaveLength(4);
+    // The Read result is captured as a base so the StrReplace reconstructs.
+    expect(session?.reads).toEqual([{ path: "/repo/c.tsx", content: "export const C = 1;\n" }]);
+  });
+
+  it("returns no candidates when the CLI home has no chats", () => {
+    expect(cursorCliCandidates([path.join(tempDir, "missing-cli-home")])).toEqual([]);
+  });
+});
diff --git a/packages/react-doctor/tests/stats-aggregate.test.ts b/packages/react-doctor/tests/stats-aggregate.test.ts
new file mode 100644
index 000000000..ccfd9c36d
--- /dev/null
+++ b/packages/react-doctor/tests/stats-aggregate.test.ts
@@ -0,0 +1,144 @@
+import { describe, expect, it } from "vite-plus/test";
+import type { Diagnostic } from "@react-doctor/core";
+import { aggregateStats, type ScoreComputer } from "../src/stats/aggregate-stats.js";
+import type { AgentSession, SessionScanResult, StatsProvider } from "../src/stats/types.js";
+
+const diagnostic = (rule: string, severity: "error" | "warning" = "warning"): Diagnostic => ({
+  filePath: "src/App.tsx", // Diagnostic fixture: every field except rule/severity is a fixed placeholder
+  plugin: "react-doctor",
+  rule,
+  severity,
+  message: "m",
+  help: "h",
+  line: 1,
+  column: 1,
+  category: "Correctness",
+});
+
+const result = (
+  provider: StatsProvider,
+  model: string,
+  filesScanned: number,
+  diagnostics: Diagnostic[],
+): SessionScanResult => {
+  const session: AgentSession = {
+    provider,
+    sessionId: `${provider}-${model}`, // results built with the same provider and model share a session id
+    transcriptPath: "/tmp/x.jsonl",
+    model,
+    cwd: "/repo",
+    edits: [],
+    reads: [],
+  };
+  return {
+    session,
+    diagnostics,
+    filesScanned,
+    reconstructedFiles: filesScanned, // the fixture reports every scanned file as reconstructed
+    unreconstructable: 0,
+  };
+};
+
+// Deterministic, offline score: cleaner code (fewer diagnostics) scores higher.
+const stubScore: ScoreComputer = async (diagnostics) => ({
+  score: Math.max(0, 100 - diagnostics.length * 5), // 5 points off per diagnostic, floored at 0
+  label: "stub",
+});
+
+describe("aggregateStats", () => {
+  it("ranks models best-first by score and surfaces best/worst", async () => {
+    const results = [
+      result("claude", "m1", 4, [diagnostic("r1"), diagnostic("r1")]),
+      result(
+        "codex",
+        "m2",
+        4,
+        Array.from({ length: 6 }, () => diagnostic("r2")),
+      ),
+    ];
+    const aggregated = await aggregateStats(results, null, stubScore);
+
+    expect(aggregated.models.map((group) => group.key)).toEqual(["claude/m1", "codex/m2"]);
+    expect(aggregated.best?.key).toBe("claude/m1");
+    expect(aggregated.best?.score).toBe(90);
+    expect(aggregated.worst?.key).toBe("codex/m2");
+    expect(aggregated.worst?.score).toBe(70);
+  });
+
+  it("computes diagnostics-per-file and top rules per group", async () => {
+    const results = [result("claude", "m1", 4, [diagnostic("r1"), diagnostic("r1")])];
+    const aggregated = await aggregateStats(results, null, stubScore);
+    const group = aggregated.models[0];
+    expect(group.totalDiagnostics).toBe(2);
+    expect(group.diagnosticsPerFile).toBe(0.5);
+    expect(group.topRules).toEqual([{ rule: "react-doctor/r1", count: 2 }]);
+  });
+
+  it("groups by provider and excludes under-sampled groups from the ranking", async () => {
+    const results = [
+      result("claude", "m1", 4, [diagnostic("r1")]),
+      result("cursor", "unknown", 1, [diagnostic("r2")]),
+    ];
+    const aggregated = await aggregateStats(results, null, stubScore);
+    // Cursor's single-file group is below the min-files threshold.
+    expect(aggregated.models.map((group) => group.key)).toEqual(["claude/m1"]);
+    expect(aggregated.providers.map((group) => group.provider)).toEqual(["claude"]);
+  });
+
+  it("weights the score by files and sessions so a tiny perfect sample can't top the board", async () => {
+    const results = [
+      result("claude", "big", 10, [diagnostic("r1")]),
+      result("claude", "big", 10, []),
+      result("claude", "big", 10, []),
+      result("claude", "big", 10, []),
+      result("claude", "big", 10, []),
+      ...Array.from({ length: 5 }, () =>
+        result("codex", "med", 10, [diagnostic("r2"), diagnostic("r2")]),
+      ),
+      result("cursor", "small", 3, []),
+    ];
+    const aggregated = await aggregateStats(results, null, stubScore);
+
+    // "small" has the best RAW score (100, zero diagnostics) but only 3 files
+    // from one session, so confidence weighting regresses it toward the mean and
+    // the well-sampled "big" group wins instead of the tiny perfect sample.
+    expect(aggregated.best?.key).toBe("claude/big");
+    expect(aggregated.models[0]?.key).toBe("claude/big");
+    const small = aggregated.models.find((group) => group.key === "cursor/small");
+    expect(small?.score).toBe(100);
+    expect(small?.weightedScore).toBeLessThan(100);
+    expect(aggregated.models[0]?.weightedScore ?? 0).toBeGreaterThan(small?.weightedScore ?? 0);
+  });
+
+  it("does not let dead (0-file) sessions inflate a group's weighting", async () => {
+    const productive = [
+      result("claude", "a", 10, [diagnostic("r1")]),
+      result("claude", "a", 10, [diagnostic("r1")]),
+    ];
+    // Same model "b": identical scored output, but padded with non-React/failed
+    // sessions that scanned no files. Those must not change the weighted score.
+    const padded = [
+      result("codex", "b", 10, [diagnostic("r1")]),
+      result("codex", "b", 10, [diagnostic("r1")]),
+      ...Array.from({ length: 8 }, () => result("codex", "b", 0, [])),
+    ];
+    const aggregated = await aggregateStats([...productive, ...padded], null, stubScore);
+    const productiveGroup = aggregated.models.find((group) => group.key === "claude/a");
+    const paddedGroup = aggregated.models.find((group) => group.key === "codex/b");
+    expect(productiveGroup?.score).toBe(paddedGroup?.score);
+    expect(productiveGroup?.weightedScore).toBe(paddedGroup?.weightedScore);
+    // The reported session count still reflects every analyzed session.
+    expect(paddedGroup?.sessions).toBe(10);
+  });
+
+  it("leaves the score null when a group lacks enough files to rank fairly", async () => {
+    const results = [result("claude", "m1", 1, [diagnostic("r1")])];
+    let called = false;
+    const aggregated = await aggregateStats(results, null, async () => {
+      called = true;
+      return { score: 0, label: "x" };
+    });
+    expect(called).toBe(false);
+    expect(aggregated.models).toEqual([]);
+  });
+});
diff --git a/packages/react-doctor/tests/stats-apply-patch.test.ts b/packages/react-doctor/tests/stats-apply-patch.test.ts
new file mode 100644
index 000000000..fd178ddfb
--- /dev/null
+++ b/packages/react-doctor/tests/stats-apply-patch.test.ts
@@ -0,0 +1,60 @@
+import { describe, expect, it } from "vite-plus/test";
+import { applyUpdateHunks, parseApplyPatch } from "../src/stats/parse-apply-patch.js";
+
+describe("parseApplyPatch", () => {
+  it("parses Add, Update, and Delete ops from one envelope", () => {
+    const patch = [
+      "*** Begin Patch",
+      "*** Add File: a.ts",
+      "+export const a = 1;",
+      "*** Update File: b.ts",
+      "@@",
+      " keep",
+      "-old",
+      "+new",
+      "*** Delete File: c.ts",
+      "*** End Patch",
+    ].join("\n");
+    const ops = parseApplyPatch(patch);
+    expect(ops).toHaveLength(3);
+    expect(ops[0]).toEqual({ type: "add", path: "a.ts", addedLines: ["export const a = 1;"] });
+    expect(ops[1].type).toBe("update");
+    expect(ops[1].path).toBe("b.ts");
+    expect(ops[2]).toEqual({ type: "delete", path: "c.ts" });
+  });
+
+  it("captures a Move to directive on an update", () => {
+    const patch = [
+      "*** Begin Patch",
+      "*** Update File: old.ts",
+      "*** Move to: new.ts",
+      "@@",
+      "+x",
+      "*** End Patch",
+    ].join("\n");
+    const ops = parseApplyPatch(patch);
+    expect(ops[0].movePath).toBe("new.ts");
+  });
+
+  it("returns nothing for a patch with no file headers", () => {
+    expect(parseApplyPatch("not a patch")).toEqual([]);
+  });
+});
+
+describe("applyUpdateHunks", () => {
+  it("applies context / add / remove against a base via line search", () => {
+    const base = "line one\nline two\nline three\n";
+    const result = applyUpdateHunks(base, [
+      "@@",
+      " line one",
+      "-line two",
+      "+line 2",
+      " line three",
+    ]);
+    expect(result).toBe("line one\nline 2\nline three\n");
+  });
+
+  it("returns null when a context line is not found in the base", () => {
+    expect(applyUpdateHunks("a\nb\n", ["@@", " missing", "+x"])).toBeNull();
+  });
+});
diff --git a/packages/react-doctor/tests/stats-is-react-source.test.ts b/packages/react-doctor/tests/stats-is-react-source.test.ts
new file mode 100644
index 000000000..ad63749da
--- /dev/null
+++ b/packages/react-doctor/tests/stats-is-react-source.test.ts
@@ -0,0 +1,56 @@
+import { describe, expect, it } from "vite-plus/test";
+import { isReactSourceFile } from "../src/stats/is-react-source.js";
+
+describe("isReactSourceFile", () => {
+  it("treats JSX extensions as React regardless of content", () => {
+    expect(isReactSourceFile("/repo/src/App.tsx", "export const App = () => null;")).toBe(true);
+    expect(isReactSourceFile("/repo/src/widget.jsx", "module.exports = {};")).toBe(true);
+  });
+
+  it("detects React via direct and ecosystem imports in .ts/.js files", () => {
+    expect(isReactSourceFile("/repo/src/useThing.ts", 'import { useState } from "react";')).toBe(
+      true,
+    );
+    expect(
+      isReactSourceFile("/repo/src/data.ts", 'import { useQuery } from "@tanstack/react-query";'),
+    ).toBe(true);
+    expect(
+      isReactSourceFile(
+        "/repo/src/nav.ts",
+        'import { useNavigation } from "@react-navigation/native";',
+      ),
+    ).toBe(true);
+    expect(isReactSourceFile("/repo/src/page.ts", 'const r = require("react-dom/server");')).toBe(
+      true,
+    );
+  });
+
+  it("detects React Server Component / server-action directives", () => {
+    expect(
+      isReactSourceFile("/repo/src/actions.ts", '"use server";\nexport async function go() {}'),
+    ).toBe(true);
+    expect(isReactSourceFile("/repo/src/client.ts", "'use client'\nexport const x = 1;")).toBe(
+      true,
+    );
+  });
+
+  it("rejects plain backend / util / config files", () => {
+    expect(isReactSourceFile("/repo/src/math.ts", "export const add = (a, b) => a + b;")).toBe(
+      false,
+    );
+    expect(
+      isReactSourceFile(
+        "/repo/server/db.ts",
+        'import { Pool } from "pg";\nexport const pool = new Pool();',
+      ),
+    ).toBe(false);
+    expect(isReactSourceFile("/repo/scripts/build.js", 'const fs = require("node:fs");')).toBe(
+      false,
+    );
+  });
+
+  it("does not mistake unrelated specifiers containing other words for React", () => {
+    expect(isReactSourceFile("/repo/src/a.ts", 'import x from "reactor-core";')).toBe(false);
+    expect(isReactSourceFile("/repo/src/b.ts", 'import y from "overreact";')).toBe(false);
+  });
+});
diff --git a/packages/react-doctor/tests/stats-reconstruct.test.ts b/packages/react-doctor/tests/stats-reconstruct.test.ts
new file mode 100644
index 000000000..7e14b3fe8
--- /dev/null
+++ b/packages/react-doctor/tests/stats-reconstruct.test.ts
@@ -0,0 +1,153 @@
+import * as path from "node:path";
+import { describe, expect, it } from "vite-plus/test";
+import { reconstructSession, resolveEditPaths } from "../src/stats/reconstruct-files.js";
+import type { AgentSession, FileEdit } from "../src/stats/types.js";
+
+const CWD = "/repo";
+
+// AgentSession fixture rooted at CWD; `overrides` replaces any of the defaults below.
+const session = (overrides: Partial<AgentSession>): AgentSession => ({
+  provider: "claude",
+  sessionId: "s1",
+  transcriptPath: "/tmp/s1.jsonl",
+  model: "test-model",
+  cwd: CWD,
+  edits: [],
+  reads: [],
+  ...overrides,
+});
+
+// Mirror reconstruct-files.ts' resolveAgainstCwd so expectations match the
+// platform-normalized paths the reconstruction emits (backslashes on Windows).
+const resolved = (rawPath: string): string =>
+  path.isAbsolute(rawPath) ? path.normalize(rawPath) : path.resolve(CWD, rawPath);
+
+// Indexes reconstructed files by absolute path so tests can look up final content.
+const byPath = (files: ReadonlyArray<{ absolutePath: string; content: string }>) =>
+  new Map(files.map((file) => [file.absolutePath, file.content]));
+
+describe("reconstructSession", () => {
+  it("uses Claude post-edit result content as the authoritative final state", () => {
+    const edits: FileEdit[] = [
+      { kind: "write", path: "/repo/src/a.ts", content: "export const a = 0;\n" },
+      { kind: "write", path: "/repo/src/a.ts", resultContent: "export const a = 1;\n" },
+    ];
+    const result = reconstructSession(session({ edits }));
+    expect(byPath(result.files).get(resolved("/repo/src/a.ts"))).toBe("export const a = 1;\n");
+    expect(result.unreconstructable).toEqual([]);
+  });
+
+  it("replays a Cursor write then StrReplace into final content", () => {
+    const edits: FileEdit[] = [
+      { kind: "write", path: "/repo/src/b.ts", content: "const x = 1;\n" },
+      { kind: "replace", path: "/repo/src/b.ts", oldString: "1", newString: "2" },
+    ];
+    const result = reconstructSession(session({ provider: "cursor", edits }));
+    expect(byPath(result.files).get(resolved("/repo/src/b.ts"))).toBe("const x = 2;\n");
+  });
+
+  it("flags a StrReplace with no in-session base as unreconstructable", () => {
+    const edits: FileEdit[] = [
+      { kind: "replace", path: "/repo/src/c.ts", oldString: "a", newString: "b" },
+    ];
+    const result = reconstructSession(session({ provider: "cursor", edits }));
+    expect(result.files).toEqual([]);
+    expect(result.unreconstructable).toEqual([resolved("/repo/src/c.ts")]);
+  });
+
+  it("drops a StrReplace whose oldString is absent from the base rather than linting stale content", () => {
+    const edits: FileEdit[] = [
+      { kind: "write", path: "/repo/src/r.ts", content: "const x = 1;\n" },
+      { kind: "replace", path: "/repo/src/r.ts", oldString: "does-not-exist", newString: "y" },
+    ];
+    const result = reconstructSession(session({ provider: "cursor", edits }));
+    expect(result.files).toEqual([]);
+    expect(result.unreconstructable).toEqual([resolved("/repo/src/r.ts")]);
+  });
+
+  it("reconstructs a Codex apply_patch Add File", () => {
+    const patch =
+      "*** Begin Patch\n*** Add File: /repo/src/d.ts\n+export const d = 1;\n*** End Patch";
+    const result = reconstructSession(
+      session({ provider: "codex", edits: [{ kind: "patch", path: "", patch }] }),
+    );
+    expect(byPath(result.files).get(resolved("/repo/src/d.ts"))).toBe("export const d = 1;\n");
+  });
+
+  it("applies a Codex apply_patch Update File on an in-session base", () => {
+    const add =
+      "*** Begin Patch\n*** Add File: /repo/src/e.ts\n+const value = 1;\n+export default value;\n*** End Patch";
+    const update =
+      "*** Begin Patch\n*** Update File: /repo/src/e.ts\n@@\n-const value = 1;\n+const value = 2;\n export default value;\n*** End Patch";
+    const result = reconstructSession(
+      session({
+        provider: "codex",
+        edits: [
+          { kind: "patch", path: "", patch: add },
+          { kind: "patch", path: "", patch: update },
+        ],
+      }),
+    );
+    expect(byPath(result.files).get(resolved("/repo/src/e.ts"))).toBe(
+      "const value = 2;\nexport default value;\n",
+    );
+  });
+
+  it("flags an apply_patch Update whose hunk does not match the base as unreconstructable", () => {
+    const add = "*** Begin Patch\n*** Add File: /repo/src/h.ts\n+const value = 1;\n*** End Patch";
+    const update =
+      "*** Begin Patch\n*** Update File: /repo/src/h.ts\n@@\n-const value = 999;\n+const value = 2;\n*** End Patch";
+    const result = reconstructSession(
+      session({
+        provider: "codex",
+        edits: [
+          { kind: "patch", path: "", patch: add },
+          { kind: "patch", path: "", patch: update },
+        ],
+      }),
+    );
+    expect(result.files).toEqual([]);
+    expect(result.unreconstructable).toEqual([resolved("/repo/src/h.ts")]);
+  });
+
+  it("resolves relative edit paths against the session cwd", () => {
+    const edits: FileEdit[] = [{ kind: "write", path: "src/f.ts", content: "export {};\n" }];
+    const result = reconstructSession(session({ edits }));
+    expect(result.files.map((file) => file.absolutePath)).toEqual([resolved("src/f.ts")]);
+  });
+
+  it("ignores files outside the lintable extension allowlist", () => {
+    const edits: FileEdit[] = [
+      { kind: "write", path: "/repo/README.md", content: "# hi\n" },
+      { kind: "replace", path: "/repo/notes.md", oldString: "x", newString: "y" },
+    ];
+    const result = reconstructSession(session({ edits }));
+    expect(result.files).toEqual([]);
+    expect(result.unreconstructable).toEqual([]);
+  });
+
+  it("drops deleted files from both output and the coverage gap list", () => {
+    const edits: FileEdit[] = [
+      { kind: "write", path: "/repo/src/g.ts", content: "export {};\n" },
+      { kind: "delete", path: "/repo/src/g.ts" },
+    ];
+    const result = reconstructSession(session({ edits }));
+    expect(result.files).toEqual([]);
+    expect(result.unreconstructable).toEqual([]);
+  });
+});
+
+describe("resolveEditPaths", () => {
+  it("collects absolute paths from plain edits and apply_patch envelopes", () => {
+    const patch = "*** Begin Patch\n*** Update File: /repo/src/x.ts\n@@\n+x\n*** End Patch";
+    const result = resolveEditPaths(
+      session({
+        edits: [
+          { kind: "write", path: "src/y.ts", content: "" },
+          { kind: "patch", path: "", patch },
+        ],
+      }),
+    );
+    expect(new Set(result)).toEqual(new Set([resolved("src/y.ts"), resolved("/repo/src/x.ts")]));
+  });
+});
diff --git a/packages/react-doctor/tests/stats-render.test.ts b/packages/react-doctor/tests/stats-render.test.ts
new file mode 100644
index 000000000..97dc17aa2
--- /dev/null
+++ b/packages/react-doctor/tests/stats-render.test.ts
@@ -0,0 +1,99 @@
+import { describe, expect, it } from "vite-plus/test";
+import { renderStatsReport } from "../src/stats/render-stats.js";
+import type { GroupStats, StatsReport } from "../src/stats/types.js";
+
+// GroupStats fixture for one ranked group; `overrides` replaces any default field.
+const group = (overrides: Partial<GroupStats>): GroupStats => ({
+  key: "claude/m1",
+  provider: "claude",
+  sessions: 1,
+  filesScanned: 4,
+  unreconstructable: 0,
+  totalDiagnostics: 2,
+  errorCount: 0,
+  warningCount: 2,
+  diagnosticsPerFile: 0.5,
+  score: 90,
+  scoreLabel: "good",
+  weightedScore: 88,
+  topRules: [],
+  ...overrides,
+});
+
+// Empty StatsReport fixture (nothing ranked); `overrides` supplies the fields under test.
+const report = (overrides: Partial<StatsReport>): StatsReport => ({
+  scope: "repo",
+  directory: "/repo",
+  models: [],
+  providers: [],
+  best: null,
+  worst: null,
+  sessionsAnalyzed: 0,
+  sessionsRanked: 0,
+  sessionsNonReact: 0,
+  sessionsUnreconstructable: 0,
+  generatedAt: "2026-06-20T00:00:00.000Z",
+  ...overrides,
+});
+
+describe("renderStatsReport", () => {
+  it("renders a model leaderboard with the best/worst callout", () => {
+    const best = group({ key: "claude/opus", score: 95 });
+    const worst = group({ key: "codex/gpt", provider: "codex", score: 60, diagnosticsPerFile: 2 });
+    const output = renderStatsReport(
+      report({
+        models: [best, worst],
+        providers: [best],
+        best,
+        worst,
+        sessionsAnalyzed: 2,
+        sessionsRanked: 2,
+      }),
+    );
+    expect(output).toContain("React Doctor leaderboard");
+    expect(output).toContain("Which agent writes the cleanest React code");
+    expect(output).toContain("opus");
+    expect(output).toContain("gpt");
+    expect(output).toContain("Best");
+    expect(output).toContain("Worst");
+  });
+
+  it("shows a friendly message when there is nothing to rank", () => {
+    const output = renderStatsReport(report({ sessionsAnalyzed: 3 }));
+    expect(output).toContain("Nothing to rank yet");
+  });
+
+  it("notes non-React sessions separately from unreplayable ones", () => {
+    const only = group({});
+    const output = renderStatsReport(
+      report({
+        models: [only],
+        providers: [only],
+        best: only,
+        sessionsAnalyzed: 5,
+        sessionsRanked: 1,
+        sessionsNonReact: 3,
+        sessionsUnreconstructable: 1,
+      }),
+    );
+    expect(output).toContain("Skipped 3 that changed only non-React files");
+    expect(output).toContain("Skipped 1 that used edits we could not replay");
+  });
+
+  it("appends the community leaderboard with sample sizes when one is supplied", () => {
+    const only = group({ key: "claude/opus" });
+    const output = renderStatsReport(report({ models: [only], providers: [only], best: only }), {
+      generatedAt: "2026-06-22T00:00:00.000Z",
+      models: [{ model: "opus", harness: "claude", communityScore: 81, runs: 42, files: 900 }],
+    });
+    expect(output).toContain("Community leaderboard (all react-doctor users)");
+    expect(output).toContain("81");
+    expect(output).toContain("42");
+  });
+
+  it("omits the community section when no board is supplied (offline / --no-telemetry)", () => {
+    const only = group({});
+    const output = renderStatsReport(report({ models: [only], providers: [only], best: only }));
+    expect(output).not.toContain("Community leaderboard");
+  });
+});
diff --git a/packages/react-doctor/tests/stats-report-run.test.ts b/packages/react-doctor/tests/stats-report-run.test.ts
new file mode 100644
index 000000000..50d84112e
--- /dev/null
+++ b/packages/react-doctor/tests/stats-report-run.test.ts
@@ -0,0 +1,136 @@
+import { afterEach, describe, expect, it, vi } from "vite-plus/test";
+import { reportStatsRun } from "../src/stats/report-stats-run.js";
+import type { GroupStats, StatsReport } from "../src/stats/types.js";
+
+// GroupStats fixture; its topRules entry doubles as a canary for the leak check below.
+const group = (overrides: Partial<GroupStats>): GroupStats => ({
+  key: "claude/claude-sonnet-4-5",
+  provider: "claude",
+  sessions: 2,
+  filesScanned: 8,
+  unreconstructable: 0,
+  totalDiagnostics: 3,
+  errorCount: 1,
+  warningCount: 2,
+  diagnosticsPerFile: 0.375,
+  score: 90,
+  scoreLabel: "Great",
+  weightedScore: 84,
+  // topRules carries rule identifiers — must NEVER reach the wire payload.
+  topRules: [{ rule: "react-doctor/no-array-index-key", count: 3 }],
+  ...overrides,
+});
+
+// StatsReport fixture whose model rows are `models`; `best` is the first row, if any.
+const report = (models: GroupStats[]): StatsReport => ({
+  scope: "repo",
+  directory: "/repo",
+  models,
+  providers: [group({ key: "claude", provider: "claude" })],
+  best: models[0] ?? null,
+  worst: null,
+  sessionsAnalyzed: 4,
+  sessionsRanked: 2,
+  sessionsNonReact: 1,
+  sessionsUnreconstructable: 0,
+  generatedAt: "2026-06-22T00:00:00.000Z",
+});
+
+// Replaces the global fetch with a vi.fn wrapper around `impl` (undone in afterEach).
+const stubFetch = (impl: typeof fetch): void => {
+  vi.stubGlobal("fetch", vi.fn(impl));
+};
+
+// Parses a captured request body as JSON; assumes the body was sent as a string.
+const decodeBody = (body: BodyInit | null | undefined): unknown => JSON.parse(body as string);
+
+describe("reportStatsRun", () => {
+  afterEach(() => {
+    vi.unstubAllGlobals();
+    vi.restoreAllMocks();
+  });
+
+  it("sends only the four code-free leaderboard fields per row — no source, paths, or identity", async () => {
+    let captured: unknown;
+    stubFetch(async (_url, init) => {
+      captured = decodeBody(init?.body);
+      return new Response(JSON.stringify({ stored: true }), { status: 200 });
+    });
+
+    await reportStatsRun(report([group({})]));
+
+    expect(captured).toEqual({
+      schemaVersion: 1,
+      models: [{ model: "claude-sonnet-4-5", harness: "claude", score: 84, files: 8 }],
+    });
+    // Belt-and-suspenders: the serialized body must carry none of the leaky fields.
+    const serialized = JSON.stringify(captured);
+    for (const leak of [
+      "topRules",
+      "message",
+      "help",
+      "filePath",
+      "repo",
+      "sha",
+      "directory",
+      "no-array-index-key",
+    ]) {
+      expect(serialized).not.toContain(leak);
+    }
+  });
+
+  it("sends plain JSON (no gzip) and returns the parsed community leaderboard", async () => {
+    let encoding: string | undefined;
+    stubFetch(async (_url, init) => {
+      encoding = new Headers(init?.headers).get("content-encoding") ?? undefined;
+      return new Response(
+        JSON.stringify({
+          stored: true,
+          community: {
+            generatedAt: "2026-06-22T00:00:00.000Z",
+            models: [
+              {
+                model: "claude-sonnet-4-5",
+                harness: "claude",
+                communityScore: 81,
+                runs: 42,
+                files: 900,
+              },
+            ],
+          },
+        }),
+        { status: 200, headers: { "Content-Type": "application/json" } },
+      );
+    });
+
+    const community = await reportStatsRun(report([group({})]));
+
+    expect(encoding).toBeUndefined();
+    expect(community?.models[0]).toEqual({
+      model: "claude-sonnet-4-5",
+      harness: "claude",
+      communityScore: 81,
+      runs: 42,
+      files: 900,
+    });
+  });
+
+  it("returns null (never throws) when the API is unreachable", async () => {
+    vi.spyOn(console, "warn").mockImplementation(() => {});
+    stubFetch(async () => {
+      throw new Error("network unavailable");
+    });
+    expect(await reportStatsRun(report([group({})]))).toBeNull();
+  });
+
+  it("returns null on a non-2xx response", async () => {
+    vi.spyOn(console, "warn").mockImplementation(() => {});
+    stubFetch(async () => new Response("boom", { status: 500, statusText: "Server Error" }));
+    expect(await reportStatsRun(report([group({})]))).toBeNull();
+  });
+
+  it("returns null when the response omits a community board", async () => {
+    stubFetch(async () => new Response(JSON.stringify({ stored: true }), { status: 200 }));
+    expect(await reportStatsRun(report([group({})]))).toBeNull();
+  });
+});
diff --git a/packages/react-doctor/tests/stats-trace.test.ts b/packages/react-doctor/tests/stats-trace.test.ts
new file mode 100644
index 000000000..7e93377d9
--- /dev/null
+++ b/packages/react-doctor/tests/stats-trace.test.ts
@@ -0,0 +1,85 @@
+import { describe, expect, it } from "vite-plus/test";
+import {
+  buildStatsRowAttributes,
+  recordStatsLeaderboard,
+} from "../src/cli/utils/with-sentry-stats-span.js";
+import { toLeaderboardRow } from "../src/stats/leaderboard-row.js";
+import type { GroupStats } from "../src/stats/types.js";
+
+// GroupStats fixture; `overrides` replaces any default field.
+const group = (overrides: Partial<GroupStats>): GroupStats => ({
+  key: "claude/claude-sonnet-4-5",
+  provider: "claude",
+  sessions: 1,
+  filesScanned: 4,
+  unreconstructable: 0,
+  totalDiagnostics: 2,
+  errorCount: 0,
+  warningCount: 2,
+  diagnosticsPerFile: 0.5,
+  score: 90,
+  scoreLabel: "good",
+  weightedScore: 88,
+  topRules: [],
+  ...overrides,
+});
+
+describe("buildStatsRowAttributes", () => {
+  it("projects the four leaderboard columns, stripping the provider prefix from the model", () => {
+    expect(buildStatsRowAttributes(group({}))).toEqual({
+      "stats.model": "claude-sonnet-4-5",
+      "stats.harness": "claude",
+      "stats.score": 88,
+      "stats.files": 4,
+    });
+  });
+
+  it("ranks on the confidence-weighted score, not the raw score", () => {
+    expect(buildStatsRowAttributes(group({ score: 90, weightedScore: 72 }))["stats.score"]).toBe(
+      72,
+    );
+  });
+
+  it("drops an undersampled (null) score rather than coercing it to a string", () => {
+    expect(buildStatsRowAttributes(group({ weightedScore: null }))).not.toHaveProperty(
+      "stats.score",
+    );
+  });
+
+  it("passes a provider-only key (no slash) through as the model name", () => {
+    expect(buildStatsRowAttributes(group({ key: "codex", provider: "codex" }))["stats.model"]).toBe(
+      "codex",
+    );
+  });
+
+  it("emits only the four leaderboard attribute keys — never code, paths, or identity", () => {
+    expect(Object.keys(buildStatsRowAttributes(group({}))).sort()).toEqual([
+      "stats.files",
+      "stats.harness",
+      "stats.model",
+      "stats.score",
+    ]);
+  });
+
+  it("derives the span attributes from the same projection the /api/stats payload uses (no drift)", () => {
+    const sample = group({
+      key: "cursor/composer-2.5",
+      provider: "cursor",
+      weightedScore: 67,
+      filesScanned: 12,
+    });
+    const row = toLeaderboardRow(sample);
+    expect(buildStatsRowAttributes(sample)).toEqual({
+      "stats.model": row.model,
+      "stats.harness": row.harness,
+      "stats.score": row.score,
+      "stats.files": row.files,
+    });
+  });
+});
+
+describe("recordStatsLeaderboard", () => {
+  it("is a no-op when the run is not traced (no root span)", () => {
+    expect(recordStatsLeaderboard([group({})], undefined)).toBeUndefined();
+  });
+});