millionco · aidenybai · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
diff --git a/.changeset/stats-agent-leaderboard.md b/.changeset/stats-agent-leaderboard.md
@@ -0,0 +1,15 @@
+---
+"react-doctor": patch
+---
+
+Add a `react-doctor stats` subcommand — a per-model code-quality leaderboard built from local AI agent chat history.
+
+`stats` reads local agent history — Claude Code (`~/.claude`) and Codex (`~/.codex`) transcripts, plus Cursor's GUI composer databases and CLI agent stores (`~/.cursor`, `~/.cursor-nightly`) — reconstructs the file content each model actually wrote (Claude post-edit snapshots, Cursor full post-edit file snapshots, Codex `apply_patch` envelopes), lints that content with the existing engine, and ranks models and providers by their React Doctor score and diagnostics-per-file. The job: answer "which agent/model writes the cleanest React code in my repo".
+
+- Only the React code each model wrote is scored. Reconstructed files are filtered to actual React (JSX/TSX, `use client`/`use server` directives, or a React-ecosystem import) before linting, so a model's plain backend/util/config files don't pad its file count or dilute its diagnostics-per-file. A scan that errors, is skipped, or whose lint phase fails is dropped rather than counted as zero-diagnostic "clean" code, so un-lintable output can't inflate a model's score.
+- Ranking is by a confidence-weighted score, not the raw score: each group's score is regressed toward the global mean by its evidence, so a model with a handful of clean files can't top the board on a tiny sample. Files are the dominant signal; sessions only lightly discount the file weight (many files from one session are one correlated sample) and never below a floor.
+- Cursor is read from every place it stores chats: the GUI composer database (`state.vscdb`) for both the stable and Nightly builds, and the CLI agent's per-session stores under `~/.cursor` and `~/.cursor-nightly`. Each session carries its real model (e.g. `claude-opus-4-8`, `gpt-5.5`, `composer-2.5`) and a faithful reconstruction of every edited file (full GUI post-edit snapshots; CLI `Write`/`ApplyPatch`/`StrReplace`/`Delete` tool calls replayed against captured reads). A database a running editor holds locked is read via SQLite's `immutable` mode rather than skipped. Attribution falls back to `unknown` only for GUI chats left on the "Auto" model.
+- Default scope is the current repository (sessions whose cwd or edits touch the repo root); `--global` ranks across every repo on the machine. `--since`, `--limit`, and `--provider` bound the work.
+- `--json` emits a structured leaderboard (`{ schemaVersion, scope, models, providers, best, worst, … }`); the terminal output shows the top models and per-tool tables with a single score bar (the confidence-weighted score) and a best/worst callout.
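+- Coverage is honest about its limits: Codex shell-based edits are not faithfully reconstructable (surfaced as skipped), reading any Cursor database requires `node:sqlite` (Node 22.13+), and the score requires network access — with `--no-score` or `--no-telemetry` the score API is skipped, scores show n/a, and ranking falls back to diagnostics-per-file.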
+- Anonymized Sentry tracing (CLI only, same gating as the scan path — off under `--no-score`, in tests, and for the programmatic API): each run is one `cli.stats` trace with a discover/scan/aggregate latency waterfall, and every ranked model is a queryable `stats.leaderboard_row` span carrying its model, harness, confidence-weighted score, and files scored — so the leaderboard is sliceable in Sentry's Trace Explorer.
diff --git a/.github/workflows/publish-any-commit.yml b/.github/workflows/publish-any-commit.yml
@@ -40,7 +40,8 @@ jobs:
             if pnpm dlx pkg-pr-new publish \
               ./packages/react-doctor \
               ./packages/oxlint-plugin-react-doctor \
-              ./packages/eslint-plugin-react-doctor; then
+              ./packages/eslint-plugin-react-doctor \
+              ./packages/deslop-js; then
               exit 0
             fi
 

diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts
@@ -81,6 +81,12 @@ export const SCORE_BAR_WIDTH_CHARS = 50;
 
 export const SCORE_API_URL = "https://www.react.doctor/api/score";
 
+// Endpoint for the `react-doctor stats` leaderboard rows. The CLI POSTs the same
+// code-free `{model, harness, score, files}` rows it reports to Sentry; the
+// endpoint stores them and returns the community leaderboard. Overridable for
+// local e2e via `REACT_DOCTOR_STATS_API_URL` (read in the CLI client, not here).
+export const STATS_API_URL = "https://www.react.doctor/api/stats";
+
 export const ENTERPRISE_CONTACT_URL = "https://react.doctor/enterprise";
 
 export const SHARE_BASE_URL = "https://react.doctor/share";

diff --git a/packages/core/src/highlighter.ts b/packages/core/src/highlighter.ts
@@ -1,12 +1,21 @@
 import pc from "picocolors";
 
+// Orange (Claude's brand) is outside the 16-color palette picocolors ships, so
+// it is emitted as a hand-built 256-color escape. When color is disabled the
+// input is returned as a plain string.
+const ORANGE_ANSI_CODE = 208;
+const makeOrange = (enabled: boolean): ((input: string | number) => string) => {
+  if (!enabled) return (input) => String(input);
+  return (input) => `\u001b[38;5;${ORANGE_ANSI_CODE}m${input}\u001b[39m`;
+};
+
 export const highlighter = {
   error: pc.red,
   warn: pc.yellow,
   info: pc.cyan,
   success: pc.green,
   dim: pc.dim,
   gray: pc.gray,
+  orange: makeOrange(pc.isColorSupported),
   bold: pc.bold,
 };
 
@@ -27,5 +36,6 @@ export const setColorEnabled = (enabled: boolean): void => {
   highlighter.success = colors.green;
   highlighter.dim = colors.dim;
   highlighter.gray = colors.gray;
+  highlighter.orange = makeOrange(enabled);
   highlighter.bold = colors.bold;
 };
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
@@ -88,6 +88,7 @@ export * from "./utils/define-config.js";
 export * from "./utils/group-by.js";
 export * from "./utils/has-published-fix-recipe.js";
 export * from "./utils/is-large-minified-file.js";
+export * from "./utils/is-path-inside.js";
 export * from "./utils/list-source-files.js";
 export * from "./utils/map-with-concurrency.js";
 export * from "./utils/match-glob-pattern.js";

diff --git a/packages/core/src/materialize-source-tree.ts b/packages/core/src/materialize-source-tree.ts
@@ -3,24 +3,14 @@ import fs from "node:fs";
 import path from "node:path";
 import { STAGED_FILES_PROJECT_CONFIG_FILENAMES } from "./constants.js";
 import type { ReactDoctorError } from "./errors.js";
+import { isPathInside } from "./utils/is-path-inside.js";
 
 export interface MaterializedTree {
   readonly tempDirectory: string;
   readonly materializedFiles: ReadonlyArray<string>;
   readonly cleanup: () => void;
 }
 
-/**
- * Zip-Slip defense: relative paths come from git (`diff --name-only`), which
- * normalizes during ordinary adds, but a crafted index/pack/symlinked tree can
- * smuggle `..` segments that escape the temp root. Resolve against the temp dir
- * and reject anything that lands outside before writing.
- */
-const isPathInsideDirectory = (childAbsolutePath: string, parentAbsolutePath: string): boolean => {
-  const relative = path.relative(parentAbsolutePath, childAbsolutePath);
-  return Boolean(relative) && !relative.startsWith("..") && !path.isAbsolute(relative);
-};
-
 /**
  * Writes a set of source files (supplied by `readContent` — e.g.
  * `git show <ref>:<path>` for a baseline tree, or `git show :<path>` for the
@@ -44,7 +34,7 @@ export const materializeSourceTree = (input: {
       const content = yield* input.readContent(relativePath).pipe(Effect.orElseSucceed(() => null));
       if (content === null) continue;
       const candidateTargetPath = path.resolve(resolvedTempDirectory, relativePath);
-      if (!isPathInsideDirectory(candidateTargetPath, resolvedTempDirectory)) continue;
+      if (!isPathInside(candidateTargetPath, resolvedTempDirectory)) continue;
       yield* Effect.sync(() => {
         fs.mkdirSync(path.dirname(candidateTargetPath), { recursive: true });
         fs.writeFileSync(candidateTargetPath, content);

diff --git a/packages/core/src/utils/is-path-inside.ts b/packages/core/src/utils/is-path-inside.ts
@@ -0,0 +1,28 @@
+import * as path from "node:path";
+
+/** Options for `isPathInside`. */
+export interface IsPathInsideOptions {
+  /**
+   * When `true`, `childPath` equal to `parentPath` counts as inside. Omitted or
+   * `false` (the default) treats the parent directory itself as not inside.
+   */
+  readonly allowSame?: boolean;
+}
+
+/**
+ * `true` when `childPath` resolves within `parentPath`. By default the parent
+ * directory itself does not count (the strict zip-slip guard); pass
+ * `allowSame: true` to treat an exact match as inside (scope membership).
+ *
+ * Zip-Slip defense: paths from untrusted sources (a crafted git index/pack/
+ * symlinked tree, a reconstructed agent transcript) can smuggle `..` segments
+ * that escape a temp root. Only a real `..` segment counts as an escape, so an
+ * entry whose name merely begins with two dots (`..cache`) stays inside. This
+ * is the one audited copy of that guard, shared across the staged/baseline
+ * scan paths and the stats reconstruction tree so the two cannot drift.
+ */
+export const isPathInside = (
+  childPath: string,
+  parentPath: string,
+  options: IsPathInsideOptions = {},
+): boolean => {
+  const relative = path.relative(parentPath, childPath);
+  if (!relative) return Boolean(options.allowSame);
+  return relative !== ".." && !relative.startsWith(`..${path.sep}`) && !path.isAbsolute(relative);
+};
diff --git a/packages/react-doctor/src/cli/commands/stats.ts b/packages/react-doctor/src/cli/commands/stats.ts
@@ -0,0 +1,186 @@
+import * as path from "node:path";
+import { resolveScanTarget, type ReactDoctorConfig } from "@react-doctor/core";
+import { aggregateStats } from "../../stats/aggregate-stats.js";
+import { STATS_DEFAULT_SESSION_LIMIT } from "../../stats/constants.js";
+import { discoverSessions } from "../../stats/discover-sessions.js";
+import { renderStatsReport } from "../../stats/render-stats.js";
+import { reportStatsRun } from "../../stats/report-stats-run.js";
+import { runStatsScan } from "../../stats/run-stats-scan.js";
+import type {
+  CommunityLeaderboard,
+  StatsProvider,
+  StatsReport,
+  StatsScopeOptions,
+} from "../../stats/types.js";
+import { METRIC } from "../utils/constants.js";
+import { enableJsonMode } from "../utils/json-mode.js";
+import { recordCount } from "../utils/record-metric.js";
+import { spinner } from "../utils/spinner.js";
+import {
+  recordStatsLeaderboard,
+  traceStatsPhase,
+  withSentryStatsSpan,
+} from "../utils/with-sentry-stats-span.js";
+
+/** Raw flags for the `stats` subcommand, before validation in `statsAction`. */
+export interface StatsFlags {
+  /** `--global`: rank across every repo instead of only the current one. */
+  global?: boolean;
+  /** `--since`: raw date string; must be parseable by `new Date(...)`. */
+  since?: string;
+  /** `--limit`: raw positive integer; defaults to `STATS_DEFAULT_SESSION_LIMIT`. */
+  limit?: string;
+  /** `--provider`: one of `claude`, `codex`, `cursor`. */
+  provider?: string;
+  /** `--json`: write the report to stdout as JSON instead of the rendered view. */
+  json?: boolean;
+  /** Directory to analyze; defaults to `process.cwd()`. */
+  cwd?: string;
+  // Commander negations from the root program: `--no-score` → `score: false`,
+  // `--no-telemetry` → `telemetry: false`. Both opt out of the network.
+  score?: boolean;
+  telemetry?: boolean;
+}
+
+// Providers accepted by `--provider`. Typed as `Set<string>` so raw flag text
+// can be looked up without a cast.
+const VALID_PROVIDERS = new Set<string>(["claude", "codex", "cursor"]);
+
+/** Type guard: narrows a raw string to `StatsProvider` via set membership. */
+const isStatsProvider = (value: string): value is StatsProvider => {
+  return VALID_PROVIDERS.has(value);
+};
+
+/**
+ * Validates the raw `--provider` flag. An absent flag passes through as
+ * `undefined`; a known provider is returned as-is.
+ *
+ * @throws Error when the value is not a known provider.
+ */
+const parseProvider = (value: string | undefined): StatsProvider | undefined => {
+  if (value === undefined || isStatsProvider(value)) return value;
+  throw new Error(`Unknown provider "${value}". Expected one of: claude, codex, cursor.`);
+};
+
+/**
+ * Parses the raw `--since` flag with the `Date` constructor. An absent flag
+ * yields `undefined`.
+ *
+ * @throws Error when the value does not parse to a valid date.
+ */
+const parseSince = (value: string | undefined): Date | undefined => {
+  if (value === undefined) return undefined;
+  const sinceDate = new Date(value);
+  const isValidDate = !Number.isNaN(sinceDate.getTime());
+  if (isValidDate) return sinceDate;
+  throw new Error(`Invalid --since date "${value}". Use e.g. 2026-06-01.`);
+};
+
+/**
+ * Parses the raw `--limit` flag into a positive integer. An absent flag
+ * yields `STATS_DEFAULT_SESSION_LIMIT`.
+ *
+ * @throws Error when the value is not a positive whole number.
+ */
+const parseLimit = (value: string | undefined): number => {
+  if (value === undefined) return STATS_DEFAULT_SESSION_LIMIT;
+  // `Number.parseInt` stops at the first non-digit, so "10x" or "2.5" would be
+  // silently accepted as 10 / 2; require the whole flag to be decimal digits.
+  const parsed = /^\d+$/.test(value.trim()) ? Number.parseInt(value, 10) : Number.NaN;
+  if (!Number.isSafeInteger(parsed) || parsed <= 0) {
+    throw new Error(`Invalid --limit "${value}". Use a positive integer, e.g. 200.`);
+  }
+  return parsed;
+};
+
+/**
+ * Resolves the directory to analyze plus any user config. If
+ * `resolveScanTarget` throws, falls back to the plainly resolved directory
+ * with no config instead of failing the command.
+ */
+const resolveTarget = async (
+  directory: string,
+): Promise<{ root: string; userConfig: ReactDoctorConfig | null }> => {
+  try {
+    const { resolvedDirectory, userConfig } = await resolveScanTarget(directory);
+    return { root: resolvedDirectory, userConfig };
+  } catch {
+    // Deliberate fallback: any resolution failure degrades to "no config".
+    return { root: path.resolve(directory), userConfig: null };
+  }
+};
+
+/**
+ * Entry point for `react-doctor stats`: discovers local agent sessions, scans
+ * the code each one wrote, aggregates a per-model leaderboard, and writes it
+ * to stdout — pretty-printed JSON under `--json`, otherwise the rendered report.
+ *
+ * @param flags Raw CLI flags; `since`, `limit`, and `provider` are validated here.
+ * @throws Error when `--since`, `--limit`, or `--provider` is invalid.
+ */
+export const statsAction = async (flags: StatsFlags): Promise<void> => {
+  const directory = flags.cwd ?? process.cwd();
+  // Register JSON mode up front so any throw (flag parsing, scan, or score API
+  // failure) is emitted as a structured JSON error by the top-level handler
+  // instead of plain text — and so incidental logs (e.g. a score-API warning)
+  // never corrupt the report on stdout.
+  if (flags.json) enableJsonMode({ compact: false, directory });
+  const scope: StatsScopeOptions = {
+    global: flags.global ?? false,
+    since: parseSince(flags.since),
+    limit: parseLimit(flags.limit),
+    provider: parseProvider(flags.provider),
+  };
+
+  const { root, userConfig } = await resolveTarget(directory);
+
+  // `--no-score` / `--no-telemetry` (or `noScore` in config) opt out of the
+  // network entirely — same signal `resolve-cli-inspect-options` uses. When off,
+  // we skip the score API (scores show n/a, ranked by diagnostics-per-file) and
+  // the `/api/stats` report, so a `--no-telemetry` run is fully local.
+  const telemetryEnabled = !(
+    flags.score === false ||
+    flags.telemetry === false ||
+    Boolean(userConfig?.noScore)
+  );
+
+  // ora renders to stderr; suppress it in JSON mode so the run stays quiet.
+  // The whole run is one Sentry trace: each phase below is a child span, and
+  // every ranked model becomes a queryable leaderboard-row span.
+  const { report, community } = await withSentryStatsSpan<{
+    report: StatsReport;
+    community: CommunityLeaderboard | null;
+  }>(async (rootSpan) => {
+    const progress = flags.json ? null : spinner("Looking through your agent history…").start();
+    try {
+      const sessions = await traceStatsPhase("discover sessions", () =>
+        discoverSessions(root, scope, (foundCount) =>
+          progress?.update(`Looking through your agent history… (${foundCount} found)`),
+        ),
+      );
+      progress?.update("Checking the code each agent wrote…");
+      const results = await traceStatsPhase("scan sessions", () =>
+        runStatsScan(sessions, scope.global ? null : root, {
+          onProgress: (completedCount, totalCount) =>
+            progress?.update(
+              `Checking the code each agent wrote… (${completedCount}/${totalCount})`,
+            ),
+        }),
+      );
+      progress?.update(telemetryEnabled ? "Scoring…" : "Ranking…");
+      const aggregated = await traceStatsPhase("aggregate + score", () =>
+        // Skip the score API when telemetry is off: a null scorer leaves every
+        // score null, and ranking falls back to diagnostics-per-file.
+        aggregateStats(
+          results,
+          userConfig,
+          telemetryEnabled ? undefined : () => Promise.resolve(null),
+        ),
+      );
+
+      const built: StatsReport = {
+        scope: scope.global ? "global" : "repo",
+        directory: root,
+        models: aggregated.models,
+        providers: aggregated.providers,
+        best: aggregated.best,
+        worst: aggregated.worst,
+        sessionsAnalyzed: results.length,
+        sessionsRanked: results.filter((result) => result.filesScanned > 0).length,
+        sessionsNonReact: results.filter(
+          (result) => result.filesScanned === 0 && result.reconstructedFiles > 0,
+        ).length,
+        sessionsUnreconstructable: results.filter(
+          (result) =>
+            result.filesScanned === 0 &&
+            result.reconstructedFiles === 0 &&
+            result.unreconstructable > 0,
+        ).length,
+        generatedAt: new Date().toISOString(),
+      };
+      recordStatsLeaderboard(built.models, rootSpan);
+      // Send the same leaderboard rows to our own store and get the community
+      // board back. Telemetry-gated: when telemetry is off, neither the request
+      // nor its spinner message happens, so the run stays fully local.
+      // NOTE(review): meant to be best-effort — confirm `reportStatsRun` handles
+      // its own failures, since a rejection here would fail the whole run.
+      let communityBoard: CommunityLeaderboard | null = null;
+      if (telemetryEnabled) {
+        progress?.update("Comparing with the community…");
+        communityBoard = await traceStatsPhase("report leaderboard", () => reportStatsRun(built));
+      }
+      progress?.succeed("Done.");
+      return { report: built, community: communityBoard };
+    } finally {
+      progress?.stop();
+    }
+  });
+
+  recordCount(METRIC.statsRun, 1, {
+    scope: report.scope,
+    sessions: report.sessionsAnalyzed,
+    providers: report.providers.length,
+    provider: scope.provider ?? "all",
+  });
+
+  if (flags.json) {
+    process.stdout.write(`${JSON.stringify({ schemaVersion: 1, ...report }, null, 2)}\n`);
+    return;
+  }
+
+  process.stdout.write(`${renderStatsReport(report, community)}\n`);
+};