From a7d5a6947044976f0a76814f13c314504727c4d1 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Fri, 19 Jun 2026 01:10:39 -0700
Subject: [PATCH 01/38] feat(react): /react-doctor umbrella skill, in-house
 browser core, and debug/perf runtime jobs

Adds the @react-doctor/browser package (CDP attach-and-launch over playwright-core:
open, eval, snapshot, screenshot, axe-core audit, console/network capture, LoAF-based
perf with per-script attribution, single-load report, viewport emulation) and the
React DevTools profiler harness injected as a document-start init script
(window.__REACT_PERF__), the @react-doctor/debug NDJSON logging server for the debug
job (per-project lock/log scoping, session dedup, daemon/json modes), the `browser`
and `debug serve` CLI commands, and the expanded /react-doctor skill (perf, debug,
design references) installed to both skills/ and .agents/skills/.
---
 .agents/skills/react-doctor/SKILL.md          |  71 +++-
 .../skills/react-doctor/references/debug.md   |  87 +++++
 .../skills/react-doctor/references/design.md  |  52 +++
 .../skills/react-doctor/references/explain.md |  51 ++-
 .../react-doctor/references/performance.md    |  55 +++
 .changeset/react-browser-debug-skill.md       |   5 +
 packages/browser/package.json                 |  32 ++
 packages/browser/src/connect.ts               |  45 +++
 packages/browser/src/constants.ts             |  50 +++
 packages/browser/src/index.ts                 |   4 +
 packages/browser/src/launch.ts                | 101 ++++++
 packages/browser/src/perf-observer.ts         | 116 ++++++
 .../devtools/collect-profiling-export.ts      |  63 ++++
 .../devtools/install-backend.ts               |  14 +
 .../browser/src/react-profiler/harness.ts     |  30 ++
 packages/browser/src/react-profiler/inject.ts |   6 +
 .../types/devtools-inline-backend.d.ts        |   5 +
 .../react-profiler/types/profiling-export.ts  |  49 +++
 .../react-profiler/types/react-devtools.ts    |  22 ++
 packages/browser/src/session.ts               | 329 ++++++++++++++++++
 packages/browser/src/types.ts                 |  62 ++++
 packages/browser/src/utils/cdp-port.ts        |  11 +
 packages/browser/src/utils/delay.ts           |   2 +
 .../browser/src/utils/is-loopback-endpoint.ts |   8 +
 packages/browser/tests/cdp-port.test.ts       |  14 +
 packages/browser/tests/connect.test.ts        |  11 +
 .../collect-profiling-export.test.ts          |  78 +++++
 packages/browser/tsconfig.json                |  12 +
 packages/browser/vite.config.ts               |  51 +++
 packages/debug/package.json                   |  26 ++
 packages/debug/src/constants.ts               |  23 ++
 packages/debug/src/index.ts                   |   3 +
 packages/debug/src/server.ts                  | 221 ++++++++++++
 packages/debug/src/types.ts                   |  38 ++
 packages/debug/src/utils/ping-server.ts       |  33 ++
 .../debug/src/utils/resolve-log-directory.ts  |  20 ++
 packages/debug/src/utils/server-lock.ts       |  38 ++
 packages/debug/tests/server.test.ts           |  71 ++++
 packages/debug/tsconfig.json                  |   9 +
 packages/debug/vite.config.ts                 |  18 +
 packages/react-doctor/package.json            |   5 +
 .../react-doctor/src/cli/commands/browser.ts  | 238 +++++++++++++
 .../react-doctor/src/cli/commands/debug.ts    | 110 ++++++
 packages/react-doctor/src/cli/index.ts        | 123 +++++++
 .../react-doctor/src/cli/utils/constants.ts   |   7 +
 .../src/cli/utils/parse-viewport.ts           |  17 +
 .../src/cli/utils/strip-unknown-cli-flags.ts  |  27 ++
 .../react-doctor/tests/parse-viewport.test.ts |  16 +
 .../tests/strip-unknown-cli-flags.test.ts     |  46 +++
 packages/react-doctor/vite.config.ts          |  26 ++
 pnpm-lock.yaml                                |  68 ++++
 skills/react-doctor/SKILL.md                  |  71 +++-
 skills/react-doctor/references/debug.md       |  87 +++++
 skills/react-doctor/references/design.md      |  52 +++
 skills/react-doctor/references/explain.md     |  49 ++-
 skills/react-doctor/references/performance.md |  55 +++
 56 files changed, 2854 insertions(+), 79 deletions(-)
 create mode 100644 .agents/skills/react-doctor/references/debug.md
 create mode 100644 .agents/skills/react-doctor/references/design.md
 create mode 100644 .agents/skills/react-doctor/references/performance.md
 create mode 100644 .changeset/react-browser-debug-skill.md
 create mode 100644 packages/browser/package.json
 create mode 100644 packages/browser/src/connect.ts
 create mode 100644 packages/browser/src/constants.ts
 create mode 100644 packages/browser/src/index.ts
 create mode 100644 packages/browser/src/launch.ts
 create mode 100644 packages/browser/src/perf-observer.ts
 create mode 100644 packages/browser/src/react-profiler/devtools/collect-profiling-export.ts
 create mode 100644 packages/browser/src/react-profiler/devtools/install-backend.ts
 create mode 100644 packages/browser/src/react-profiler/harness.ts
 create mode 100644 packages/browser/src/react-profiler/inject.ts
 create mode 100644 packages/browser/src/react-profiler/types/devtools-inline-backend.d.ts
 create mode 100644 packages/browser/src/react-profiler/types/profiling-export.ts
 create mode 100644 packages/browser/src/react-profiler/types/react-devtools.ts
 create mode 100644 packages/browser/src/session.ts
 create mode 100644 packages/browser/src/types.ts
 create mode 100644 packages/browser/src/utils/cdp-port.ts
 create mode 100644 packages/browser/src/utils/delay.ts
 create mode 100644 packages/browser/src/utils/is-loopback-endpoint.ts
 create mode 100644 packages/browser/tests/cdp-port.test.ts
 create mode 100644 packages/browser/tests/connect.test.ts
 create mode 100644 packages/browser/tests/react-profiler/collect-profiling-export.test.ts
 create mode 100644 packages/browser/tsconfig.json
 create mode 100644 packages/browser/vite.config.ts
 create mode 100644 packages/debug/package.json
 create mode 100644 packages/debug/src/constants.ts
 create mode 100644 packages/debug/src/index.ts
 create mode 100644 packages/debug/src/server.ts
 create mode 100644 packages/debug/src/types.ts
 create mode 100644 packages/debug/src/utils/ping-server.ts
 create mode 100644 packages/debug/src/utils/resolve-log-directory.ts
 create mode 100644 packages/debug/src/utils/server-lock.ts
 create mode 100644 packages/debug/tests/server.test.ts
 create mode 100644 packages/debug/tsconfig.json
 create mode 100644 packages/debug/vite.config.ts
 create mode 100644 packages/react-doctor/src/cli/commands/browser.ts
 create mode 100644 packages/react-doctor/src/cli/commands/debug.ts
 create mode 100644 packages/react-doctor/src/cli/utils/parse-viewport.ts
 create mode 100644 packages/react-doctor/tests/parse-viewport.test.ts
 create mode 100644 skills/react-doctor/references/debug.md
 create mode 100644 skills/react-doctor/references/design.md
 create mode 100644 skills/react-doctor/references/performance.md

diff --git a/.agents/skills/react-doctor/SKILL.md b/.agents/skills/react-doctor/SKILL.md
index 332a47f8b..e85b926fa 100644
--- a/.agents/skills/react-doctor/SKILL.md
+++ b/.agents/skills/react-doctor/SKILL.md
@@ -1,26 +1,61 @@
 ---
 name: react-doctor
-description: Use when finishing a feature, fixing a bug, before committing React code, or when the user types `/doctor`, asks to scan, triage, or clean up React diagnostics. Covers lint, accessibility, bundle size, architecture. Includes a regression check and a full local-triage workflow that fetches the canonical playbook.
-version: "1.2.0"
+description: Use when writing, finishing, or committing React or React Native code, when the user types `/react-doctor`, or when they ask to scan, triage, lint, profile performance, debug a UI in the browser, or review design and accessibility. Covers lint, accessibility, performance, bundle size, and architecture.
+version: "1.5.0"
 ---
 
 # React Doctor
 
-Scans React codebases for security, performance, correctness, and architecture issues. Outputs a 0–100 health score.
+One skill that makes your agent good at React. It writes better React by default, checks your changes in the background, and opens a real browser to profile performance, reproduce bugs, and review design.
 
-## After making React code changes:
+## Baseline rules (always on)
 
-Run `npx react-doctor@latest --verbose --scope changed` and check the score did not regress.
+Apply these on every React edit, before any tool runs. They shape how you write code, not only what you flag:
 
-If the score dropped, fix the regressions before committing.
+1. Derive state during render, don't duplicate it in another `useState`.
+2. Skip effects for values you can compute while rendering and for logic that belongs in an event handler.
+3. Compose components instead of piling on boolean props.
+4. Lift state only as far as it needs to go, no higher.
+5. Keep one source of truth for each piece of state.
+6. Render without side effects; keep the render pass pure.
+7. Use stable keys in lists, never the array index.
+8. Fetch independent data in parallel, not in a waterfall.
+9. Skip manual `useMemo`, `useCallback`, and `memo`; let the React Compiler handle it.
+10. Handle the loading, error, and empty states, not only the happy path.
 
-## For general cleanup or code improvement:
+## Routing
 
-Run `npx react-doctor@latest --verbose` (the default `--scope full`) to scan the full codebase. Fix issues by severity — errors first, then warnings.
+`/react-doctor` picks the job from what you're doing. Name a job (`/react-doctor perf`) to force it. When the request is genuinely unclear, ask which one rather than guessing.
 
-## /doctor — full local triage workflow
+| Signal                                                  | Job        | What it does                    |
+| ------------------------------------------------------- | ---------- | ------------------------------- |
+| "review", "before commit", "clean up", or changed files | **doctor** | static scan plus 0 to 100 score |
+| "slow", "laggy", "janky", "re-rendering"                | **perf**   | React DevTools profiler harness |
+| "broken", "crashes", "doesn't work" in the UI           | **debug**  | reproduce in a real browser     |
+| "looks off", "polish", a screenshot or pasted element   | **design** | measured UI review              |
 
-When the user types `/doctor`, says "run react doctor", or asks for a full triage / cleanup pass (not just a regression check), fetch the canonical local-triage playbook and follow every step in it:
+doctor runs from code alone, so it is the one that fires in the background. The browser jobs (perf, debug, design) need a live page and are slower, so they run only when asked.
+
+## Which browser to drive
+
+debug, design, and perf need a real Chrome. Two ways to get one:
+
+1. **A browser MCP already in your tools.** Prefer [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) or similar for console, network, and snapshots. It adds full performance traces and Lighthouse on top.
+2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. It covers `open`, `eval`, `snapshot`, `screenshot`, `console`, `network`, an axe-core `audit`, and `perf` (long animation frames with per-script attribution).
+
+It is the same Chrome either way, so the playbooks apply to both: `browser open`, `eval`, `snapshot`, and `screenshot` map onto the MCP's `navigate_page`, `evaluate_script`, `take_snapshot`, and `take_screenshot`.
+
+## doctor: scan and triage
+
+After making React changes, run a regression check and confirm the score did not drop:
+
+```bash
+npx react-doctor@latest --verbose --scope changed
+```
+
+If the score dropped, fix the regressions before committing. For a cleanup of the whole codebase, drop `--scope changed` (the default is `--scope full`) and fix by severity: errors first, then warnings.
+
+When the user types `/react-doctor`, `/doctor`, says "run react doctor", or asks for a full triage or cleanup pass (not a regression check), fetch the canonical local-triage playbook and follow every step in it:
 
 ```bash
 curl --fail --silent --show-error \
@@ -28,13 +63,23 @@ curl --fail --silent --show-error \
   https://www.react.doctor/prompts/react-doctor-agent.md
 ```
 
-The playbook is the single source of truth — a scan → filter → triage → fix → validate loop that edits the working tree directly (never commits, never opens PRs). Updating the prompt at its source updates every agent on its next fetch — no skill reinstall needed.
+The playbook is the single source of truth: a scan, filter, triage, fix, validate loop that edits the working tree directly and never commits or opens PRs. Updating the prompt at its source updates every agent on its next fetch, no reinstall needed. Pair it with the per-rule prompts at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md` (fetched on demand inside the playbook) so each fix uses the reviewer-tested recipe.
+
+## perf: profile performance
+
+When the user reports jank, slow interactions, dropped frames, excessive re-renders, or asks to profile or optimize render performance, read [references/performance.md](references/performance.md) and follow it. It runs an evidence-driven profile, analyze, fix, re-profile loop against the real React DevTools profiler export, never guessing from code alone.
+
+## debug: reproduce in a real browser
+
+When the user says something is broken, crashes, throws, or behaves wrong in the running app, read [references/debug.md](references/debug.md) and follow it. It runs the [debug-agent](https://github.com/millionco/debug-agent) loop: generate hypotheses, instrument the code with runtime NDJSON logs, reproduce the bug in the live browser, and fix only once the logs prove the cause.
+
+## design: review and improve UI
 
-Pair it with the matching per-rule prompts at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md` (fetched on demand inside the playbook) so each fix uses the canonical, reviewer-tested recipe.
+When the user wants to build, polish, or review an interface ("looks off", "make this nicer", a pasted screenshot or element), read [references/design.md](references/design.md) and follow it. It opens the page, takes a screenshot, and reports what it can measure (contrast, line length, spacing, tap-target size), not only taste.
 
 ## Configuring or explaining rules
 
-When the user wants to understand a rule, disagrees with one, or wants to disable / tune which rules run (not fix code), read [references/explain.md](references/explain.md) and follow it. Start with `npx react-doctor@latest rules explain <rule>`, then apply the narrowest control via `npx react-doctor@latest rules disable|set|category|ignore-tag …`, which edits your `doctor.config.*` (or `package.json#reactDoctor`).
+When the user wants to understand a rule, disagrees with one, or wants to disable or tune which rules run (not fix code), read [references/explain.md](references/explain.md) and follow it. Start with `npx react-doctor@latest rules explain <rule>`, then apply the narrowest control via `npx react-doctor@latest rules disable|set|category|ignore-tag …`.
 
 ## Command
 
diff --git a/.agents/skills/react-doctor/references/debug.md b/.agents/skills/react-doctor/references/debug.md
new file mode 100644
index 000000000..afd5431e2
--- /dev/null
+++ b/.agents/skills/react-doctor/references/debug.md
@@ -0,0 +1,87 @@
+# Debugging with runtime evidence
+
+Reproduce and fix UI bugs with runtime evidence, never by guessing from code alone. Use this when the user says something is broken, crashes, throws, hangs, or behaves wrong in the running app.
+
+This is the [debug-agent](https://github.com/millionco/debug-agent) loop, built into React Doctor: hypothesize, instrument with logs, reproduce, analyze the logs, fix only once the logs prove the cause, verify, clean up.
+
+## 0. Start the logging server (before any instrumentation)
+
+The server is long-running. Start it once and keep it up for the whole session. `--daemon` prints the server info and returns, leaving the server running in the background:
+
+```bash
+npx react-doctor debug serve --daemon
+```
+
+It prints one JSON line. Capture and remember:
+
+- `endpoint`: POST your logs here from JS or TS at runtime
+- `logPath`: the NDJSON log file you read after each run
+- `sessionId`: include it in every log payload
+
+The server is idempotent: a second start returns the running server's info. If it fails to start, stop and tell the user. Do not instrument without it.
+
+## 1. Generate hypotheses
+
+Write 3 to 5 precise hypotheses about why the bug happens: a thrown error in a specific component, a failed or duplicated request, a null or undefined access, a state update after unmount, a missing loading or error branch. Aim for more, not fewer. Each hypothesis gets an id (A, B, C, …).
+
+## 2. Instrument the code
+
+Add 2 to 6 logs (never more than 10) at the points that confirm or reject each hypothesis: function entry and exit, values before and after a critical operation, which branch ran. In JS or TS, POST to the server `endpoint`:
+
+```js
+// #region debug log
+fetch("ENDPOINT", {
+  method: "POST",
+  headers: { "Content-Type": "application/json" },
+  body: JSON.stringify({
+    sessionId: "SESSION_ID",
+    hypothesisId: "A",
+    location: "cart.tsx:42",
+    message: "cart total before render",
+    data: { total },
+    timestamp: Date.now(),
+  }),
+}).catch(() => {});
+// #endregion
+```
+
+Wrap every debug log in `// #region debug log` and `// #endregion` so cleanup later is deterministic. Each log maps to at least one `hypothesisId`. Never log secrets or PII.
+
+## 3. Reproduce
+
+Clear the log file (`DELETE` the file at `logPath`) before each run, then trigger the exact behavior the user described:
+
+- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser console` and `browser network` hand you the runtime console (with uncaught errors) and the request waterfall with failures flagged, often the evidence you need before instrumenting at all. To get the whole picture in one pass, `browser report` captures console, network, performance, and accessibility in a single page load instead of reloading once per command; prefer it over running the four separately. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
+
+```bash
+npx react-doctor browser open http://localhost:3000           # attach + open the page
+npx react-doctor browser report http://localhost:3000         # console + network + perf + a11y in one load
+npx react-doctor browser console http://localhost:3000        # console output + uncaught errors
+npx react-doctor browser network http://localhost:3000        # request waterfall, failures flagged
+npx react-doctor browser snapshot                             # what rendered, by role + name
+npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()'
+npx react-doctor browser eval 'page.evaluate(() => document.title)'   # raw DOM when you need it
+```
+
+`snapshot` and `eval` are a pair. `snapshot` lists the rendered elements by role and accessible name. `eval` runs an expression with the Playwright `page` in scope, so you act on what you saw using Playwright's own selectors: `page.locator("text=Login").click()`, `page.getByRole(...)`, `page.fill(...)`, `page.waitForSelector(...)`. For raw DOM, reach through `page.evaluate(() => …)`. No separate ref scheme to track.
+
+- **Backend or CLI bugs:** write and run a small repro script (Node, shell) yourself.
+- Otherwise ask the user for numbered steps, and remind them to restart any app or service whose instrumented files are bundled or cached.
+
+Reuse the same repro pathway for every iteration.
+
+## 4. Analyze the logs
+
+Read the NDJSON at `logPath`. Mark each hypothesis CONFIRMED, REJECTED, or INCONCLUSIVE, citing the specific log lines. If the file is empty, the repro likely did not run the instrumented path, so try again. If every hypothesis is rejected, revert any code changes made for the rejected hypotheses, generate new hypotheses from a different subsystem, and add more instrumentation.
+
+## 5. Fix, only with proof
+
+Apply the smallest change that addresses the proven cause. Cross-check it against the baseline rules in `SKILL.md` (derive don't duplicate, effects, single source of truth). Do not remove the instrumentation yet. Never use `setTimeout` or `sleep` as a fix.
+
+## 6. Verify
+
+Clear the log file, re-run the same reproduction (tag the logs `runId:"post-fix"` if helpful), and compare before and after with cited lines. Re-run a couple of times to rule out races. No fix is confirmed without log proof.
+
+## 7. Clean up
+
+Once verified, search every file for `#region debug log`, delete each block through its `#endregion`, grep again to confirm none remain, and `git diff` to confirm only the intentional fix is left.
diff --git a/.agents/skills/react-doctor/references/design.md b/.agents/skills/react-doctor/references/design.md
new file mode 100644
index 000000000..74f50928f
--- /dev/null
+++ b/.agents/skills/react-doctor/references/design.md
@@ -0,0 +1,52 @@
+# Reviewing and improving UI
+
+Improve interfaces with measured evidence from the rendered page, not taste alone. Use this when the user wants to build, polish, or review a UI: "looks off", "make this nicer", or a pasted screenshot.
+
+The value here is what a screenshot and the live DOM let you measure that reading code cannot: contrast ratios, line length, the spacing scale, and tap-target size. Lead with those, then apply craft.
+
+## Review against the live page
+
+```bash
+npx react-doctor browser open http://localhost:3000
+npx react-doctor browser screenshot --out review.png   # what the user actually sees
+npx react-doctor browser audit                          # axe-core: contrast, names, landmarks
+```
+
+Review responsive breakpoints with `--viewport WIDTHxHEIGHT` (for example `--viewport 390x844` for a phone) on `screenshot`, `snapshot`, `audit`, or `perf`. It emulates the size for that one command via a CDP override, so it never resizes your real browser window:
+
+```bash
+npx react-doctor browser screenshot --viewport 390x844 --out mobile.png
+```
+
+Look at the screenshot, then measure specifics with `eval` (computed styles, bounding boxes, color values) to get objective numbers rather than opinions:
+
+```bash
+npx react-doctor browser eval 'page.evaluate(() => getComputedStyle(document.querySelector("button")).fontSize)'
+```
+
+`browser audit` runs axe-core against the live page and reports accessibility violations (color contrast, missing button or SVG names, heading order, landmarks) with the failing selectors. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, its `lighthouse_audit` adds performance and best-practice findings on top. Lead with the measured issues: they are objective findings that cannot be dismissed as opinion.
+
+## What to check
+
+Measured, in priority order:
+
+1. **Contrast**: body text at least 4.5:1, large text at least 3:1. Report the actual ratio.
+2. **Tap targets**: interactive elements at least 24 × 24 px (ideally 44 × 44 on touch).
+3. **Line length**: body copy roughly 45 to 75 characters per line.
+4. **Spacing**: spacing values come from one consistent scale, not ad-hoc px.
+
+Then craft, drawing on the bundled design rules:
+
+5. **Type**: one clear hierarchy; avoid default system-only stacks for brand surfaces; consistent line-height.
+6. **Color**: a committed palette, not arbitrary hexes; check both light and dark.
+7. **Layout**: alignment, rhythm, and a deliberate focal point.
+8. **State**: hover, focus-visible, disabled, loading, and empty states exist.
+
+## The loop
+
+Build or fix, screenshot, re-audit, compare. Confirm the measured issue you targeted actually moved (the ratio crossed the threshold, the target grew), and compare the before and after screenshots to confirm the result looks right.
+
+## Working rules
+
+- Always look at the screenshot; do not review UI from JSX alone.
+- Report measured findings with their numbers; keep taste suggestions short and clearly separate from the measured ones.
diff --git a/.agents/skills/react-doctor/references/explain.md b/.agents/skills/react-doctor/references/explain.md
index 722c6f642..18cd0cea2 100644
--- a/.agents/skills/react-doctor/references/explain.md
+++ b/.agents/skills/react-doctor/references/explain.md
@@ -1,15 +1,12 @@
 # Explaining and configuring rules
 
-Explain React Doctor rules and edit `doctor.config.*` safely. Use this when a user
-wants to understand a rule or change which rules run — not for fixing diagnostics
-(that is the main `react-doctor` skill / `/doctor`).
+Explain React Doctor rules and edit `doctor.config.*` safely. Use this when a user wants to understand a rule or change which rules run, not for fixing diagnostics (that is the main `react-doctor` skill, invoked with `/react-doctor` or `/doctor`).
 
-Triggers: "why did this rule fire", "I disagree with this rule", "turn this rule off",
-"stop flagging X", "too noisy", "disable design rules".
+Triggers: "why did this rule fire", "I disagree with this rule", "turn this rule off", "stop flagging X", "too noisy", "disable design rules".
 
 ## Workflow
 
-1. Identify the rule key from the diagnostic (e.g. `react-doctor/no-array-index-as-key`).
+1. Identify the rule key from the diagnostic (for example `react-doctor/no-array-index-as-key`).
 2. Explain it before changing anything:
 
 ```bash
@@ -17,25 +14,25 @@ npx react-doctor@latest rules explain react-doctor/no-array-index-as-key
 ```
 
 3. Pick the narrowest control that matches the user's intent (see decision guide).
-4. Apply it with a `rules` subcommand (edits your `doctor.config.*` or `package.json#reactDoctor` in place, preserving other fields and formatting).
+4. Apply it with a `rules` subcommand. It edits your `doctor.config.*` or `package.json#reactDoctor` in place, preserving other fields and formatting.
 5. Validate the change did what they wanted:
 
 ```bash
-npx react-doctor@latest --verbose --scope changed
+npx react-doctor@latest --verbose --scope changed
 ```
 
 ## Commands
 
 ```bash
-npx react-doctor@latest rules list                         # every rule + its effective severity
-npx react-doctor@latest rules list --configured            # only what your config changed
-npx react-doctor@latest rules list --category Performance   # filter by category
-npx react-doctor@latest rules explain <rule>               # why it matters + how to configure
-npx react-doctor@latest rules disable <rule>               # rule never runs
-npx react-doctor@latest rules enable <rule>                # turn back on at its recommended severity
-npx react-doctor@latest rules set <rule> warn              # off | warn | error
-npx react-doctor@latest rules category "React Native" off   # whole category
-npx react-doctor@latest rules ignore-tag design            # skip a rule family (design, test-noise, …)
+npx react-doctor@latest rules list                          # every rule + its effective severity
+npx react-doctor@latest rules list --configured             # only what your config changed
+npx react-doctor@latest rules list --category Performance    # filter by category
+npx react-doctor@latest rules explain <rule>                # why it matters + how to configure
+npx react-doctor@latest rules disable <rule>                # rule never runs
+npx react-doctor@latest rules enable <rule>                 # turn back on at its recommended severity
+npx react-doctor@latest rules set <rule> warn               # off | warn | error
+npx react-doctor@latest rules category "React Native" off    # whole category
+npx react-doctor@latest rules ignore-tag design             # skip a rule family (design, test-noise, …)
 npx react-doctor@latest rules unignore-tag design
 ```
 
@@ -43,20 +40,20 @@ Rule references accept the full key (`react-doctor/no-danger`), the bare id (`no
 
 ## Decision guide
 
-Match the control to the intent — prefer the narrowest one:
+Match the control to the intent, and prefer the narrowest one:
 
-- **User disagrees with one rule / it's a false positive for them** → `rules disable <rule>` (sets `rules.<key> = "off"`; the rule stops running everywhere). This is the default for "I don't want this rule".
-- **Rule is fine but wrong severity** → `rules set <rule> warn` or `rules set <rule> error`.
-- **A disabled-by-default rule they want on** → `rules enable <rule>`.
-- **A whole area is unwanted** (e.g. all React Native rules) → `rules category "<Category>" off`.
-- **A behavioral family is noisy** (`design`, `test-noise`, `migration-hint`) → `rules ignore-tag <tag>`.
-- **Keep it locally but hide from PR comment / score / CI gate only** → do NOT disable. Edit `surfaces` in your config (`surfaces.prComment.excludeRules`, `surfaces.score.excludeTags`, `surfaces.ciFailure.excludeCategories`). The rule still shows in local `cli` output.
+- **User disagrees with one rule, or it is a false positive for them**: `rules disable <rule>` (sets `rules.<key> = "off"`; the rule stops running everywhere). This is the default for "I don't want this rule".
+- **Rule is fine but wrong severity**: `rules set <rule> warn` or `rules set <rule> error`.
+- **A disabled-by-default rule they want on**: `rules enable <rule>`.
+- **A whole area is unwanted** (for example all React Native rules): `rules category "<Category>" off`.
+- **A behavioral family is noisy** (`design`, `test-noise`, `migration-hint`): `rules ignore-tag <tag>`.
+- **Keep it locally but hide from PR comment, score, or CI gate only**: do not disable. Edit `surfaces` in your config (`surfaces.prComment.excludeRules`, `surfaces.score.excludeTags`, `surfaces.ciFailure.excludeCategories`). The rule still shows in local `cli` output.
 
-How the layers combine: `ignore.tags` disables every rule carrying that tag **before** linting, so a tagged rule stays off even if `rules`/`categories` set it to `warn`/`error` (a rule-level override cannot re-enable a tag-ignored rule). For rules that aren't tag-disabled, `rules` overrides `categories` overrides the rule's default. `surfaces` is visibility-only and never changes whether a rule runs.
+How the layers combine: `ignore.tags` disables every rule carrying that tag before linting, so a tagged rule stays off even if `rules` or `categories` set it to `warn` or `error` (a rule-level override cannot re-enable a tag-ignored rule). For rules that are not tag-disabled, `rules` overrides `categories`, which in turn overrides the rule's default. `surfaces` is visibility-only and never changes whether a rule runs.
 
 ## Config shape
 
-Config lives in `doctor.config.ts` (or `.js`/`.mjs`/`.cjs`/`.json`/`.jsonc`), or the `reactDoctor` key in `package.json`. The `rules` commands edit whichever exists — TS/JS edits preserve formatting (via magicast) — and create `doctor.config.json` when none does, stamping `$schema`:
+Config lives in `doctor.config.ts` (or `.js`, `.mjs`, `.cjs`, `.json`, `.jsonc`), or the `reactDoctor` key in `package.json`. The `rules` commands edit whichever exists (TS and JS edits preserve formatting via magicast) and create `doctor.config.json` when none does, stamping `$schema`:
 
 ```ts
 // doctor.config.ts
@@ -69,4 +66,4 @@ export default {
 
 ## Educating the user
 
-When explaining a rule, lead with the "Why it matters" guidance from `rules explain` and, when they want depth, the per-rule recipe at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md`. Only after they understand it should you offer to disable it — many "bad" rules are catching real issues.
+When explaining a rule, lead with the "Why it matters" guidance from `rules explain` and, when they want depth, the per-rule recipe at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md`. Only after they understand it should you offer to disable it: many "bad" rules are catching real issues.
diff --git a/.agents/skills/react-doctor/references/performance.md b/.agents/skills/react-doctor/references/performance.md
new file mode 100644
index 000000000..e0dc85c79
--- /dev/null
+++ b/.agents/skills/react-doctor/references/performance.md
@@ -0,0 +1,55 @@
+# Performance engineering (runtime-evidence loop)
+
+Find and fix jank with runtime evidence, never code reading alone. The primary signal is the long animation frame (LoAF): a frame longer than 50 ms, captured with `PerformanceObserver` and attributed to the exact script that blocked it (its `sourceURL`, `sourceFunctionName`, and how much of that time was synchronous layout). That attribution is what `performance.now()` and reading code cannot give you. Use this when the user reports jank, dropped frames, janky scroll, slow click or typing response, poor INP, slow LCP, or layout shift, or asks to make something faster.
+
+Same discipline as [debug](./debug.md): hypothesize, capture, analyze the worst frame, fix the top evidence-backed cause, re-capture to verify, repeat. A change that does not make the offending script's frame time drop is not a fix.
+
+## 1. Hypothesize (3 to 5)
+
+Why is it slow, and where? Common React causes: unstable callback or object props, a missing `memo` or `useMemo`, a context provider that is too broad, large unvirtualized lists, expensive children re-rendering on every parent commit, or a sync layout read interleaved with writes (layout thrashing).
+
+## 2. Capture (no app changes)
+
+`browser perf` arms the LoAF, LCP, and CLS observers, loads the page, watches briefly past load, then reports the worst frames first with per-script attribution:
+
+```bash
+npx react-doctor browser perf http://localhost:3000   # measures the current page if URL omitted
+```
+
+It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The output leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
+
+To attribute interaction jank (a slow click, scroll, or keypress), drive the repro between load and the read: `browser open`, then `browser eval` the interaction, then `browser perf` with no URL. Without a URL it does not reload; it reads the long frames already buffered in the timeline, so the jank from your interaction is included.
+
+## 3. Analyze the worst frame first
+
+The output is already sorted worst-first. The script with the largest duration inside the worst frame is your culprit. If a script's sync-layout time is a large share of its duration, that is layout thrashing: sync reads (`offsetHeight`, `getBoundingClientRect`, `scrollTop`, `getComputedStyle`) interleaved with DOM writes. A minified `sourceURL` is meaningless on its own, so resolve it through your sourcemap. Cite the specific script when you conclude:
+
+> CONFIRMED: 128 ms frame, script `app.js` `drawSeries` ran 84 ms with 42 ms sync layout. The chart redraw forces layout inside the scroll handler.
+
+## 4. Zoom into React renders (optional)
+
+When the worst frame's script is your own React bundle and you need per-component render counts and why each rendered, profile React directly. `browser open` injects the real DevTools profiler before the page loads, so there are no app changes, no Chrome extension, and no manual record or stop:
+
+```bash
+npx react-doctor browser open http://localhost:3000
+```
+
+For trustworthy timings, run against React's profiling build (alias `react-dom` to `react-dom/profiling` in your bundler) in a dev or non-prod build. Dev timings work but are inflated.
+
+Drive it through `browser eval` (the Playwright `page` is in scope). `stop()` returns a JSON profiling export and resolves to `null` when nothing was recorded (a production React build records no profiling data):
+
+```bash
+npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'
+# drive the exact repro with more `browser eval`: page.locator("...").click(), page.keyboard.type("...")
+npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.stop())'
+```
+
+Aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates).
+
+## 5. Fix, only with proof
+
+Apply the smallest change that addresses the proven cause. Cross-check it against the baseline rules in [`SKILL.md`](../SKILL.md) (derive don't duplicate, effects, single source of truth). Never fix by wrapping work in `setTimeout`: that defers the work to a later frame, it does not remove it.
+
+## 6. Verify
+
+Re-run the same capture and diff before and after: the offending frame and its script time must drop, and no other frame may regress. For the React profiler, re-run the scenario a few times and compare medians (dev timings are noisy; StrictMode double-renders on mount). Never claim a performance win without before-and-after evidence. The profiler leaves nothing behind in your app to clean up; it lives only in the injected browser session.
diff --git a/.changeset/react-browser-debug-skill.md b/.changeset/react-browser-debug-skill.md
new file mode 100644
index 000000000..ddb941b42
--- /dev/null
+++ b/.changeset/react-browser-debug-skill.md
@@ -0,0 +1,5 @@
+---
+"react-doctor": minor
+---
+
+Add the `browser` and `debug` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback) for accessibility audits, console/network capture, performance traces with React DevTools profiling, snapshots, and screenshots. `debug` runs an NDJSON logging server the debug job posts runtime evidence to.
diff --git a/packages/browser/package.json b/packages/browser/package.json
new file mode 100644
index 000000000..f2f9fa373
--- /dev/null
+++ b/packages/browser/package.json
@@ -0,0 +1,32 @@
+{
+  "name": "@react-doctor/browser",
+  "version": "0.5.4",
+  "private": true,
+  "description": "Internal: React Doctor's browser driver. Attaches to a running Chrome over CDP (or launches one) and keeps the page open across commands, backing the debug and design jobs.",
+  "license": "MIT",
+  "type": "module",
+  "sideEffects": false,
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "build": "node -e \"require('node:fs').rmSync('dist', { recursive: true, force: true })\" && cross-env NODE_ENV=production vp pack",
+    "typecheck": "tsc --noEmit",
+    "test": "vp test run"
+  },
+  "dependencies": {
+    "axe-core": "^4.10.2",
+    "playwright-core": "^1.49.1"
+  },
+  "devDependencies": {
+    "@types/node": "^25.6.0",
+    "esbuild": "^0.25.12",
+    "react-devtools-inline": "^6.1.5"
+  },
+  "engines": {
+    "node": "^20.19.0 || >=22.13.0"
+  }
+}
diff --git a/packages/browser/src/connect.ts b/packages/browser/src/connect.ts
new file mode 100644
index 000000000..52d19770c
--- /dev/null
+++ b/packages/browser/src/connect.ts
@@ -0,0 +1,45 @@
+import { chromium, type Browser } from "playwright-core";
+import { CONNECT_TIMEOUT_MS, DEFAULT_CDP_ENDPOINT } from "./constants.js";
+import { launchPersistentChrome } from "./launch.js";
+import type { BrowserConnectOptions } from "./types.js";
+import { cdpPortFromEndpoint } from "./utils/cdp-port.js";
+import { isLoopbackEndpoint } from "./utils/is-loopback-endpoint.js";
+
+export interface BrowserConnection {
+  browser: Browser;
+  launched: boolean;
+}
+
+// Attach to a debuggable Chrome over CDP. If none is reachable on a local
+// endpoint, launch our own persistent, reattachable instance and attach to
+// that. We always end up attached over CDP — never holding a launched process
+// handle — so the browser survives across commands, the model Chrome DevTools
+// MCP uses to keep state.
+export const connectToBrowser = async (
+  options: BrowserConnectOptions = {},
+): Promise<BrowserConnection> => {
+  const endpoint = options.cdpEndpoint ?? DEFAULT_CDP_ENDPOINT;
+  try {
+    const browser = await chromium.connectOverCDP(endpoint, { timeout: CONNECT_TIMEOUT_MS });
+    return { browser, launched: false };
+  } catch (attachError) {
+    // Only launch for a loopback endpoint — we can't spawn Chrome on a remote host.
+    if (options.launch === false || !isLoopbackEndpoint(endpoint)) {
+      throw new Error(
+        `Could not attach to Chrome at ${endpoint}. Start Chrome with --remote-debugging-port=${cdpPortFromEndpoint(endpoint)}, or allow launching a local browser.`,
+        { cause: attachError },
+      );
+    }
+    const reachableEndpoint = await launchPersistentChrome(endpoint);
+    try {
+      return {
+        browser: await chromium.connectOverCDP(reachableEndpoint, { timeout: CONNECT_TIMEOUT_MS }),
+        launched: true,
+      };
+    } catch (launchedAttachError) {
+      throw new Error(`Launched Chrome at ${reachableEndpoint} but could not attach to it.`, {
+        cause: launchedAttachError,
+      });
+    }
+  }
+};
diff --git a/packages/browser/src/constants.ts b/packages/browser/src/constants.ts
new file mode 100644
index 000000000..b19da031e
--- /dev/null
+++ b/packages/browser/src/constants.ts
@@ -0,0 +1,50 @@
+import { homedir } from "node:os";
+import { join } from "node:path";
+
+// Default Chrome DevTools Protocol endpoint. A user opts their browser in by
+// launching Chrome with `--remote-debugging-port=9222`; we attach to that.
+export const DEFAULT_CDP_PORT = 9222;
+export const DEFAULT_CDP_ENDPOINT = `http://127.0.0.1:${DEFAULT_CDP_PORT}`;
+
+// How long to wait for a CDP attach before falling back to launching Chrome.
+export const CONNECT_TIMEOUT_MS = 5_000;
+
+// How long a single page navigation may take before we give up.
+export const NAVIGATION_TIMEOUT_MS = 30_000;
+
+// Upper bound on waiting for the page to settle (network quiet + fonts) before
+// reading or screenshotting it. Best-effort: a page that never goes idle (long
+// polling, analytics) hits this cap and we proceed anyway.
+export const SETTLE_TIMEOUT_MS = 10_000;
+
+// Dedicated Chrome profile for the browser we launch ourselves. Mirrors how
+// Chrome DevTools MCP keeps a persistent profile out of the user's real one, so
+// our launched instance is reattachable across commands and never touches their
+// main browsing data. (Chrome also refuses --remote-debugging-port on the
+// default profile, so a dedicated dir is required regardless.)
+export const LAUNCHED_CHROME_PROFILE_DIRECTORY = join(
+  homedir(),
+  ".cache",
+  "react-doctor",
+  "chrome-profile",
+);
+
+// How long to wait for a freshly launched Chrome to expose its CDP endpoint,
+// and how often to poll for it.
+export const LAUNCH_READY_TIMEOUT_MS = 20_000;
+export const LAUNCH_POLL_INTERVAL_MS = 100;
+
+// After the page settles, keep watching for long animation frames this long so
+// post-load jank (hydration, late effects) is captured, not just the load burst.
+export const PERFORMANCE_OBSERVE_WINDOW_MS = 1_000;
+
+// Failing element selectors kept per accessibility violation — enough to locate
+// the problem without dumping every match on a busy page.
+export const MAX_VIOLATION_TARGETS = 5;
+
+// Built React-profiler init script, relative to the bundle that imports it.
+// `react-profiler/inject.ts` is esbuilt into this self-contained IIFE at build
+// time (see vite.config.ts); the session injects it via `addInitScript`. The
+// path stays valid whether `dist/index.js` runs standalone or is re-bundled
+// into the CLI, because the build copies the asset next to each output.
+export const REACT_PROFILER_INJECT_FILE = "inject/react-profiler.global.js";
diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts
new file mode 100644
index 000000000..b01289f9b
--- /dev/null
+++ b/packages/browser/src/index.ts
@@ -0,0 +1,4 @@
+export { BrowserSession } from "./session.js";
+export { connectToBrowser } from "./connect.js";
+export type { BrowserConnection } from "./connect.js";
+export type * from "./types.js";
diff --git a/packages/browser/src/launch.ts b/packages/browser/src/launch.ts
new file mode 100644
index 000000000..693e91a70
--- /dev/null
+++ b/packages/browser/src/launch.ts
@@ -0,0 +1,101 @@
+import { spawn } from "node:child_process";
+import { existsSync } from "node:fs";
+import {
+  LAUNCH_POLL_INTERVAL_MS,
+  LAUNCH_READY_TIMEOUT_MS,
+  LAUNCHED_CHROME_PROFILE_DIRECTORY,
+} from "./constants.js";
+import { cdpPortFromEndpoint } from "./utils/cdp-port.js";
+import { delay } from "./utils/delay.js";
+
+const chromeExecutableCandidates = (): readonly string[] => {
+  switch (process.platform) {
+    case "darwin":
+      return [
+        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+        "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
+      ];
+    case "win32":
+      return [
+        `${process.env.PROGRAMFILES ?? "C:\\Program Files"}\\Google\\Chrome\\Application\\chrome.exe`,
+        `${process.env["PROGRAMFILES(X86)"] ?? "C:\\Program Files (x86)"}\\Google\\Chrome\\Application\\chrome.exe`,
+        `${process.env.LOCALAPPDATA ?? ""}\\Google\\Chrome\\Application\\chrome.exe`,
+      ];
+    default:
+      return [
+        "/usr/bin/google-chrome",
+        "/usr/bin/google-chrome-stable",
+        "/opt/google/chrome/chrome",
+        "/usr/bin/chromium",
+        "/usr/bin/chromium-browser",
+      ];
+  }
+};
+
+const resolveChromeExecutable = (): string => {
+  const candidates = [process.env.CHROME_PATH, ...chromeExecutableCandidates()];
+  const executable = candidates.find(
+    (candidate): candidate is string => typeof candidate === "string" && existsSync(candidate),
+  );
+  if (!executable) {
+    throw new Error(
+      "Could not find Google Chrome to launch. Install Chrome, set CHROME_PATH, or start Chrome with --remote-debugging-port and pass --cdp to attach to it.",
+    );
+  }
+  return executable;
+};
+
+// Chrome may bind the debug port on IPv4 or IPv6 depending on the host stack
+// (it falls back to [::1] when 127.0.0.1 is taken), so probe both loopback
+// forms. Returns the endpoint first, then its bracketed IPv6 form if distinct.
+const loopbackVariants = (endpoint: string): readonly string[] => {
+  const variants = new Set<string>([endpoint]);
+  const url = new URL(endpoint);
+  if (url.hostname === "127.0.0.1" || url.hostname === "localhost") {
+    url.hostname = "[::1]"; // brackets required: the setter silently ignores a bare "::1"
+    variants.add(url.origin);
+  }
+  return [...variants];
+};
+
+// Returns the loopback form that actually responded so the caller attaches to
+// the right one.
+const waitForCdpEndpoint = async (endpoint: string): Promise<string> => {
+  const candidates = loopbackVariants(endpoint);
+  const deadline = Date.now() + LAUNCH_READY_TIMEOUT_MS;
+  while (Date.now() < deadline) {
+    for (const candidate of candidates) {
+      try {
+        const response = await fetch(new URL("/json/version", candidate));
+        if (response.ok) return candidate;
+      } catch {}
+    }
+    await delay(LAUNCH_POLL_INTERVAL_MS);
+  }
+  throw new Error(`Launched Chrome but it never exposed its debugger at ${endpoint}.`);
+};
+
+// Detached and unref'd on success so the browser outlives this process and the
+// next `browser` command reattaches over CDP — the persistent model Chrome
+// DevTools MCP uses to keep state across calls.
+export const launchPersistentChrome = async (endpoint: string): Promise<string> => {
+  const executable = resolveChromeExecutable();
+  const args = [
+    `--remote-debugging-port=${cdpPortFromEndpoint(endpoint)}`,
+    `--user-data-dir=${LAUNCHED_CHROME_PROFILE_DIRECTORY}`,
+    "--no-first-run",
+    "--no-default-browser-check",
+  ];
+
+  const child = spawn(executable, args, { detached: true, stdio: "ignore" });
+  // HACK: swallow async spawn errors (e.g. a non-executable binary) so they
+  // don't crash the CLI as an uncaught exception; waitForCdpEndpoint surfaces
+  // the failure as an actionable timeout instead.
+  child.on("error", () => {});
+  const reachableEndpoint = await waitForCdpEndpoint(endpoint).catch((error: unknown) => {
+    child.kill();
+    throw error;
+  });
+  child.unref();
+  return reachableEndpoint;
+};
diff --git a/packages/browser/src/perf-observer.ts b/packages/browser/src/perf-observer.ts
new file mode 100644
index 000000000..1fd83fac4
--- /dev/null
+++ b/packages/browser/src/perf-observer.ts
@@ -0,0 +1,116 @@
+import type { PerformanceReport } from "./types.js";
+
+// Runs in the page (via evaluate) and resolves after `windowMs`. Installs fresh
+// LoAF / LCP / CLS observers with `buffered: true`, so frames already in the
+// performance timeline (a load just navigated to, or an interaction a previous
+// command drove) are replayed immediately, while the window catches anything
+// that fires next. A reload resets the timeline, so a fresh-load measurement
+// always starts clean. For repeated no-reload measurements on the persistent
+// page, `buffered: true` would otherwise replay — and re-count — every frame
+// from earlier runs, inflating LoAF rows and CLS. So we keep a per-page
+// watermark of the latest entry `startTime` already counted (per type) and skip
+// anything at or below it: the first run after an interaction still captures its
+// frames, a second run sees only what fired since. LoAF fields are not in
+// lib.dom, so the casts here are unavoidable.
+export const collectPerformanceReport = (windowMs: number): Promise<PerformanceReport> => {
+  interface ScriptTiming {
+    sourceURL?: string;
+    sourceFunctionName?: string;
+    invokerType?: string;
+    duration?: number;
+    forcedStyleAndLayoutDuration?: number;
+  }
+  interface LongAnimationFrameEntry {
+    startTime: number;
+    duration: number;
+    blockingDuration?: number;
+    scripts?: ScriptTiming[];
+  }
+  interface LayoutShiftEntry {
+    value: number;
+    hadRecentInput: boolean;
+  }
+  interface MutableReport {
+    longAnimationFrames: PerformanceReport["longAnimationFrames"];
+    largestContentfulPaintMs: number | null;
+    cumulativeLayoutShift: number;
+  }
+
+  interface CountedEntryWatermark {
+    longAnimationFrame: number;
+    layoutShift: number;
+  }
+  const WATERMARK_KEY = "__REACT_DOCTOR_PERF_WATERMARK__";
+
+  return new Promise<PerformanceReport>((resolve) => {
+    const report: MutableReport = {
+      longAnimationFrames: [],
+      largestContentfulPaintMs: null,
+      cumulativeLayoutShift: 0,
+    };
+
+    // Persisted on the page so it survives across no-reload measurements (and is
+    // wiped by a navigation, which is exactly when we want a clean slate).
+    const windowScope = window as unknown as Record<string, CountedEntryWatermark | undefined>;
+    const previousWatermark: CountedEntryWatermark = windowScope[WATERMARK_KEY] ?? {
+      longAnimationFrame: -1,
+      layoutShift: -1,
+    };
+    const nextWatermark: CountedEntryWatermark = { ...previousWatermark };
+
+    const observers: PerformanceObserver[] = [];
+    const observe = (type: string, onEntry: (entry: PerformanceEntry) => void): void => {
+      try {
+        const observer = new PerformanceObserver((list) => {
+          for (const entry of list.getEntries()) onEntry(entry);
+        });
+        observer.observe({ type, buffered: true });
+        observers.push(observer);
+      } catch {}
+    };
+
+    observe("long-animation-frame", (entry) => {
+      if (entry.startTime <= previousWatermark.longAnimationFrame) return;
+      nextWatermark.longAnimationFrame = Math.max(
+        nextWatermark.longAnimationFrame,
+        entry.startTime,
+      );
+      const longAnimationFrame = entry as unknown as LongAnimationFrameEntry;
+      report.longAnimationFrames.push({
+        startTimeMs: Math.round(longAnimationFrame.startTime),
+        durationMs: Math.round(longAnimationFrame.duration),
+        blockingDurationMs: Math.round(longAnimationFrame.blockingDuration ?? 0),
+        scripts: (longAnimationFrame.scripts ?? []).map((scriptTiming) => ({
+          sourceUrl: scriptTiming.sourceURL ?? "",
+          sourceFunctionName: scriptTiming.sourceFunctionName ?? "",
+          invokerType: scriptTiming.invokerType ?? "",
+          durationMs: Math.round(scriptTiming.duration ?? 0),
+          forcedStyleAndLayoutMs: Math.round(scriptTiming.forcedStyleAndLayoutDuration ?? 0),
+        })),
+      });
+    });
+
+    observe("largest-contentful-paint", (entry) => {
+      report.largestContentfulPaintMs = Math.round(entry.startTime);
+    });
+
+    observe("layout-shift", (entry) => {
+      if (entry.startTime <= previousWatermark.layoutShift) return;
+      nextWatermark.layoutShift = Math.max(nextWatermark.layoutShift, entry.startTime);
+      const layoutShift = entry as unknown as LayoutShiftEntry;
+      if (!layoutShift.hadRecentInput) report.cumulativeLayoutShift += layoutShift.value;
+    });
+
+    setTimeout(() => {
+      for (const observer of observers) observer.disconnect();
+      windowScope[WATERMARK_KEY] = nextWatermark;
+      resolve({
+        longAnimationFrames: report.longAnimationFrames.sort(
+          (left, right) => right.durationMs - left.durationMs,
+        ),
+        largestContentfulPaintMs: report.largestContentfulPaintMs,
+        cumulativeLayoutShift: Math.round(report.cumulativeLayoutShift * 1000) / 1000,
+      });
+    }, windowMs);
+  });
+};
diff --git a/packages/browser/src/react-profiler/devtools/collect-profiling-export.ts b/packages/browser/src/react-profiler/devtools/collect-profiling-export.ts
new file mode 100644
index 000000000..84debf5e7
--- /dev/null
+++ b/packages/browser/src/react-profiler/devtools/collect-profiling-export.ts
@@ -0,0 +1,63 @@
+import type {
+  ReactProfilerDataExport,
+  ReactProfilerDataForRootExport,
+  ReactProfilerRootDataBackend,
+} from "../types/profiling-export.js";
+import type { DevtoolsGlobal, ReactRendererInterface } from "../types/react-devtools.js";
+
+export const PROFILING_EXPORT_VERSION = 5;
+
+const collectFiberIds = (root: ReactProfilerRootDataBackend): Set<number> => {
+  const fiberIds = new Set<number>();
+  for (const [fiberId] of root.initialTreeBaseDurations) fiberIds.add(fiberId);
+  for (const commit of root.commitData) {
+    for (const [fiberId] of commit.fiberActualDurations) fiberIds.add(fiberId);
+    for (const [fiberId] of commit.fiberSelfDurations) fiberIds.add(fiberId);
+    for (const [fiberId] of commit.changeDescriptions ?? []) fiberIds.add(fiberId);
+  }
+  return fiberIds;
+};
+
+// Skipped when the renderer can't resolve names (older React), leaving raw ids.
+const resolveElementNames = (
+  renderer: ReactRendererInterface,
+  root: ReactProfilerRootDataBackend,
+): Array<[number, string]> => {
+  const resolve = renderer.getDisplayNameForElementID;
+  if (!resolve) return [];
+  const elementNames: Array<[number, string]> = [];
+  for (const fiberId of collectFiberIds(root)) {
+    const name = resolve(fiberId);
+    if (name) elementNames.push([fiberId, name]);
+  }
+  return elementNames;
+};
+
+// Returns null when no renderer is attached or no commits were recorded (e.g. a
+// production React build), so `stop()` resolves with null rather than an empty
+// object.
+export const collectProfilingExport = (target: DevtoolsGlobal): ReactProfilerDataExport | null => {
+  const renderers = target.__REACT_DEVTOOLS_GLOBAL_HOOK__?.rendererInterfaces;
+  if (!renderers || renderers.size === 0) return null;
+
+  const dataForRoots: Array<ReactProfilerDataForRootExport> = [];
+  for (const renderer of renderers.values()) {
+    renderer.stopProfiling();
+    // A renderer that attached after `start()` (lazy/code-split root) was never
+    // profiled, and `getProfilingData` throws for it — skip it rather than lose
+    // every other renderer's data.
+    let roots: ReactProfilerRootDataBackend[];
+    try {
+      roots = renderer.getProfilingData().dataForRoots;
+    } catch {
+      continue;
+    }
+    for (const root of roots) {
+      dataForRoots.push({ ...root, elementNames: resolveElementNames(renderer, root) });
+    }
+  }
+
+  const hasCommits = dataForRoots.some((root) => root.commitData.length > 0);
+  if (!hasCommits) return null;
+  return { version: PROFILING_EXPORT_VERSION, dataForRoots };
+};
diff --git a/packages/browser/src/react-profiler/devtools/install-backend.ts b/packages/browser/src/react-profiler/devtools/install-backend.ts
new file mode 100644
index 000000000..9b0cde05e
--- /dev/null
+++ b/packages/browser/src/react-profiler/devtools/install-backend.ts
@@ -0,0 +1,14 @@
+import { initialize as initializeBackend } from "react-devtools-inline/backend";
+import type { DevtoolsGlobal } from "../types/react-devtools.js";
+
+// Track per target, not a single boolean, so a second global (iframe/worker)
+// still gets its own hook.
+const installedTargets = new WeakSet<DevtoolsGlobal>();
+
+// MUST run before React loads, otherwise React never connects to the hook and
+// no commits are recorded.
+export const installReactDevtoolsBackend = (target: DevtoolsGlobal = globalThis): void => {
+  if (installedTargets.has(target)) return;
+  initializeBackend(target);
+  installedTargets.add(target);
+};
diff --git a/packages/browser/src/react-profiler/harness.ts b/packages/browser/src/react-profiler/harness.ts
new file mode 100644
index 000000000..0c356fb70
--- /dev/null
+++ b/packages/browser/src/react-profiler/harness.ts
@@ -0,0 +1,30 @@
+import { collectProfilingExport } from "./devtools/collect-profiling-export.js";
+import { installReactDevtoolsBackend } from "./devtools/install-backend.js";
+import type { ReactProfilerDataExport } from "./types/profiling-export.js";
+import type { DevtoolsGlobal } from "./types/react-devtools.js";
+
+export interface ReactPerfHarness {
+  start: () => void;
+  stop: () => Promise<ReactProfilerDataExport | null>;
+}
+
+declare global {
+  // eslint-disable-next-line no-var, vars-on-top
+  var __REACT_PERF__: ReactPerfHarness | undefined;
+}
+
+export const createReactPerfHarness = (target: DevtoolsGlobal = globalThis): ReactPerfHarness => {
+  installReactDevtoolsBackend(target);
+
+  const harness: ReactPerfHarness = {
+    start: () => {
+      const renderers = target.__REACT_DEVTOOLS_GLOBAL_HOOK__?.rendererInterfaces;
+      if (!renderers) return;
+      for (const renderer of renderers.values()) renderer.startProfiling(true);
+    },
+    stop: () => Promise.resolve(collectProfilingExport(target)),
+  };
+
+  target.__REACT_PERF__ = harness;
+  return harness;
+};
diff --git a/packages/browser/src/react-profiler/inject.ts b/packages/browser/src/react-profiler/inject.ts
new file mode 100644
index 000000000..e60aa2e5c
--- /dev/null
+++ b/packages/browser/src/react-profiler/inject.ts
@@ -0,0 +1,6 @@
+import { createReactPerfHarness } from "./harness.js";
+
+// esbuilt into the IIFE the session injects via `addInitScript`, so it runs at
+// document-start — the only moment installing the DevTools hook lets React
+// attach to it.
+createReactPerfHarness();
diff --git a/packages/browser/src/react-profiler/types/devtools-inline-backend.d.ts b/packages/browser/src/react-profiler/types/devtools-inline-backend.d.ts
new file mode 100644
index 000000000..534c5a520
--- /dev/null
+++ b/packages/browser/src/react-profiler/types/devtools-inline-backend.d.ts
@@ -0,0 +1,5 @@
+// Types for `react-devtools-inline/backend`, which ships Flow source with no
+// TypeScript types. Wired in via tsconfig `paths`. We only use `initialize`,
+// which installs the DevTools hook; the renderer attaches itself to the hook
+// when React loads, and we drive that renderer interface directly.
+export const initialize: (windowOrGlobal: unknown) => void;
diff --git a/packages/browser/src/react-profiler/types/profiling-export.ts b/packages/browser/src/react-profiler/types/profiling-export.ts
new file mode 100644
index 000000000..090a22ccb
--- /dev/null
+++ b/packages/browser/src/react-profiler/types/profiling-export.ts
@@ -0,0 +1,49 @@
+import type { PROFILING_EXPORT_VERSION } from "../devtools/collect-profiling-export.js";
+
+export interface ReactProfilerChangeDescription {
+  context: Array<string> | boolean | null;
+  didHooksChange: boolean;
+  isFirstMount: boolean;
+  props: Array<string> | null;
+  state: Array<string> | null;
+  hooks?: Array<number> | null;
+}
+
+export interface ReactProfilerSerializedElement {
+  displayName: string | null;
+  id: number;
+  key: number | string | null;
+  type: number;
+}
+
+export interface ReactProfilerCommitDataExport {
+  changeDescriptions: Array<[number, ReactProfilerChangeDescription]> | null;
+  duration: number;
+  effectDuration: number | null;
+  fiberActualDurations: Array<[number, number]>;
+  fiberSelfDurations: Array<[number, number]>;
+  passiveEffectDuration: number | null;
+  priorityLevel: string | null;
+  timestamp: number;
+  updaters: Array<ReactProfilerSerializedElement> | null;
+}
+
+// One profiled root, exactly as the renderer's `getProfilingData` returns it.
+export interface ReactProfilerRootDataBackend {
+  rootID: number;
+  displayName: string;
+  commitData: Array<ReactProfilerCommitDataExport>;
+  initialTreeBaseDurations: Array<[number, number]>;
+}
+
+// The renderer's per-root data plus `elementNames`, the `fiberID → component
+// name` map an agent needs to read the durations and change descriptions
+// (which key everything by fiber id).
+export interface ReactProfilerDataForRootExport extends ReactProfilerRootDataBackend {
+  elementNames: Array<[number, string]>;
+}
+
+export interface ReactProfilerDataExport {
+  version: typeof PROFILING_EXPORT_VERSION;
+  dataForRoots: Array<ReactProfilerDataForRootExport>;
+}
diff --git a/packages/browser/src/react-profiler/types/react-devtools.ts b/packages/browser/src/react-profiler/types/react-devtools.ts
new file mode 100644
index 000000000..c0f7958b3
--- /dev/null
+++ b/packages/browser/src/react-profiler/types/react-devtools.ts
@@ -0,0 +1,22 @@
+import type { ReactProfilerRootDataBackend } from "./profiling-export.js";
+
+export type DevtoolsGlobal = typeof globalThis;
+
+// The subset of React DevTools' RendererInterface we drive directly, bypassing
+// the frontend Store/bridge/wall. `getDisplayNameForElementID` is absent on
+// older React.
+export interface ReactRendererInterface {
+  startProfiling: (recordChangeDescriptions: boolean) => void;
+  stopProfiling: () => void;
+  getProfilingData: () => { dataForRoots: Array<ReactProfilerRootDataBackend> };
+  getDisplayNameForElementID?: (id: number) => string | null;
+}
+
+export interface ReactDevtoolsHook {
+  rendererInterfaces?: Map<number, ReactRendererInterface>;
+}
+
+declare global {
+  // eslint-disable-next-line no-var, vars-on-top
+  var __REACT_DEVTOOLS_GLOBAL_HOOK__: ReactDevtoolsHook | undefined;
+}
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
new file mode 100644
index 000000000..03ad5319d
--- /dev/null
+++ b/packages/browser/src/session.ts
@@ -0,0 +1,329 @@
+import { readFile } from "node:fs/promises";
+import { fileURLToPath } from "node:url";
+import axe from "axe-core";
+import type { Browser, ConsoleMessage, Page, Request, Response } from "playwright-core";
+import { connectToBrowser, type BrowserConnection } from "./connect.js";
+import {
+  MAX_VIOLATION_TARGETS,
+  NAVIGATION_TIMEOUT_MS,
+  PERFORMANCE_OBSERVE_WINDOW_MS,
+  REACT_PROFILER_INJECT_FILE,
+  SETTLE_TIMEOUT_MS,
+} from "./constants.js";
+import { collectPerformanceReport } from "./perf-observer.js";
+import type {
+  AccessibilityViolation,
+  BrowserConnectOptions,
+  ConsoleMessageEntry,
+  NetworkRequestEntry,
+  PageInspection,
+  PerformanceReport,
+  Viewport,
+} from "./types.js";
+
+// Which signals to collect during a single capture load. Listeners and the perf
+// observers all attach before one navigation, so any combination costs one load.
+interface CaptureSignals {
+  console: boolean; // console messages and uncaught page errors
+  network: boolean; // requests with their response status or failure
+  performance: boolean; // the in-page performance report
+}
+
+interface CaptureResult {
+  console: ConsoleMessageEntry[]; // stays empty when the console signal is off
+  network: NetworkRequestEntry[]; // stays empty when the network signal is off
+  performance: PerformanceReport; // the empty report when the performance signal is off
+}
+
+// A zeroed report: no long frames, no LCP reading, and no layout shift.
+const emptyPerformanceReport = (): PerformanceReport => {
+  const longAnimationFrames: PerformanceReport["longAnimationFrames"] = [];
+  return { longAnimationFrames, largestContentfulPaintMs: null, cumulativeLayoutShift: 0 };
+};
+
+const resolveActivePage = async (browser: Browser): Promise<Page> => {
+  // Reuse the first page of the first context that already has one open.
+  const openPage = browser.contexts().flatMap((context) => context.pages())[0];
+  if (openPage) return openPage;
+  // No page anywhere: open one in the first context, creating a context if needed.
+  const [firstContext] = browser.contexts();
+  return (firstContext ?? (await browser.newContext())).newPage();
+};
+
+// A live handle to the attached page. The page state lives in the browser, so a
+// session is cheap to create per command and there is no server to keep alive.
+export class BrowserSession {
+  private constructor( // private: instances are created by BrowserSession.attach()
+    private readonly connection: BrowserConnection, // its browser handle is closed by dispose()
+    readonly page: Page,
+  ) {}
+
+  static async attach(options: BrowserConnectOptions = {}): Promise<BrowserSession> {
+    // Connect first, then adopt an already-open page (or open one) as the session page.
+    const connection = await connectToBrowser(options);
+    return new BrowserSession(connection, await resolveActivePage(connection.browser));
+  }
+
+  get launched(): boolean {
+    return this.connection.launched; // mirrors the flag reported by the connection
+  }
+
+  async open(url: string): Promise<void> {
+    return this.navigate(url); // resolves once the load has been settled
+  }
+
+  // Open `url` with the React DevTools profiler wired in. The init script has to
+  // run before the page's React loads (the only moment the hook can attach), so
+  // register it, drive this one load, then remove the registration. Leaving it
+  // registered would linger in the persistent Chrome we only attached to:
+  // stacking another copy on every `browser open` (each a separate script that
+  // re-installs the backend) and re-running on a later command's navigation. The
+  // page it just loaded keeps `window.__REACT_PERF__` for subsequent `eval`s.
+  async openWithReactProfiler(url: string): Promise<void> {
+    const injectUrl = new URL(REACT_PROFILER_INJECT_FILE, import.meta.url);
+    const source = await readFile(injectUrl, "utf8").catch(() => null);
+    if (source === null) {
+      throw new Error(
+        `React profiler init script missing at ${fileURLToPath(injectUrl)}; rebuild @react-doctor/browser.`,
+      );
+    }
+    const cdpSession = await this.page.context().newCDPSession(this.page);
+    // Stays undefined if registration itself fails, so cleanup then only detaches.
+    let identifier: string | undefined;
+    try {
+      await cdpSession.send("Page.enable");
+      const added = await cdpSession.send("Page.addScriptToEvaluateOnNewDocument", { source });
+      identifier = added.identifier;
+      await this.navigate(url);
+    } finally {
+      const unregister = "Page.removeScriptToEvaluateOnNewDocument";
+      if (identifier !== undefined) await cdpSession.send(unregister, { identifier }).catch(() => {});
+      await cdpSession.detach().catch(() => {});
+    }
+  }
+
+  private async navigate(url?: string): Promise<void> {
+    const options = { timeout: NAVIGATION_TIMEOUT_MS, waitUntil: "domcontentloaded" } as const;
+    await (url ? this.page.goto(url, options) : this.page.reload(options)); // no url: reload
+    await this.settle(); // every navigation ends with a settle
+  }
+
+  // Emulate the viewport with a CDP device-metrics override rather than
+  // page.setViewportSize: it works on a page we only attached to, clears on
+  // disconnect, and never resizes the user's real window.
+  async setViewport(viewport: Viewport): Promise<void> {
+    const { width, height } = viewport;
+    // Fixed 1x, non-mobile emulation; only the dimensions come from the caller.
+    const deviceMetrics = { width, height, deviceScaleFactor: 1, mobile: false };
+    // NOTE(review): this CDP session is never detached here — presumably so the
+    // override outlives the call until the connection drops; confirm.
+    const cdpSession = await this.page.context().newCDPSession(this.page);
+    await cdpSession.send("Emulation.setDeviceMetricsOverride", deviceMetrics);
+  }
+
+  // The expression runs here in Node with the Playwright `page` in scope (the
+  // whole driver API), not in the page — so an agent acts on what `snapshot`
+  // showed it using Playwright's own selectors.
+  async evaluate<T = unknown>(expression: string): Promise<T> {
+    const run = new Function("page", `"use strict"; return (async () => (${expression}))();`) as (
+      page: Page,
+    ) => Promise<T>;
+    return run(this.page); // NOTE(review): runs caller-supplied code in this Node process
+  }
+
+  // Wait for the page to stop changing before reading it: in-flight requests drain,
+  // then web fonts load — else the design job screenshots a half-rendered frame (lazy
+  // images, fade-in, fallback fonts). Bounded, best-effort: a never-idle page hits the cap.
+  private async settle(): Promise<void> {
+    const networkIdle = this.page.waitForLoadState("networkidle", { timeout: SETTLE_TIMEOUT_MS });
+    await networkIdle.catch(() => {});
+    await this.waitForFonts();
+  }
+
+  // `document.fonts.ready` can stall on a page that keeps registering fonts, so
+  // race it against a timer — otherwise settle() (on the hot path of every
+  // command) could hang. A failed in-page evaluate counts as "done waiting".
+  private waitForFonts(): Promise<void> {
+    let capTimer: ReturnType<typeof setTimeout> | undefined;
+    const cap = new Promise<void>((resolve) => {
+      capTimer = setTimeout(resolve, SETTLE_TIMEOUT_MS);
+    });
+    const fontsReady = this.page
+      .evaluate(() => document.fonts?.ready.then(() => undefined))
+      .catch(() => undefined);
+    // Whichever settles first wins; the cap timer is cleared either way.
+    return Promise.race([fontsReady, cap]).finally(() => clearTimeout(capTimer));
+  }
+
+  async snapshot(): Promise<string> {
+    return this.page.locator("body").ariaSnapshot(); // ARIA snapshot rooted at <body>
+  }
+
+  // Settle first so a screenshot taken straight after an SPA navigation (or in a
+  // separate command that reattaches) still captures the finished page.
+  async screenshot(path?: string): Promise<Uint8Array> {
+    await this.settle();
+    return this.page.screenshot({ path }); // `path` is forwarded to Playwright as given
+  }
+
+  async audit(url?: string): Promise<AccessibilityViolation[]> {
+    // With a `url`, navigate there first (navigation settles the page itself);
+    // without one, only settle the page that is already open.
+    const ready = url ? this.navigate(url) : this.settle();
+    await ready;
+    // axe runs against whatever page the step above left in place.
+    return this.runAxe();
+  }
+
+  // axe is injected with `evaluate`, not a <script> tag, so a strict CSP can't
+  // block it.
+  private async runAxe(): Promise<AccessibilityViolation[]> {
+    await this.page.evaluate(axe.source);
+    return this.page.evaluate(async (targetLimit) => {
+      const pageAxe: typeof axe = (globalThis as unknown as { axe: typeof axe }).axe;
+      const { violations } = await pageAxe.run(document, { resultTypes: ["violations"] });
+      return violations.map(({ id, impact, help, helpUrl, nodes }) => ({
+        id,
+        impact: impact ?? null,
+        help,
+        helpUrl,
+        targets: nodes.slice(0, targetLimit).map((node) => node.target.join(" ")),
+      }));
+    }, MAX_VIOLATION_TARGETS);
+  }
+
+  // Attach console + page-error listeners (before navigation, so load-time
+  // messages are seen) and return a function that detaches them again.
+  private collectConsole(entries: ConsoleMessageEntry[]): () => void {
+    const recordMessage = (message: ConsoleMessage): void => {
+      const where = message.location();
+      entries.push({
+        type: message.type(),
+        text: message.text(),
+        location: where.url ? `${where.url}:${where.lineNumber}` : null,
+      });
+    };
+    const recordPageError = (error: Error): void => {
+      entries.push({ type: "error", text: error.message, location: null });
+    };
+    this.page.on("console", recordMessage);
+    this.page.on("pageerror", recordPageError);
+    return () => {
+      this.page.off("console", recordMessage);
+      this.page.off("pageerror", recordPageError);
+    };
+  }
+
+  private collectNetwork(entriesByRequest: Map<Request, NetworkRequestEntry>): () => void {
+    const trackRequest = (request: Request): void => {
+      entriesByRequest.set(request, {
+        method: request.method(),
+        url: request.url(),
+        resourceType: request.resourceType(),
+        status: null, // filled in by recordStatus once a response arrives
+        failure: null, // filled in by recordFailure if the request fails
+      });
+    };
+    const recordStatus = (response: Response): void => {
+      const tracked = entriesByRequest.get(response.request());
+      if (tracked) tracked.status = response.status();
+    };
+    const recordFailure = (request: Request): void => {
+      const tracked = entriesByRequest.get(request);
+      if (tracked) tracked.failure = request.failure()?.errorText ?? "failed";
+    };
+    this.page.on("request", trackRequest);
+    this.page.on("response", recordStatus);
+    this.page.on("requestfailed", recordFailure);
+    return () => {
+      this.page.off("request", trackRequest);
+      this.page.off("response", recordStatus);
+      this.page.off("requestfailed", recordFailure);
+    };
+  }
+
+  // Arm every requested observer before a single navigation, drive that one
+  // load, then read everything back — so capturing N signals costs ONE load,
+  // not N. Listeners detach in `finally` so a navigation error can't leak them.
+  private async runCapture(
+    url: string | undefined,
+    signals: CaptureSignals,
+  ): Promise<CaptureResult> {
+    const consoleEntries: ConsoleMessageEntry[] = [];
+    const networkByRequest = new Map<Request, NetworkRequestEntry>();
+    const detachers: Array<() => void> = [];
+    if (signals.console) detachers.push(this.collectConsole(consoleEntries));
+    if (signals.network) detachers.push(this.collectNetwork(networkByRequest));
+    let performance = emptyPerformanceReport(); // stays empty unless signals.performance is set
+    try {
+      await this.navigate(url); // reloads the current page when `url` is undefined
+      // Measure perf inside the try so console/network listeners stay attached
+      // through its observation window (the collector waits internally), catching
+      // post-load errors and requests. `buffered: true` replays the load's frames.
+      if (signals.performance) performance = await this.measureCurrentPerformance();
+    } finally {
+      for (const detach of detachers) detach();
+    }
+    return { console: consoleEntries, network: [...networkByRequest.values()], performance };
+  }
+
+  // A per-page watermark inside collectPerformanceReport keeps a repeated
+  // no-reload measurement from re-counting frames an earlier command already
+  // reported on the same persistent page.
+  private measureCurrentPerformance(): Promise<PerformanceReport> {
+    return this.page.evaluate(collectPerformanceReport, PERFORMANCE_OBSERVE_WINDOW_MS); // in-page
+  }
+
+  async captureConsole(url?: string): Promise<ConsoleMessageEntry[]> {
+    // Arm only the console and page-error listeners for this one load (a
+    // reload of the current page when `url` is omitted), then hand back what
+    // they recorded.
+    const consoleOnly: CaptureSignals = { console: true, network: false, performance: false };
+    const capture = await this.runCapture(url, consoleOnly);
+    return capture.console;
+  }
+
+  async captureNetwork(url?: string): Promise<NetworkRequestEntry[]> {
+    // Arm only the request/response/failure listeners for this one load (a
+    // reload of the current page when `url` is omitted), then hand back one
+    // entry per request seen.
+    const networkOnly: CaptureSignals = { console: false, network: true, performance: false };
+    const capture = await this.runCapture(url, networkOnly);
+    return capture.network;
+  }
+
+  // Without a `url`, measure the page as it is now with no reload — a reload
+  // would wipe a just-performed `eval` interaction and its jank.
+  async measurePerformance(url?: string): Promise<PerformanceReport> {
+    // Guard clause: nothing to load, so read the live page directly.
+    if (!url) return this.measureCurrentPerformance();
+    const performanceOnly: CaptureSignals = {
+      console: false,
+      network: false,
+      performance: true,
+    };
+    const capture = await this.runCapture(url, performanceOnly);
+    return capture.performance;
+  }
+
+  async inspectPage(url?: string): Promise<PageInspection> {
+    // One load with every signal armed, so console, network and performance
+    // all come from the same navigation.
+    const everySignal: CaptureSignals = {
+      console: true,
+      network: true,
+      performance: true,
+    };
+    const { console, network, performance } = await this.runCapture(url, everySignal);
+    // axe runs last, against the page that load left behind.
+    const accessibility = await this.runAxe();
+    return { console, network, performance, accessibility };
+  }
+
+  // Drop our CDP connection. This only disconnects — it never kills the browser,
+  // whether the user had it open or we launched it — so the page stays alive and
+  // the next `browser` command reattaches to the same live session.
+  async dispose(): Promise<void> {
+    await this.connection.browser.close().catch(() => {}); // best-effort: a failed close is ignored
+  }
+}
diff --git a/packages/browser/src/types.ts b/packages/browser/src/types.ts
new file mode 100644
index 000000000..865e5bb54
--- /dev/null
+++ b/packages/browser/src/types.ts
@@ -0,0 +1,62 @@
+export interface BrowserConnectOptions {
+  // Default http://127.0.0.1:9222. On attach failure (unless `launch` is false)
+  // a local endpoint launches our own persistent Chrome instead.
+  cdpEndpoint?: string;
+  launch?: boolean; // false disables the launch fallback described above
+}
+
+export interface Viewport {
+  width: number; // sent as the `width` of the CDP device-metrics override
+  height: number; // sent as the `height` of the CDP device-metrics override
+}
+
+export interface AccessibilityViolation {
+  id: string;
+  impact: string | null; // null when axe reports no impact for the rule
+  help: string;
+  helpUrl: string;
+  targets: string[]; // selectors of affected nodes, capped at MAX_VIOLATION_TARGETS
+}
+
+export interface ConsoleMessageEntry {
+  type: string; // the console message type, or "error" for an uncaught page error
+  text: string;
+  location: string | null; // "url:line", or null when no source URL is known
+}
+
+export interface NetworkRequestEntry {
+  method: string;
+  url: string;
+  resourceType: string;
+  status: number | null; // null until a response is seen for the request
+  failure: string | null; // the failure's error text (or "failed"); null if it did not fail
+}
+
+export interface PerformanceScriptAttribution {
+  sourceUrl: string; // NOTE(review): fields look like Long Animation Frame script timing — confirm
+  sourceFunctionName: string;
+  invokerType: string;
+  durationMs: number;
+  forcedStyleAndLayoutMs: number;
+}
+
+// A frame that took >50ms.
+export interface LongAnimationFrame {
+  startTimeMs: number;
+  durationMs: number;
+  blockingDurationMs: number;
+  scripts: PerformanceScriptAttribution[]; // script attributions recorded for this frame
+}
+
+export interface PerformanceReport {
+  longAnimationFrames: LongAnimationFrame[];
+  largestContentfulPaintMs: number | null; // null in the empty (nothing measured) report
+  cumulativeLayoutShift: number; // 0 in the empty report
+}
+
+export interface PageInspection {
+  console: ConsoleMessageEntry[]; // console, network and performance come from one load
+  network: NetworkRequestEntry[];
+  performance: PerformanceReport;
+  accessibility: AccessibilityViolation[]; // axe violations, gathered after that load
+}
diff --git a/packages/browser/src/utils/cdp-port.ts b/packages/browser/src/utils/cdp-port.ts
new file mode 100644
index 000000000..0e64addaf
--- /dev/null
+++ b/packages/browser/src/utils/cdp-port.ts
@@ -0,0 +1,11 @@
+import { DEFAULT_CDP_PORT } from "../constants.js";
+
+// Returns the endpoint's explicit port as a string, or the default (9222) when
+// the endpoint has no explicit port or can't be parsed as a URL.
+export const cdpPortFromEndpoint = (endpoint: string): string => {
+  let explicitPort = "";
+  try {
+    explicitPort = new URL(endpoint).port;
+  } catch {} // An unparseable endpoint keeps the empty port and gets the default.
+  return explicitPort || String(DEFAULT_CDP_PORT);
+};
diff --git a/packages/browser/src/utils/delay.ts b/packages/browser/src/utils/delay.ts
new file mode 100644
index 000000000..fce605b1e
--- /dev/null
+++ b/packages/browser/src/utils/delay.ts
@@ -0,0 +1,2 @@
+export const delay = (durationMs: number): Promise<void> =>
+  new Promise<void>((done) => void setTimeout(done, durationMs)); // resolves after the timer fires
diff --git a/packages/browser/src/utils/is-loopback-endpoint.ts b/packages/browser/src/utils/is-loopback-endpoint.ts
new file mode 100644
index 000000000..dc3859c5e
--- /dev/null
+++ b/packages/browser/src/utils/is-loopback-endpoint.ts
@@ -0,0 +1,8 @@
+// WHATWG `URL.hostname` keeps IPv6 brackets, so loopback v6 is "[::1]" (never "::1").
+export const isLoopbackEndpoint = (endpoint: string): boolean => {
+  try {
+    return ["127.0.0.1", "localhost", "[::1]"].includes(new URL(endpoint).hostname);
+  } catch {
+    return false; // An unparseable endpoint is not treated as local.
+  }
+};
diff --git a/packages/browser/tests/cdp-port.test.ts b/packages/browser/tests/cdp-port.test.ts
new file mode 100644
index 000000000..8a0ef9df1
--- /dev/null
+++ b/packages/browser/tests/cdp-port.test.ts
@@ -0,0 +1,14 @@
+import { expect, test } from "vite-plus/test";
+import { cdpPortFromEndpoint } from "../src/utils/cdp-port.js";
+
+test("cdpPortFromEndpoint returns the explicit port", () => {
+  expect(cdpPortFromEndpoint("http://127.0.0.1:9333")).toBe("9333"); // returned as a string
+});
+
+test("cdpPortFromEndpoint falls back to the default when no port is present", () => {
+  expect(cdpPortFromEndpoint("http://localhost")).toBe("9222"); // URL.port is "" here
+});
+
+test("cdpPortFromEndpoint falls back to the default for an unparseable endpoint", () => {
+  expect(cdpPortFromEndpoint("not a url")).toBe("9222"); // `new URL` throws; the catch answers
+});
diff --git a/packages/browser/tests/connect.test.ts b/packages/browser/tests/connect.test.ts
new file mode 100644
index 000000000..783c97f22
--- /dev/null
+++ b/packages/browser/tests/connect.test.ts
@@ -0,0 +1,11 @@
+import { expect, test } from "vite-plus/test";
+import { connectToBrowser } from "../src/connect.js";
+
+// An unreachable CDP endpoint refuses fast, so we don't wait the full attach
+// timeout. With launching disabled, the attach failure should surface as the
+// actionable "start Chrome with --remote-debugging-port" error.
+test("connectToBrowser throws an actionable error when attach fails and launch is disabled", async () => {
+  await expect(
+    connectToBrowser({ cdpEndpoint: "http://127.0.0.1:1", launch: false }),
+  ).rejects.toThrow(/--remote-debugging-port=1/); // the "1" is the port of the endpoint above
+});
diff --git a/packages/browser/tests/react-profiler/collect-profiling-export.test.ts b/packages/browser/tests/react-profiler/collect-profiling-export.test.ts
new file mode 100644
index 000000000..2233abe7c
--- /dev/null
+++ b/packages/browser/tests/react-profiler/collect-profiling-export.test.ts
@@ -0,0 +1,78 @@
+import { describe, expect, it } from "vite-plus/test";
+import { collectProfilingExport } from "../../src/react-profiler/devtools/collect-profiling-export.js";
+import type { ReactProfilerCommitDataExport } from "../../src/react-profiler/types/profiling-export.js";
+import type {
+  DevtoolsGlobal,
+  ReactRendererInterface,
+} from "../../src/react-profiler/types/react-devtools.js";
+
+const commit = (
+  overrides: Partial<ReactProfilerCommitDataExport> = {},
+): ReactProfilerCommitDataExport => ({
+  changeDescriptions: [
+    [2, { context: null, didHooksChange: true, isFirstMount: false, props: [], state: ["count"] }],
+  ],
+  duration: 1.2,
+  effectDuration: null,
+  fiberActualDurations: [[2, 1.2]],
+  fiberSelfDurations: [[2, 0.8]],
+  passiveEffectDuration: null,
+  priorityLevel: "Normal",
+  timestamp: 100,
+  updaters: null,
+  ...overrides, // spread last so caller-supplied fields win over the defaults above
+});
+
+const makeRenderer = (options: {
+  commits: Array<ReactProfilerCommitDataExport>;
+  names?: Record<number, string>;
+}): ReactRendererInterface => ({
+  startProfiling: () => {},
+  stopProfiling: () => {},
+  getProfilingData: () => ({
+    rendererID: 1, // NOTE(review): not part of the typed return shape — confirm it is intentional
+    dataForRoots: [
+      {
+        rootID: 1,
+        displayName: "App",
+        commitData: options.commits,
+        initialTreeBaseDurations: [[2, 1.2]],
+      },
+    ],
+  }),
+  getDisplayNameForElementID: options.names ? (id) => options.names?.[id] ?? null : undefined,
+});
+
+const targetWith = (renderer: ReactRendererInterface | null): DevtoolsGlobal => {
+  // A fake global whose hook holds the given renderer under id 1, or none at all.
+  const rendererInterfaces = new Map<number, ReactRendererInterface>();
+  if (renderer) rendererInterfaces.set(1, renderer);
+  return { __REACT_DEVTOOLS_GLOBAL_HOOK__: { rendererInterfaces } } as unknown as DevtoolsGlobal;
+};
+
+describe("collectProfilingExport", () => {
+  it("returns the renderer's commit data with resolved element names", () => {
+    const target = targetWith(makeRenderer({ commits: [commit()], names: { 2: "Counter" } }));
+    const result = collectProfilingExport(target);
+    expect(result?.version).toBe(5); // presumably PROFILING_EXPORT_VERSION — confirm
+    expect(result?.dataForRoots).toHaveLength(1);
+    const root = result?.dataForRoots[0];
+    expect(root?.displayName).toBe("App");
+    expect(root?.commitData[0]?.fiberSelfDurations).toEqual([[2, 0.8]]);
+    expect(root?.elementNames).toEqual([[2, "Counter"]]); // fiber 2 resolved through `names`
+  });
+
+  it("returns null when no commits were recorded (e.g. a production build)", () => {
+    const target = targetWith(makeRenderer({ commits: [] }));
+    expect(collectProfilingExport(target)).toBeNull();
+  });
+
+  it("returns null when no renderer is attached", () => {
+    expect(collectProfilingExport(targetWith(null))).toBeNull();
+  });
+
+  it("omits names when the renderer cannot resolve them", () => {
+    const target = targetWith(makeRenderer({ commits: [commit()] })); // no `names`: resolver absent
+    expect(collectProfilingExport(target)?.dataForRoots[0]?.elementNames).toEqual([]);
+  });
+});
diff --git a/packages/browser/tsconfig.json b/packages/browser/tsconfig.json
new file mode 100644
index 000000000..e35ad0c69
--- /dev/null
+++ b/packages/browser/tsconfig.json
@@ -0,0 +1,12 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true,
+    "lib": ["ESNext", "DOM", "DOM.Iterable"],
+    "types": ["node"],
+    "paths": {
+      "react-devtools-inline/backend": ["./src/react-profiler/types/devtools-inline-backend.d.ts"]
+    }
+  },
+  "include": ["src", "tests"]
+}
diff --git a/packages/browser/vite.config.ts b/packages/browser/vite.config.ts
new file mode 100644
index 000000000..4581a9ea6
--- /dev/null
+++ b/packages/browser/vite.config.ts
@@ -0,0 +1,51 @@
+import * as path from "node:path";
+import { fileURLToPath } from "node:url";
+import { buildSync } from "esbuild";
+import { defineConfig } from "vite-plus";
+
+const packageRoot = path.dirname(fileURLToPath(import.meta.url)); // directory of this config file
+
+// Bundle the React-profiler init script into one self-contained IIFE that the
+// session registers over CDP (`Page.addScriptToEvaluateOnNewDocument`). It must
+// be standalone (no module system) because it runs in the page before any app
+// code, and is minified because it inlines the React DevTools backend (~1.5MB).
+// Built after pack so the node dist never imports this browser-only code; the session loads it by path.
+const buildReactProfilerInject = (): void => {
+  buildSync({
+    entryPoints: [path.join(packageRoot, "src/react-profiler/inject.ts")],
+    outfile: path.join(packageRoot, "dist/inject/react-profiler.global.js"),
+    bundle: true,
+    format: "iife",
+    platform: "browser",
+    target: "es2022",
+    minify: true,
+    define: { "process.env.NODE_ENV": '"production"' },
+  });
+};
+
+export default defineConfig({
+  pack: [
+    {
+      entry: {
+        index: "./src/index.ts",
+      },
+      deps: {
+        // playwright-core is large and resolves its own browser channel at
+        // runtime; keep it external so the dist stays a thin wrapper.
+        neverBundle: ["playwright-core"],
+      },
+      dts: true,
+      target: "es2022",
+      platform: "node",
+      fixedExtension: false,
+      hooks: {
+        "build:done": () => { // the inject bundle is built only after the node pack finishes
+          buildReactProfilerInject();
+        },
+      },
+    },
+  ],
+  test: {
+    testTimeout: 10_000,
+  },
+});
diff --git a/packages/debug/package.json b/packages/debug/package.json
new file mode 100644
index 000000000..9d1f94eaf
--- /dev/null
+++ b/packages/debug/package.json
@@ -0,0 +1,26 @@
+{
+  "name": "@react-doctor/debug",
+  "version": "0.0.1",
+  "private": true,
+  "description": "Internal: React Doctor's logging server for the debug job. A local HTTP server that collects runtime logs as NDJSON, so the agent can post them from instrumented code, read them back, and clear them between reproductions.",
+  "license": "MIT",
+  "type": "module",
+  "sideEffects": false,
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "build": "node -e \"require('node:fs').rmSync('dist', { recursive: true, force: true })\" && cross-env NODE_ENV=production vp pack",
+    "typecheck": "tsc --noEmit",
+    "test": "vp test run"
+  },
+  "devDependencies": {
+    "@types/node": "^25.6.0"
+  },
+  "engines": {
+    "node": "^20.19.0 || >=22.13.0"
+  }
+}
diff --git a/packages/debug/src/constants.ts b/packages/debug/src/constants.ts
new file mode 100644
index 000000000..94ff9b3f4
--- /dev/null
+++ b/packages/debug/src/constants.ts
@@ -0,0 +1,23 @@
+// Random bytes used to mint a session id; 3 bytes render as a 6-char hex string.
+export const SESSION_ID_BYTE_LENGTH = 3;
+
+// How long to wait for an existing server to answer a health ping before we
+// treat its lock file as stale and take the port for ourselves.
+export const LOCK_PING_TIMEOUT_MS = 1000;
+
+export const LOG_DIRECTORY_NAME = "react-doctor-debug"; // presumably the log directory's name — confirm
+
+// Hex chars of the project-path hash used to give each project its own log +
+// lock subdirectory. 16 keeps collisions negligible without a long path.
+export const PROJECT_KEY_LENGTH = 16;
+
+// Cap on remembered entry ids for dedup. When reached, the set clears, so the
+// memory stays bounded over a long session at the cost of allowing a stale
+// duplicate after a wraparound.
+export const MAX_DEDUP_ENTRIES = 10_000;
+
+// Reject a POST body larger than this so a runaway/slow client can't exhaust
+// memory (a single NDJSON log line is tiny).
+export const MAX_REQUEST_BODY_BYTES = 1_000_000;
+
+export const DEFAULT_HOST = "127.0.0.1"; // loopback; the log server's host when none is given
diff --git a/packages/debug/src/index.ts b/packages/debug/src/index.ts
new file mode 100644
index 000000000..22e719880
--- /dev/null
+++ b/packages/debug/src/index.ts
@@ -0,0 +1,3 @@
+export { createLogServer } from "./server.js";
+export { DEFAULT_HOST } from "./constants.js";
+export type { LogServerOptions, LogServerInfo, LogServerResult } from "./types.js";
diff --git a/packages/debug/src/server.ts b/packages/debug/src/server.ts
new file mode 100644
index 000000000..2060cb495
--- /dev/null
+++ b/packages/debug/src/server.ts
@@ -0,0 +1,221 @@
+import crypto from "node:crypto";
+import fs from "node:fs";
+import http from "node:http";
+import path from "node:path";
+import {
+  DEFAULT_HOST,
+  MAX_DEDUP_ENTRIES,
+  MAX_REQUEST_BODY_BYTES,
+  SESSION_ID_BYTE_LENGTH,
+} from "./constants.js";
+import type { LogEntry, LogServerInfo, LogServerOptions, LogServerResult } from "./types.js";
+import { pingServer } from "./utils/ping-server.js";
+import { resolveLogDirectory } from "./utils/resolve-log-directory.js";
+import { readServerLock, removeServerLock, writeServerLock } from "./utils/server-lock.js";
+
+// Per-session view of the log file plus the ids already written, so a retried
+// POST (same `id`) is acknowledged without appending a duplicate line.
+interface SessionState {
+  logPath: string;
+  processedEntryIds: Set<string>; // cleared once it reaches MAX_DEDUP_ENTRIES, and on DELETE
+}
+
+const parseIngestSessionId = (requestUrl: string): string | null => {
+  try {
+    const parsed = new URL(requestUrl, "http://localhost");
+    const sessionId = /^\/ingest\/([a-zA-Z0-9_-]+)\/?$/.exec(parsed.pathname)?.[1];
+    return sessionId ?? null; // null for any path that is not /ingest/<id>
+  } catch {
+    return null;
+  }
+};
+
+const sendJson = (response: http.ServerResponse, statusCode: number, payload: unknown): void => {
+  const withHead = response.writeHead(statusCode, { "Content-Type": "application/json" });
+  withHead.end(JSON.stringify(payload)); // the head is written before the payload is serialised
+};
+
+// One server can host many sessions; each owns one log file and posts to
+// `/ingest/<sessionId>`. Starting twice is safe: a live server holding the lock
+// is returned with `reused: true` and no second port is bound.
+export const createLogServer = async (options: LogServerOptions = {}): Promise<LogServerResult> => {
+  const sessionId = options.sessionId || crypto.randomBytes(SESSION_ID_BYTE_LENGTH).toString("hex");
+  const logDirectory = resolveLogDirectory(options.cwd);
+  const logFilePathFor = (id: string): string => path.join(logDirectory, `debug-${id}.log`);
+  const primaryLogPath = options.logPath || logFilePathFor(sessionId);
+  const host = options.host || DEFAULT_HOST;
+  const requestedPort = options.port || 0;
+
+  if (!fs.existsSync(logDirectory)) fs.mkdirSync(logDirectory, { recursive: true });
+
+  const existingLock = readServerLock(logDirectory);
+  if (existingLock) {
+    if (await pingServer(existingLock.host, existingLock.port)) {
+      // The running server hosts any `/ingest/<id>` on demand and writes each to
+      // `logFilePathFor(id)` in this (shared, per-project) directory. So when the
+      // caller asked for a different session id, return info pointing at THAT
+      // session instead of the lock's original — otherwise they'd instrument and
+      // read the wrong endpoint/file. An explicit `--log-path` can't be honored on
+      // reuse: the already-running server owns where it writes, so `logPath`
+      // reflects the server's real location, not the requested one.
+      const usesRequestedSession =
+        Boolean(options.sessionId) && sessionId !== existingLock.sessionId;
+      const info: LogServerInfo = usesRequestedSession
+        ? {
+            sessionId,
+            port: existingLock.port,
+            endpoint: `http://${existingLock.host}:${existingLock.port}/ingest/${sessionId}`,
+            logPath: logFilePathFor(sessionId),
+          }
+        : {
+            sessionId: existingLock.sessionId,
+            port: existingLock.port,
+            endpoint: existingLock.endpoint,
+            logPath: existingLock.logPath,
+          };
+      return { server: null, info, reused: true };
+    }
+    removeServerLock(logDirectory); // the lock's server did not answer: treat the lock as stale
+  }
+
+  const sessions = new Map<string, SessionState>();
+  const getSessionState = (requestSessionId: string): SessionState => {
+    const existing = sessions.get(requestSessionId);
+    if (existing) return existing;
+    const logPath =
+      requestSessionId === sessionId ? primaryLogPath : logFilePathFor(requestSessionId);
+    const sessionState: SessionState = { logPath, processedEntryIds: new Set() };
+    sessions.set(requestSessionId, sessionState);
+    return sessionState;
+  };
+
+  const appendLog = (
+    sessionState: SessionState,
+    requestSessionId: string,
+    requestBody: string,
+  ): boolean => {
+    const logEntry: LogEntry = JSON.parse(requestBody);
+    if (typeof logEntry !== "object" || logEntry === null || Array.isArray(logEntry))
+      throw new Error("Body must be an object"); // arrays pass `typeof`, so reject them by name
+    if (logEntry.id && sessionState.processedEntryIds.has(logEntry.id)) return true;
+
+    logEntry.sessionId = logEntry.sessionId || requestSessionId;
+    logEntry.timestamp = logEntry.timestamp || Date.now();
+    fs.appendFileSync(sessionState.logPath, `${JSON.stringify(logEntry)}\n`);
+
+    if (logEntry.id) {
+      if (sessionState.processedEntryIds.size >= MAX_DEDUP_ENTRIES) {
+        sessionState.processedEntryIds.clear();
+      }
+      sessionState.processedEntryIds.add(logEntry.id);
+    }
+    return false;
+  };
+
+  const server = http.createServer((request, response) => {
+    response.setHeader("Access-Control-Allow-Origin", "*");
+    response.setHeader("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS");
+    response.setHeader("Access-Control-Allow-Headers", "Content-Type");
+
+    if (request.method === "OPTIONS") {
+      response.writeHead(204).end();
+      return;
+    }
+
+    const requestUrl = request.url || "/";
+
+    // Liveness marker `pingServer` checks before reusing this server.
+    if (requestUrl === "/" && request.method === "GET") {
+      sendJson(response, 200, { ok: true });
+      return;
+    }
+
+    const requestSessionId = parseIngestSessionId(requestUrl);
+    if (!requestSessionId) {
+      sendJson(response, 404, { error: "Not found" });
+      return;
+    }
+
+    const sessionState = getSessionState(requestSessionId);
+
+    if (request.method === "POST") {
+      const chunks: Buffer[] = [];
+      let byteLength = 0;
+      request.on("data", (chunk: Buffer) => {
+        byteLength += chunk.length;
+        if (byteLength > MAX_REQUEST_BODY_BYTES) {
+          sendJson(response, 413, { error: "Body too large" });
+          request.destroy();
+          return;
+        }
+        chunks.push(chunk);
+      });
+      request.on("end", () => {
+        if (response.writableEnded) return;
+        try {
+          const wasDuplicate = appendLog(
+            sessionState,
+            requestSessionId,
+            Buffer.concat(chunks).toString("utf-8"),
+          );
+          sendJson(response, 200, wasDuplicate ? { ok: true, duplicate: true } : { ok: true });
+        } catch {
+          sendJson(response, 400, { error: "Invalid JSON" });
+        }
+      });
+      return;
+    }
+
+    if (request.method === "DELETE") {
+      try {
+        fs.rmSync(sessionState.logPath, { force: true }); // force: a missing log is not an error
+        sessionState.processedEntryIds.clear();
+        sendJson(response, 200, { ok: true, cleared: true });
+      } catch {
+        // Generic, not the raw error: don't echo filesystem detail back to a
+        // client (it's the local log file's path/permissions).
+        sendJson(response, 500, { error: "Failed to clear log" });
+      }
+      return;
+    }
+
+    if (request.method === "GET") {
+      try {
+        const logContent = fs.existsSync(sessionState.logPath)
+          ? fs.readFileSync(sessionState.logPath, "utf-8")
+          : "";
+        response.writeHead(200, { "Content-Type": "application/x-ndjson" });
+        response.end(logContent);
+      } catch {
+        response.writeHead(500, { "Content-Type": "text/plain" });
+        response.end("Failed to read log");
+      }
+      return;
+    }
+
+    response.writeHead(405).end();
+  });
+
+  return new Promise<LogServerResult>((resolve, reject) => {
+    server.listen(requestedPort, host, () => {
+      const serverAddress = server.address();
+      if (!serverAddress || typeof serverAddress === "string") {
+        reject(new Error("Failed to read the bound server address"));
+        return;
+      }
+
+      const info: LogServerInfo = {
+        sessionId,
+        port: serverAddress.port,
+        endpoint: `http://${host}:${serverAddress.port}/ingest/${sessionId}`,
+        logPath: primaryLogPath,
+      };
+
+      writeServerLock(logDirectory, { ...info, pid: process.pid, host });
+      server.on("close", () => removeServerLock(logDirectory));
+
+      resolve({ server, info, reused: false });
+    });
+    server.on("error", reject);
+  });
+};
diff --git a/packages/debug/src/types.ts b/packages/debug/src/types.ts
new file mode 100644
index 000000000..2bcf44d8c
--- /dev/null
+++ b/packages/debug/src/types.ts
@@ -0,0 +1,38 @@
+import type { Server } from "node:http";
+
+export interface LogServerOptions { // Inputs accepted by createLogServer; every field is optional.
+  sessionId?: string; // Session to serve; the CLI help documents a random hex default.
+  // Project directory used to scope the reuse lock and the default log location
+  // to a single codebase, so unrelated projects don't share one server.
+  cwd?: string;
+  logPath?: string; // Explicit log file location, replacing the per-session default.
+  host?: string; // Host the server listens on.
+  port?: number; // Port to listen on; the CLI help documents a random default.
+}
+
+export interface LogServerInfo { // What a caller needs to reach a log server and find its output.
+  sessionId: string;
+  port: number; // Port the server is actually bound to.
+  endpoint: string; // Ingest URL, built as http://<host>:<port>/ingest/<sessionId>.
+  logPath: string; // Log file location reported for this session.
+}
+
+export interface LogServerResult { // Outcome of createLogServer.
+  // null when an already-running server was reused, so callers know there is
+  // nothing of their own to close.
+  server: Server | null;
+  info: LogServerInfo;
+  reused: boolean; // true when an existing server was handed back, false for a fresh bind.
+}
+
+export interface ServerLock extends LogServerInfo { // Contents of the on-disk reuse lock.
+  pid: number; // Process id of the server that wrote the lock.
+  host: string; // Host that server was told to listen on.
+}
+
+export interface LogEntry { // One log record; arbitrary extra fields are allowed.
+  id?: string; // NOTE(review): looks like the dedup key (the tests post a repeated id) — confirm.
+  sessionId?: string;
+  timestamp?: number; // NOTE(review): tests expect a numeric value on stored lines — confirm who sets it.
+  [field: string]: unknown;
+}
diff --git a/packages/debug/src/utils/ping-server.ts b/packages/debug/src/utils/ping-server.ts
new file mode 100644
index 000000000..bc532e68d
--- /dev/null
+++ b/packages/debug/src/utils/ping-server.ts
@@ -0,0 +1,33 @@
+import http from "node:http";
+import { LOCK_PING_TIMEOUT_MS } from "../constants.js";
+
+// Resolves true only when OUR server answers (200 plus the `{ ok: true }` marker
+// the `/` route returns). A stale lock, an unrelated process on a recycled port,
+// or a body cut off before `end` (settled via `close`) all read as dead (false).
+export const pingServer = (host: string, port: number): Promise<boolean> =>
+  new Promise((resolve) => {
+    const request = http.get(
+      { hostname: host, port, path: "/", timeout: LOCK_PING_TIMEOUT_MS },
+      (response) => {
+        if (response.statusCode !== 200) {
+          response.resume();
+          return resolve(false);
+        }
+        let body = "";
+        response.on("data", (chunk: Buffer) => (body += chunk));
+        response.on("end", () => {
+          try {
+            resolve(JSON.parse(body)?.ok === true);
+          } catch {
+            resolve(false);
+          }
+        });
+        response.on("close", () => resolve(false));
+      },
+    );
+    request.on("error", () => resolve(false));
+    request.on("timeout", () => {
+      request.destroy();
+      resolve(false);
+    });
+  });
diff --git a/packages/debug/src/utils/resolve-log-directory.ts b/packages/debug/src/utils/resolve-log-directory.ts
new file mode 100644
index 000000000..9c959606f
--- /dev/null
+++ b/packages/debug/src/utils/resolve-log-directory.ts
@@ -0,0 +1,20 @@
+import crypto from "node:crypto";
+import os from "node:os";
+import path from "node:path";
+import { LOG_DIRECTORY_NAME, PROJECT_KEY_LENGTH } from "../constants.js";
+
+// Picks where a project's server logs and reuse lock are stored: always under
+// the OS temp dir, never inside the repo. When a project directory is given, the
+// result is a subdirectory named after a truncated SHA-256 of its resolved path,
+// which keeps the absolute path out of the tree and stops two projects'
+// `debug serve` runs from sharing a lock and handing each other's
+// endpoint/session back. With no project directory the shared base is returned,
+// preserving the old default.
+export const resolveLogDirectory = (projectDirectory?: string): string => {
+  const sharedBase = path.join(os.tmpdir(), LOG_DIRECTORY_NAME);
+  if (projectDirectory) {
+    const pathDigest = crypto.createHash("sha256").update(path.resolve(projectDirectory));
+    return path.join(sharedBase, pathDigest.digest("hex").slice(0, PROJECT_KEY_LENGTH));
+  }
+  return sharedBase;
+};
diff --git a/packages/debug/src/utils/server-lock.ts b/packages/debug/src/utils/server-lock.ts
new file mode 100644
index 000000000..ce6466df4
--- /dev/null
+++ b/packages/debug/src/utils/server-lock.ts
@@ -0,0 +1,38 @@
+import fs from "node:fs";
+import path from "node:path";
+import type { ServerLock } from "../types.js";
+
+const LOCK_FILENAME = "debug-server.lock"; // Lock file name; getLockPath joins it onto a directory.
+
+const getLockPath = (directory: string): string => path.join(directory, LOCK_FILENAME);
+
+// Validates every ServerLock field so a partial or hand-edited lock file reads as "no lock".
+const isServerLock = (value: unknown): value is ServerLock => {
+  if (typeof value !== "object" || value === null) return false;
+  const fields = value as Record<string, unknown>;
+  const has = (type: string, ...keys: string[]) => keys.every((key) => typeof fields[key] === type);
+  return has("string", "host", "sessionId", "endpoint", "logPath") && has("number", "port", "pid");
+};
+
+export const readServerLock = (directory: string): ServerLock | null => {
+  // A missing file, an unreadable file, and malformed JSON all throw, and all mean "no lock".
+  try {
+    const candidate: unknown = JSON.parse(fs.readFileSync(getLockPath(directory), "utf-8"));
+    if (isServerLock(candidate)) return candidate;
+  } catch {}
+  return null;
+};
+
+// Stage in a pid-suffixed temp file, then rename, so a concurrent reader never sees a torn lock.
+export const writeServerLock = (directory: string, lock: ServerLock): void => {
+  const finalPath = getLockPath(directory);
+  const stagingPath = [finalPath, process.pid, "tmp"].join(".");
+  fs.writeFileSync(stagingPath, JSON.stringify(lock, null, 2));
+  fs.renameSync(stagingPath, finalPath);
+};
+
+export const removeServerLock = (directory: string): void => { // Deletes the directory's lock file.
+  try {
+    fs.unlinkSync(getLockPath(directory));
+  } catch {} // Best effort: any failure (e.g. no lock present) is ignored.
+};
diff --git a/packages/debug/tests/server.test.ts b/packages/debug/tests/server.test.ts
new file mode 100644
index 000000000..2aee83ca7
--- /dev/null
+++ b/packages/debug/tests/server.test.ts
@@ -0,0 +1,71 @@
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, beforeEach, expect, test } from "vite-plus/test";
+import { createLogServer } from "../src/server.js";
+import type { LogServerResult } from "../src/types.js";
+
+let workingDirectory: string; // Fresh temp dir per test, passed to createLogServer as `cwd`.
+let started: LogServerResult | null = null; // Result of the server the current test started.
+
+beforeEach(() => {
+  workingDirectory = fs.mkdtempSync(path.join(os.tmpdir(), "rd-debug-test-"));
+});
+
+afterEach(() => {
+  started?.server?.close(); // Skipped when nothing was started or the result has no server.
+  started = null;
+  fs.rmSync(workingDirectory, { recursive: true, force: true });
+});
+
+// Sends `body` to `endpoint` as a JSON-encoded POST and hands back fetch's
+// Response promise untouched.
+const postLog = (endpoint: string, body: unknown): Promise<Response> => {
+  const headers = { "Content-Type": "application/json" };
+  return fetch(endpoint, { method: "POST", headers, body: JSON.stringify(body) });
+};
+
+test("appends posted logs, reads them back, dedups by id, and clears on DELETE", async () => {
+  started = await createLogServer({ cwd: workingDirectory });
+  const { endpoint, logPath } = started.info;
+
+  await postLog(endpoint, { id: "a", hypothesisId: "A", message: "first" });
+  await postLog(endpoint, { id: "a", hypothesisId: "A", message: "duplicate ignored" }); // same id again
+  await postLog(endpoint, { id: "b", hypothesisId: "B", message: "second" });
+
+  const lines = fs.readFileSync(logPath, "utf-8").trim().split("\n");
+  expect(lines).toHaveLength(2); // three posts, but only two distinct ids
+  expect(JSON.parse(lines[0]).timestamp).toBeTypeOf("number"); // no post carried a timestamp
+
+  const read = await (await fetch(endpoint)).text(); // GET returns the log file's contents
+  expect(read.trim().split("\n")).toHaveLength(2);
+
+  await fetch(endpoint, { method: "DELETE" });
+  expect(fs.existsSync(logPath)).toBe(false); // DELETE unlinks the log file
+});
+
+test("a second start reuses the running server instead of binding a new port", async () => {
+  started = await createLogServer({ cwd: workingDirectory });
+  const second = await createLogServer({ cwd: workingDirectory }); // same cwd as the first start
+
+  expect(second.reused).toBe(true);
+  expect(second.server).toBeNull(); // a reused result carries no server handle to close
+  expect(second.info.port).toBe(started.info.port);
+});
+
+test("reusing with a requested session id returns that session's endpoint and log path", async () => {
+  started = await createLogServer({ cwd: workingDirectory });
+  const reused = await createLogServer({ cwd: workingDirectory, sessionId: "custom-session" });
+
+  expect(reused.reused).toBe(true);
+  expect(reused.info.sessionId).toBe("custom-session");
+  expect(reused.info.port).toBe(started.info.port); // still the first server's port
+  expect(reused.info.endpoint).toBe(`http://127.0.0.1:${started.info.port}/ingest/custom-session`);
+  expect(reused.info.logPath).not.toBe(started.info.logPath); // each session has its own log file
+  expect(reused.info.logPath).toContain("custom-session");
+
+  // The already-running server accepts a session it was not started with and
+  // writes that session's log to the path the reused info reported.
+  await postLog(reused.info.endpoint, { id: "x", message: "on the custom session" });
+  expect(fs.existsSync(reused.info.logPath)).toBe(true);
+});
diff --git a/packages/debug/tsconfig.json b/packages/debug/tsconfig.json
new file mode 100644
index 000000000..c94f93c9f
--- /dev/null
+++ b/packages/debug/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true,
+    "lib": ["ESNext"],
+    "types": ["node"]
+  },
+  "include": ["src", "tests"]
+}
diff --git a/packages/debug/vite.config.ts b/packages/debug/vite.config.ts
new file mode 100644
index 000000000..3eaf4fb15
--- /dev/null
+++ b/packages/debug/vite.config.ts
@@ -0,0 +1,18 @@
+import { defineConfig } from "vite-plus";
+
+export default defineConfig({
+  pack: [ // NOTE(review): presumably the library build settings — confirm against vite-plus docs.
+    {
+      entry: {
+        index: "./src/index.ts",
+      },
+      dts: true,
+      target: "es2022",
+      platform: "node",
+      fixedExtension: false,
+    },
+  ],
+  test: {
+    testTimeout: 10_000, // NOTE(review): presumably milliseconds (10 s per test) — confirm.
+  },
+});
diff --git a/packages/react-doctor/package.json b/packages/react-doctor/package.json
index 9dad36cb4..c90ffa711 100644
--- a/packages/react-doctor/package.json
+++ b/packages/react-doctor/package.json
@@ -74,7 +74,9 @@
   },
   "devDependencies": {
     "@react-doctor/api": "workspace:*",
+    "@react-doctor/browser": "workspace:*",
     "@react-doctor/core": "workspace:*",
+    "@react-doctor/debug": "workspace:*",
     "@react-doctor/language-server": "workspace:*",
     "@types/babel__code-frame": "^7.27.0",
     "@types/prompts": "^2.4.9",
@@ -82,6 +84,9 @@
     "commander": "^14.0.3",
     "ora": "^9.4.0"
   },
+  "optionalDependencies": {
+    "playwright-core": "^1.49.1"
+  },
   "engines": {
     "node": "^20.19.0 || >=22.13.0"
   }
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
new file mode 100644
index 000000000..b01ca379f
--- /dev/null
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -0,0 +1,238 @@
+import type {
+  AccessibilityViolation,
+  BrowserSession,
+  ConsoleMessageEntry,
+  NetworkRequestEntry,
+  PerformanceReport,
+} from "@react-doctor/browser";
+import { DEFAULT_SCREENSHOT_FILENAME, METRIC } from "../utils/constants.js";
+import { cliLogger as logger } from "../utils/cli-logger.js";
+import { recordCount } from "../utils/record-metric.js";
+
+export interface BrowserCommandOptions { // Parsed flags shared by the `browser` subcommands.
+  cdp?: string; // `--cdp`: CDP endpoint to attach to.
+  launch?: boolean; // Forwarded to BrowserSession.attach; `--no-launch` turns it off.
+  out?: string; // `--out`: screenshot output path.
+  viewport?: { width: number; height: number }; // Parsed `--viewport` WIDTHxHEIGHT.
+}
+
+const isModuleNotFoundError = (error: unknown): boolean => { // Error with a module-not-found `code`?
+  if (!(error instanceof Error) || !("code" in error)) return false;
+  return error.code === "ERR_MODULE_NOT_FOUND" || error.code === "MODULE_NOT_FOUND";
+};
+
+// Loads the browser package on demand: playwright-core is heavy and optional, so
+// only the browser commands pay for it. A module-not-found failure is replaced
+// with an install hint; every other failure (a genuine bug in the browser
+// package) is rethrown untouched.
+const loadBrowser = (): Promise<typeof import("@react-doctor/browser")> =>
+  import("@react-doctor/browser").catch((error: unknown) => {
+    if (isModuleNotFoundError(error)) {
+      throw new Error(
+        "The browser tools need playwright-core, which isn't installed. Install it with `npm i -D playwright-core`, then retry.",
+      );
+    }
+    throw error;
+  });
+
+const withSession = async (
+  options: BrowserCommandOptions,
+  useSession: (session: BrowserSession) => Promise<void>,
+): Promise<void> => {
+  const { cdp: cdpEndpoint, launch, viewport } = options;
+  const session = await (await loadBrowser()).BrowserSession.attach({ cdpEndpoint, launch });
+  try { // From here on the session is always disposed, whether the work succeeds or throws.
+    if (viewport) await session.setViewport(viewport);
+    await useSession(session);
+  } finally {
+    await session.dispose();
+  }
+};
+
+export const browserOpenAction = async (
+  url: string,
+  options: BrowserCommandOptions,
+): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.open" });
+  await withSession(options, async (session) => {
+    // openWithReactProfiler wires the DevTools profiler in ahead of the page's
+    // React, performs this single load, and then drops its init-script
+    // registration, so repeated opens don't stack it and a later `perf`/`report`
+    // navigation in the same persistent Chrome doesn't re-run it. Because the
+    // page stays open, later `eval`s can still reach `window.__REACT_PERF__`.
+    await session.openWithReactProfiler(url);
+    logger.success(`Opened ${url}`);
+    logger.log(
+      "React profiler ready: `browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'`, run a scenario, then `stop()` for the DevTools profiling export.",
+    );
+    // Only a Chrome this command launched itself gets the clean-up reminder.
+    if (!session.launched) return;
+    logger.log(
+      "Launched a dedicated Chrome (separate from your main profile); later browser commands reuse it. Quit that window when you're done.",
+    );
+  });
+};
+
+export const browserEvalAction = async (
+  expression: string,
+  options: BrowserCommandOptions,
+): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.eval" });
+  await withSession(options, async (session) => {
+    const value = await session.evaluate(expression); // undefined prints nothing at all
+    if (typeof value === "string") logger.log(value);
+    else if (value !== undefined) logger.log(JSON.stringify(value, null, 2));
+  });
+};
+
+export const browserSnapshotAction = async (options: BrowserCommandOptions): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.snapshot" });
+  await withSession(options, async (session) => {
+    logger.log(await session.snapshot()); // per the CLI help: the page's accessibility tree
+  });
+};
+
+// `browser screenshot`: writes the image to `--out` (or the default filename) and reports the path.
+export const browserScreenshotAction = async (options: BrowserCommandOptions): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.screenshot" });
+  await withSession(options, async (session) => {
+    const destination = options.out ?? DEFAULT_SCREENSHOT_FILENAME;
+    await session.screenshot(destination);
+    logger.success(`Saved ${destination}`);
+  });
+};
+
+// Section printers. The focused commands and the combined `report` both call
+// them, which keeps each line format in a single place. A printer emits only the
+// section body; headers and empty-state messages are the caller's business.
+const printAuditViolations = (violations: AccessibilityViolation[]): void => {
+  violations.forEach(({ impact, id, help, helpUrl, targets }) => {
+    const impactTag = impact ? `[${impact}] ` : "";
+    logger.log(`${impactTag}${id} — ${help}`);
+    logger.log(`  ${helpUrl}`);
+    for (const target of targets) logger.log(`  ${target}`);
+  });
+};
+
+const printConsoleMessages = (messages: ConsoleMessageEntry[]): void => {
+  for (const { type, text, location } of messages) {
+    // The source location is appended only when the entry has one.
+    logger.log(location ? `[${type}] ${text} (${location})` : `[${type}] ${text}`);
+  }
+};
+
+const printNetworkRequests = (requests: NetworkRequestEntry[]): void => {
+  let failedCount = 0;
+  for (const { failure, status, method, url } of requests) {
+    // Failed means a recorded failure, or a known status of 400 and above.
+    if (failure !== null || (status !== null && status >= 400)) failedCount += 1;
+    const outcome = failure ?? (status === null ? "pending" : status);
+    logger.log(`${outcome} ${method} ${url}`);
+  }
+  logger.log(`${requests.length} request(s), ${failedCount} failed`);
+};
+
+const printPerformanceReport = (report: PerformanceReport): void => {
+  const { largestContentfulPaintMs, cumulativeLayoutShift, longAnimationFrames } = report;
+  const lcpText = largestContentfulPaintMs === null ? "n/a" : `${largestContentfulPaintMs}ms`;
+  logger.log(`LCP: ${lcpText}   CLS: ${cumulativeLayoutShift}`);
+  if (longAnimationFrames.length === 0) {
+    logger.log("No long animation frames (>50ms) — no main-thread jank captured");
+    return;
+  }
+  logger.log(`${longAnimationFrames.length} long animation frame(s), worst first:`);
+  for (const frame of longAnimationFrames) {
+    logger.log(
+      `${frame.durationMs}ms frame (blocking ${frame.blockingDurationMs}ms) @ ${frame.startTimeMs}ms`,
+    );
+    for (const script of frame.scripts) {
+      const label = script.sourceFunctionName || "(anonymous)";
+      // The sync-layout suffix appears only when the script reports a positive value.
+      const layoutMs = script.forcedStyleAndLayoutMs;
+      const layoutNote = layoutMs > 0 ? `, ${layoutMs}ms sync layout` : "";
+      logger.log(`  ${script.durationMs}ms ${label} — ${script.sourceUrl || "(inline)"}${layoutNote}`);
+    }
+  }
+};
+
+export const browserAuditAction = async (
+  url: string | undefined,
+  options: BrowserCommandOptions,
+): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.audit" });
+  await withSession(options, async (session) => {
+    const findings = await session.audit(url);
+    if (findings.length > 0) {
+      logger.log(`${findings.length} accessibility violation(s):\n`);
+      printAuditViolations(findings);
+    } else {
+      logger.success("No accessibility violations found"); // clean audit: no header, no list
+    }
+  });
+};
+
+export const browserConsoleAction = async (
+  url: string | undefined,
+  options: BrowserCommandOptions,
+): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.console" });
+  await withSession(options, async (session) => {
+    const captured = await session.captureConsole(url);
+    if (captured.length > 0) {
+      printConsoleMessages(captured);
+    } else {
+      logger.success("No console output captured"); // nothing was captured during the load
+    }
+  });
+};
+
+export const browserNetworkAction = async (
+  url: string | undefined,
+  options: BrowserCommandOptions,
+): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.network" });
+  await withSession(options, async (session) => {
+    const captured = await session.captureNetwork(url);
+    if (captured.length > 0) {
+      printNetworkRequests(captured);
+    } else {
+      logger.success("No network requests captured"); // nothing was captured during the load
+    }
+  });
+};
+
+export const browserPerfAction = async (
+  url: string | undefined, // optional; the CLI help says the command reloads when it is omitted
+  options: BrowserCommandOptions,
+): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.perf" });
+  await withSession(options, async (session) => {
+    printPerformanceReport(await session.measurePerformance(url)); // measure, then print as-is
+  });
+};
+
+// Single-load report: one navigation feeds every section, the efficient path for
+// an agent that would otherwise reload the page once per focused command.
+export const browserReportAction = async (
+  url: string | undefined,
+  options: BrowserCommandOptions,
+): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.report" });
+  await withSession(options, async (session) => {
+    const inspection = await session.inspectPage(url);
+    const { console: logs, network, accessibility: a11y } = inspection;
+
+    // Prints the heading, then the section body, or "(none)" when the section
+    // has nothing to show.
+    const printSection = (heading: string, isEmpty: boolean, printBody: () => void): void => {
+      logger.log(heading);
+      if (isEmpty) logger.log("(none)");
+      else printBody();
+    };
+
+    printSection("# Console", logs.length === 0, () => printConsoleMessages(logs));
+    printSection("\n# Network", network.length === 0, () => printNetworkRequests(network));
+    // Performance has no "(none)" state here: its printer always runs.
+    printSection("\n# Performance", false, () => printPerformanceReport(inspection.performance));
+    printSection("\n# Accessibility", a11y.length === 0, () => printAuditViolations(a11y));
+  });
+};
diff --git a/packages/react-doctor/src/cli/commands/debug.ts b/packages/react-doctor/src/cli/commands/debug.ts
new file mode 100644
index 000000000..def27b281
--- /dev/null
+++ b/packages/react-doctor/src/cli/commands/debug.ts
@@ -0,0 +1,110 @@
+import { spawn } from "node:child_process";
+import { createLogServer, DEFAULT_HOST, type LogServerOptions } from "@react-doctor/debug";
+import { cliLogger as logger } from "../utils/cli-logger.js";
+import { METRIC } from "../utils/constants.js";
+import { recordCount } from "../utils/record-metric.js";
+
+export interface DebugServeOptions { // Parsed flags of `debug serve`.
+  port?: number; // `--port`; the CLI help documents a random default when unset.
+  host: string; // `--host`; always present because the CLI defaults it to DEFAULT_HOST.
+  sessionId?: string; // `--session-id`
+  logPath?: string; // `--log-path`
+  daemon?: boolean; // `--daemon`: start in the background, print the server info, exit.
+  json?: boolean; // `--json`: print the server info as one JSON line.
+}
+
+// Maps the parsed CLI flags onto createLogServer's options and adds the
+// project scope.
+const toServerOptions = (options: DebugServeOptions): LogServerOptions => {
+  const { port, host, sessionId, logPath } = options;
+  // `cwd` scopes the lock and the default log directory to this project, so a
+  // different codebase's `debug serve` can't reuse this server and cross its
+  // sessions.
+  return { port, host, sessionId, logPath, cwd: process.cwd() };
+};
+
+const installShutdown = (server: { close: () => void }): void => {
+  const closeAndExit = (): never => {
+    server.close();
+    return process.exit(0);
+  };
+  // Both termination signals share one handler: close the server, exit with 0.
+  for (const signal of ["SIGINT", "SIGTERM"] as const) process.on(signal, closeAndExit);
+};
+
+// Re-spawns this CLI as `debug serve --json`, detached, so the server outlives
+// this process; forwards the one JSON info line the child prints on startup and
+// then exits, so the agent gets the endpoint without a foreground server.
+const startDaemon = async (options: DebugServeOptions): Promise<void> => {
+  const childArguments = [process.argv[1], "debug", "serve", "--json"];
+  if (options.port) childArguments.push("--port", String(options.port));
+  if (options.host !== DEFAULT_HOST) childArguments.push("--host", options.host);
+  if (options.sessionId) childArguments.push("--session-id", options.sessionId);
+  if (options.logPath) childArguments.push("--log-path", options.logPath);
+
+  const child = spawn(process.execPath, childArguments, {
+    detached: true,
+    stdio: ["ignore", "pipe", "pipe"], // stdin ignored; stdout/stderr piped so they can be read here
+  });
+
+  let stdoutBuffer = "";
+  let stderrBuffer = ""; // only used to enrich the error when the child exits without an info line
+  child.stderr?.on("data", (chunk: Buffer) => (stderrBuffer += chunk.toString()));
+  const infoLine = await new Promise<string>((resolve, reject) => {
+    const resolveFromBuffer = (): boolean => { // true once a complete first line is available
+      const newlineIndex = stdoutBuffer.indexOf("\n");
+      if (newlineIndex === -1) return false;
+      resolve(stdoutBuffer.slice(0, newlineIndex));
+      return true;
+    };
+    child.stdout?.on("data", (chunk: Buffer) => {
+      stdoutBuffer += chunk.toString();
+      resolveFromBuffer();
+    });
+    child.on("error", reject);
+    // `close` (not `exit`) so stdout is fully drained: the reuse path prints its
+    // info line and exits 0 immediately, which can beat the `data` event, so the
+    // line is only guaranteed buffered once the pipe closes. A long-running
+    // server never closes stdout — it has already resolved via `data` above.
+    child.on("close", (code) => {
+      if (resolveFromBuffer()) return;
+      const detail = stderrBuffer.trim();
+      reject(new Error(`Debug log server exited with code ${code}${detail ? `: ${detail}` : ""}`));
+    });
+  });
+
+  logger.log(infoLine);
+  child.unref(); // stop this process from waiting on the detached child
+  process.exit(0);
+};
+
+// `--json` mode: prints one JSON info line, then keeps listening in the
+// foreground, for agents that background the process themselves (`… &`).
+const startJson = async (options: DebugServeOptions): Promise<void> => {
+  const started = await createLogServer(toServerOptions(options));
+  logger.log(JSON.stringify(started.info));
+  // No server handle means an already-running server was reused: there is
+  // nothing of our own to keep alive, so exit right after printing.
+  if (started.server === null) process.exit(0);
+  installShutdown(started.server);
+};
+
+const startInteractive = async (options: DebugServeOptions): Promise<void> => {
+  const { server, info } = await createLogServer(toServerOptions(options));
+  if (server) {
+    logger.success(`Debug log server listening on port ${info.port}`);
+    logger.dim(`  Endpoint: ${info.endpoint}`);
+    logger.dim(`  Log path: ${info.logPath}`);
+    installShutdown(server);
+  } else { // reused: the server lives in another process, so only report where it is
+    logger.success(`Debug log server already running on port ${info.port}`);
+    logger.dim(`  ${info.endpoint}`);
+  }
+};
+
+export const debugServeAction = async (options: DebugServeOptions): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "debug.serve" });
+  // `--daemon` takes precedence over `--json`; with neither flag the server
+  // runs interactively.
+  const start = options.daemon ? startDaemon : options.json ? startJson : startInteractive;
+  return start(options);
+};
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
index f8425d28e..ef365ed65 100644
--- a/packages/react-doctor/src/cli/index.ts
+++ b/packages/react-doctor/src/cli/index.ts
@@ -1,6 +1,19 @@
 import { Command, Option } from "commander";
 import { CANONICAL_GITHUB_URL, highlighter } from "@react-doctor/core";
 import { flushSentry, initializeSentry } from "../instrument.js";
+import {
+  browserAuditAction,
+  browserConsoleAction,
+  browserEvalAction,
+  browserNetworkAction,
+  browserOpenAction,
+  browserPerfAction,
+  browserReportAction,
+  browserScreenshotAction,
+  browserSnapshotAction,
+} from "./commands/browser.js";
+import { DEFAULT_HOST } from "@react-doctor/debug";
+import { debugServeAction } from "./commands/debug.js";
 import { inspectAction } from "./commands/inspect.js";
 import { installAction } from "./commands/install.js";
 import {
@@ -23,6 +36,7 @@ import { isDebugFlagEnabled } from "./utils/is-debug-flag.js";
 import { isExpectedUserError } from "./utils/is-expected-user-error.js";
 import { isJsonModeActive, writeJsonErrorReport } from "./utils/json-mode.js";
 import { normalizeHelpInvocation } from "./utils/normalize-help-command.js";
+import { parseViewport } from "./utils/parse-viewport.js";
 import { printDebugTrace } from "./utils/print-debug-trace.js";
 import { assertNoRemovedFlags } from "./utils/removed-cli-flags.js";
 import { reportErrorToSentry } from "./utils/report-error.js";
@@ -220,6 +234,115 @@ program
   .addHelpText("after", renderInstallHelpEpilog)
   .action(installAction);
 
+const browser = program // parent command that the `browser` subcommands are registered on
+  .command("browser")
+  .description(
+    "Drive a real browser for the debug and design jobs (attaches to your running Chrome over CDP, launches one only as a fallback)",
+  );
+
+// All browser subcommands attach identically, so the connection flags are added in one place.
+const withConnectionOptions = (command: Command): Command => {
+  command.option("--cdp <endpoint>", "CDP endpoint to attach to (default http://127.0.0.1:9222)");
+  return command.option("--no-launch", "fail instead of launching Chrome when no attach target exists");
+};
+
+// Adds the connection flags plus a one-shot `--viewport` (e.g. a phone size) for
+// commands that render or measure the page. The emulation is a CDP override that
+// clears when the command ends, so the user's real window is never resized; that
+// is also why `open`, which leaves a persistent page behind, does not take it.
+const withRenderOptions = (command: Command): Command => {
+  const viewportOption = new Option(
+    "--viewport <size>",
+    "emulate a viewport for this command, WIDTHxHEIGHT (e.g. 390x844)",
+  ).argParser(parseViewport);
+  return withConnectionOptions(command).addOption(viewportOption);
+};
+
+withConnectionOptions( // connection flags only: `open` is the one subcommand without --viewport
+  browser
+    .command("open <url>")
+    .description(
+      "Open a URL and keep the page, with the React DevTools profiler injected for `browser eval` (window.__REACT_PERF__)",
+    ),
+).action(browserOpenAction);
+
+withRenderOptions(
+  browser
+    .command("eval <expression>")
+    .description(
+      "Run an expression with the Playwright `page` in scope, e.g. 'page.locator(\"text=Login\").click()'",
+    ),
+).action(browserEvalAction);
+
+withRenderOptions(
+  browser
+    .command("snapshot")
+    .description("Print the page's accessibility tree (a stable view of what is rendered)"),
+).action(browserSnapshotAction);
+
+withRenderOptions(
+  browser
+    .command("screenshot")
+    .description("Save a screenshot of the page")
+    .option("--out <path>", "output file path (default react-doctor-screenshot.png)"),
+).action(browserScreenshotAction);
+
+withRenderOptions( // the commands from here on take an optional [url]
+  browser
+    .command("audit [url]")
+    .description("Run an accessibility audit (axe-core) on the page or a URL"),
+).action(browserAuditAction);
+
+withRenderOptions(
+  browser
+    .command("console [url]")
+    .description("Capture console output and page errors during a load (reloads if no URL)"),
+).action(browserConsoleAction);
+
+withRenderOptions(
+  browser
+    .command("network [url]")
+    .description("Capture network requests during a load, flagging failures (reloads if no URL)"),
+).action(browserNetworkAction);
+
+withRenderOptions(
+  browser
+    .command("perf [url]")
+    .description(
+      "Capture long animation frames (jank) with per-script attribution, plus LCP/CLS (reloads if no URL)",
+    ),
+).action(browserPerfAction);
+
+withRenderOptions(
+  browser
+    .command("report [url]")
+    .description(
+      "Capture console, network, performance, and accessibility in a single load (reloads if no URL)",
+    ),
+).action(browserReportAction);
+
+const debug = program
+  .command("debug")
+  .description("Runtime debugging tools for the debug job (NDJSON logging server)");
+
+// `serve` is registered as the default subcommand, so a bare `react-doctor debug`
+// starts the server. Agents pass `--daemon` to get the endpoint and a detached server at once.
+debug
+  .command("serve", { isDefault: true })
+  .description("Start the NDJSON logging server the debug job posts runtime logs to")
+  .option("-p, --port <number>", "port to listen on (default: random)", (value) =>
+    parseInt(value, 10), // NOTE(review): a non-numeric value parses to NaN and is not rejected here
+  )
+  .option("-H, --host <address>", "host to bind to", DEFAULT_HOST)
+  .option("-s, --session-id <id>", "session id (default: random hex)")
+  .option(
+    "-l, --log-path <path>",
+    "log file path (default: <tmpdir>/react-doctor-debug/debug-<sessionId>.log)",
+  )
+  .option("-d, --daemon", "start in the background, print the server info, then exit")
+  .option("--json", "print the server info as one JSON line (for agents)")
+  .action(debugServeAction);
+
 program
   .command("version")
   .description("show the version with Node and platform info")
diff --git a/packages/react-doctor/src/cli/utils/constants.ts b/packages/react-doctor/src/cli/utils/constants.ts
index e312f32ae..6ebf6efac 100644
--- a/packages/react-doctor/src/cli/utils/constants.ts
+++ b/packages/react-doctor/src/cli/utils/constants.ts
@@ -27,6 +27,13 @@ export const CACHE_FILENAME_HASH_LENGTH_CHARS = 16;
 
 export const GIT_HOOK_EXECUTABLE_MODE = 0o755;
 
+// File that `browser screenshot` writes to when `--out` is omitted.
+export const DEFAULT_SCREENSHOT_FILENAME = "react-doctor-screenshot.png";
+
+// Largest width or height accepted for an emulated `--viewport`, so a typo
+// can't push an absurd device-metrics override into CDP.
+export const MAX_VIEWPORT_PX = 10_000;
+
 export const AGENT_HOOK_TIMEOUT_SECONDS = 120;
 
 // Hard cap on the `gh repo view` default-branch probe. A healthy gh answers
diff --git a/packages/react-doctor/src/cli/utils/parse-viewport.ts b/packages/react-doctor/src/cli/utils/parse-viewport.ts
new file mode 100644
index 000000000..4c8405c37
--- /dev/null
+++ b/packages/react-doctor/src/cli/utils/parse-viewport.ts
@@ -0,0 +1,17 @@
+import { InvalidArgumentError } from "commander";
+import { MAX_VIEWPORT_PX } from "./constants.js";
+
+// Parses `--viewport WIDTHxHEIGHT`. Bad input throws Commander's
+// InvalidArgumentError, so it renders as a clean usage error, not a crash report.
+export const parseViewport = (value: string): { width: number; height: number } => {
+  const [, widthText = "0", heightText = "0"] = /^(\d+)x(\d+)$/i.exec(value.trim()) ?? [];
+  const [width, height] = [Number(widthText), Number(heightText)];
+  // A failed match leaves both at 0, so one check covers bad format and zero sizes.
+  if (width === 0 || height === 0) {
+    throw new InvalidArgumentError(`Use WIDTHxHEIGHT in pixels, e.g. 390x844 (got "${value}").`);
+  }
+  if (Math.max(width, height) > MAX_VIEWPORT_PX) {
+    throw new InvalidArgumentError(`Viewport dimensions must be at most ${MAX_VIEWPORT_PX}px.`);
+  }
+  return { width, height };
+};
diff --git a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
index 88502c907..bece4fa1a 100644
--- a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
+++ b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
@@ -99,12 +99,39 @@ const WHY_FLAG_SPEC: CliFlagSpec = {
   shortOptionsWithRequiredValues: new Set(["-c"]),
 };
 
+// Flag spec shared by every `browser` subcommand (open / eval / snapshot /
+// screenshot / audit / console / network / perf / report): the union of their
+// options. The subcommand name and any URL / expression positional pass through
+// untouched; only these options must survive the pre-parse strip so Commander
+// can route them. Without this spec, `--cdp <endpoint>` is dropped and its
+// value leaks in as a stray positional.
+const BROWSER_FLAG_SPEC: CliFlagSpec = {
+  longOptionsWithoutValues: new Set(["--help", "--no-launch"]),
+  longOptionsWithRequiredValues: new Set(["--cdp", "--out", "--viewport"]),
+  longOptionsWithOptionalValues: new Set(),
+  shortOptionsWithoutValues: new Set(["-h"]),
+  shortOptionsWithRequiredValues: new Set(),
+};
+
+// Flag spec for `debug serve`. The `serve` subcommand name passes through as a
+// non-flag token; only these options need to survive the pre-parse strip so
+// Commander can route them (otherwise the ROOT spec drops `--port`, `--daemon`, …).
+const DEBUG_FLAG_SPEC: CliFlagSpec = {
+  longOptionsWithoutValues: new Set(["--color", "--daemon", "--help", "--json", "--no-color"]),
+  longOptionsWithRequiredValues: new Set(["--host", "--log-path", "--port", "--session-id"]),
+  longOptionsWithOptionalValues: new Set(),
+  shortOptionsWithoutValues: new Set(["-d", "-h"]),
+  shortOptionsWithRequiredValues: new Set(["-H", "-l", "-p", "-s"]),
+};
+
 const COMMAND_FLAG_SPECS = new Map<string, CliFlagSpec>([
   ["install", INSTALL_FLAG_SPEC],
   ["setup", INSTALL_FLAG_SPEC],
   ["version", VERSION_FLAG_SPEC],
   ["rules", RULES_FLAG_SPEC],
   ["why", WHY_FLAG_SPEC],
+  ["browser", BROWSER_FLAG_SPEC],
+  ["debug", DEBUG_FLAG_SPEC],
 ]);
 
 const isFlagLike = (argument: string): boolean => argument.startsWith("-") && argument !== "-";
diff --git a/packages/react-doctor/tests/parse-viewport.test.ts b/packages/react-doctor/tests/parse-viewport.test.ts
new file mode 100644
index 000000000..2f0f351a7
--- /dev/null
+++ b/packages/react-doctor/tests/parse-viewport.test.ts
@@ -0,0 +1,16 @@
+import { InvalidArgumentError } from "commander";
+import { describe, expect, it } from "vite-plus/test";
+import { parseViewport } from "../src/cli/utils/parse-viewport.js";
+
+describe("parseViewport", () => {
+  it("parses WIDTHxHEIGHT into pixel dimensions", () => {
+    expect(parseViewport("390x844")).toEqual({ width: 390, height: 844 });
+    expect(parseViewport(" 1280X720 ")).toEqual({ width: 1280, height: 720 });
+  });
+
+  it("rejects malformed, zero-sized, or oversized values with a usage error", () => {
+    for (const value of ["", "390", "390*844", "abc", "0x844", "390x0", "10001x1", "1x10001"]) {
+      expect(() => parseViewport(value)).toThrow(InvalidArgumentError);
+    }
+  });
+});
diff --git a/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts b/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
index d7a3aba3c..ae73df1f2 100644
--- a/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
+++ b/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
@@ -130,6 +130,52 @@ describe("stripUnknownCliFlags", () => {
     ]);
   });
 
+  it("keeps browser subcommand flags and consumes --cdp's value (no value leaks as a positional)", () => {
+    // Regression: without a browser flag spec, `--cdp` is dropped and its endpoint value
+    // leaks in as a second positional, so `browser audit <url> --cdp <endpoint>` makes
+    // Commander throw "too many arguments". Non-spec flags (`--offline`) are still stripped.
+    expect(
+      stripUserArguments([
+        "browser",
+        "audit",
+        "https://example.com",
+        "--cdp",
+        "http://127.0.0.1:9456",
+      ]),
+    ).toEqual(["browser", "audit", "https://example.com", "--cdp", "http://127.0.0.1:9456"]);
+    expect(stripUserArguments(["browser", "open", "https://example.com", "--no-launch"])).toEqual([
+      "browser",
+      "open",
+      "https://example.com",
+      "--no-launch",
+    ]);
+    expect(stripUserArguments(["browser", "screenshot", "--out", "shot.png", "--offline"])).toEqual(
+      ["browser", "screenshot", "--out", "shot.png"],
+    );
+    expect(
+      stripUserArguments(["browser", "screenshot", "--viewport", "390x844", "--out", "m.png"]),
+    ).toEqual(["browser", "screenshot", "--viewport", "390x844", "--out", "m.png"]);
+    expect(
+      stripUserArguments(["browser", "eval", 'page.locator("a").click()', "--cdp", "http://x"]),
+    ).toEqual(["browser", "eval", 'page.locator("a").click()', "--cdp", "http://x"]);
+  });
+
+  it("keeps debug serve flags and consumes their values (no value leaks as a positional)", () => {
+    expect(
+      stripUserArguments(["debug", "serve", "--port", "9000", "--daemon", "--offline"]),
+    ).toEqual(["debug", "serve", "--port", "9000", "--daemon"]);
+    expect(stripUserArguments(["debug", "--json"])).toEqual(["debug", "--json"]);
+    expect(stripUserArguments(["debug", "serve", "-p", "9000", "-s", "abc123", "-d"])).toEqual([
+      "debug",
+      "serve",
+      "-p",
+      "9000",
+      "-s",
+      "abc123",
+      "-d",
+    ]);
+  });
+
   it("keeps color flags on rules subcommands so the color resolver can see them", () => {
     expect(stripUserArguments(["rules", "list", "--no-color"])).toEqual([
       "rules",
diff --git a/packages/react-doctor/vite.config.ts b/packages/react-doctor/vite.config.ts
index 3940e2ddd..b00f1cdc9 100644
--- a/packages/react-doctor/vite.config.ts
+++ b/packages/react-doctor/vite.config.ts
@@ -58,6 +58,23 @@ const copySkillsToDist = () => {
   }
 };
 
+// Copies the prebuilt React-profiler init script (a browser-only asset, not JS the
+// CLI bundle imports) from ../browser/dist/inject to dist/inject, replacing any old
+// copy and throwing if the source is missing. @react-doctor/browser is inlined into
+// dist/cli.js, so its session resolves the asset relative to that bundle; the copy
+// makes that path resolve once published (`dist/**/*.js` in package.json "files" ships it).
+const copyBrowserInjectToDist = () => {
+  const injectSource = path.resolve(packageRoot, "../browser/dist/inject");
+  const injectTarget = path.resolve(packageRoot, "dist/inject");
+  if (!fs.existsSync(injectSource)) {
+    throw new Error(
+      `Browser inject asset missing at ${injectSource}; build @react-doctor/browser first.`,
+    );
+  }
+  fs.rmSync(injectTarget, { recursive: true, force: true });
+  fs.cpSync(injectSource, injectTarget, { recursive: true });
+};
+
 export default defineConfig({
   pack: [
     {
@@ -77,6 +94,14 @@ export default defineConfig({
           // and resolves native/optional deps via require() at runtime;
           // keep it external so those lookups run untouched.
           "@sentry/node",
+          // playwright-core (a browser automation library) backs only the browser
+          // jobs (perf, debug, design) and is reached lazily through
+          // @react-doctor/browser's dynamic import, so keep it external — never
+          // inlined into the CLI's hot path. @react-doctor/browser itself is a
+          // thin wrapper, so it's bundled; that lets the published CLI load it
+          // without it being a runtime dependency (it is private), while
+          // playwright-core is an optional dependency, loaded only when installed.
+          "playwright-core",
           "agent-install",
           // Config loading/editing: jiti (TS/JS config eval) + confbox
           // (JSONC parse) power the loader in @react-doctor/core (bundled
@@ -139,6 +164,7 @@ export default defineConfig({
       hooks: {
         "build:done": () => {
           copySkillsToDist();
+          copyBrowserInjectToDist();
         },
       },
     },
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index a9a909f6a..6b0c0bd1d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -59,6 +59,25 @@ importers:
         specifier: ^25.6.0
         version: 25.6.0
 
+  packages/browser:
+    dependencies:
+      axe-core:
+        specifier: ^4.10.2
+        version: 4.12.1
+      playwright-core:
+        specifier: ^1.49.1
+        version: 1.60.0
+    devDependencies:
+      '@types/node':
+        specifier: ^25.6.0
+        version: 25.6.0
+      esbuild:
+        specifier: ^0.25.12
+        version: 0.25.12
+      react-devtools-inline:
+        specifier: ^6.1.5
+        version: 6.1.5
+
   packages/core:
     dependencies:
       '@effect/platform-node-shared':
@@ -108,6 +127,12 @@ importers:
         specifier: ^7.7.1
         version: 7.7.1
 
+  packages/debug:
+    devDependencies:
+      '@types/node':
+        specifier: ^25.6.0
+        version: 25.6.0
+
   packages/deslop-cli:
     dependencies:
       commander:
@@ -254,9 +279,15 @@ importers:
       '@react-doctor/api':
         specifier: workspace:*
         version: link:../api
+      '@react-doctor/browser':
+        specifier: workspace:*
+        version: link:../browser
       '@react-doctor/core':
         specifier: workspace:*
         version: link:../core
+      '@react-doctor/debug':
+        specifier: workspace:*
+        version: link:../debug
       '@react-doctor/language-server':
         specifier: workspace:*
         version: link:../language-server
@@ -275,6 +306,10 @@ importers:
       ora:
         specifier: ^9.4.0
         version: 9.4.0
+    optionalDependencies:
+      playwright-core:
+        specifier: ^1.49.1
+        version: 1.60.0
 
   packages/vscode-react-doctor:
     dependencies:
@@ -2720,6 +2755,10 @@ packages:
   atomically@2.1.1:
     resolution: {integrity: sha512-P4w9o2dqARji6P7MHprklbfiArZAWvo07yW7qs3pdljb3BWr12FIB7W+p0zJiuiVsUpRO0iZn1kFFcpPegg0tQ==}
 
+  axe-core@4.12.1:
+    resolution: {integrity: sha512-s7iGf5GaVMxEG0ENN9x+xTr7GFZCb1ZP/1uATUpCEK2X78nDB3RwbtFCo9pGAf9ru+VwoQ464DkaLEeRM08wJA==}
+    engines: {node: '>=4'}
+
   balanced-match@1.0.2:
     resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==}
 
@@ -3551,6 +3590,11 @@ packages:
     resolution: {integrity: sha512-xhcb4yHu9sM/G7foGzoLtXYcC0zHEaOXXjRKhGup0fw78Nf2Tkiapv4EQyMzrbcmQPsllAI7DbFY2UT7PlI9Pg==}
     hasBin: true
 
+  playwright-core@1.60.0:
+    resolution: {integrity: sha512-9bW6zvX/m0lEbgTKJ6YppOKx8H3VOPBMOCFh2irXFOT4BbHgrx5hPjwJYLT40Lu+4qtD36qKc/Hn56StUW57IA==}
+    engines: {node: '>=18'}
+    hasBin: true
+
   pngjs@7.0.0:
     resolution: {integrity: sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow==}
     engines: {node: '>=14.19.0'}
@@ -3596,6 +3640,9 @@ packages:
   queue-microtask@1.2.3:
     resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==}
 
+  react-devtools-inline@6.1.5:
+    resolution: {integrity: sha512-8FbBqZrOk4k4uWgkDNj7CVb975oKOMuPYZMQi4UHVW1RhbnEFOVZ7cdKvv6tbzbhy2D1aFwfj1T58atSoedEKQ==}
+
   react-dom@19.2.5:
     resolution: {integrity: sha512-J5bAZz+DXMMwW/wV3xzKke59Af6CHY7G4uYLN1OvBcKEsWOs4pQExj86BBKamxl/Ik5bx9whOrvBlSDfWzgSag==}
     peerDependencies:
@@ -3694,6 +3741,10 @@ packages:
     resolution: {integrity: sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==}
     engines: {node: '>=8'}
 
+  source-map-js@0.6.2:
+    resolution: {integrity: sha512-/3GptzWzu0+0MBQFrDKzw/DvvMTUORvgY6k6jd/VS6iCR4RDTKWH6v6WPwQoUO8667uQEf9Oe38DxAYWY5F/Ug==}
+    engines: {node: '>=0.10.0'}
+
   source-map-js@1.2.1:
     resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==}
     engines: {node: '>=0.10.0'}
@@ -3705,6 +3756,10 @@ packages:
     resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==}
     engines: {node: '>=0.10.0'}
 
+  sourcemap-codec@1.4.8:
+    resolution: {integrity: sha512-9NykojV5Uih4lgo5So5dtw+f0JgJX30KCNI8gwhz2J9A15wD0Ml6tjHKwf6fTSa6fAdVBdZeNOs9eJ71qCk8vA==}
+    deprecated: Please use @jridgewell/sourcemap-codec instead
+
   spawndamnit@3.0.1:
     resolution: {integrity: sha512-MmnduQUuHCoFckZoWnXsTg7JaiLBJrKFj9UI2MbRPGaJeVpsLcVBu6P/IGZovziM/YBsellCmsprgNA+w0CzVg==}
 
@@ -5753,6 +5808,8 @@ snapshots:
       stubborn-fs: 2.0.0
       when-exit: 2.1.5
 
+  axe-core@4.12.1: {}
+
   balanced-match@1.0.2: {}
 
   balanced-match@4.0.4: {}
@@ -6697,6 +6754,8 @@ snapshots:
     dependencies:
       pngjs: 7.0.0
 
+  playwright-core@1.60.0: {}
+
   pngjs@7.0.0: {}
 
   postcss@8.4.31:
@@ -6732,6 +6791,11 @@ snapshots:
 
   queue-microtask@1.2.3: {}
 
+  react-devtools-inline@6.1.5:
+    dependencies:
+      source-map-js: 0.6.2
+      sourcemap-codec: 1.4.8
+
   react-dom@19.2.5(react@19.2.5):
     dependencies:
       react: 19.2.5
@@ -6867,6 +6931,8 @@ snapshots:
 
   slash@3.0.0: {}
 
+  source-map-js@0.6.2: {}
+
   source-map-js@1.2.1: {}
 
   source-map-support@0.5.21:
@@ -6878,6 +6944,8 @@ snapshots:
   source-map@0.6.1:
     optional: true
 
+  sourcemap-codec@1.4.8: {}
+
   spawndamnit@3.0.1:
     dependencies:
       cross-spawn: 7.0.6
diff --git a/skills/react-doctor/SKILL.md b/skills/react-doctor/SKILL.md
index 332a47f8b..e85b926fa 100644
--- a/skills/react-doctor/SKILL.md
+++ b/skills/react-doctor/SKILL.md
@@ -1,26 +1,61 @@
 ---
 name: react-doctor
-description: Use when finishing a feature, fixing a bug, before committing React code, or when the user types `/doctor`, asks to scan, triage, or clean up React diagnostics. Covers lint, accessibility, bundle size, architecture. Includes a regression check and a full local-triage workflow that fetches the canonical playbook.
-version: "1.2.0"
+description: Use when writing, finishing, or committing React or React Native code, when the user types `/react-doctor`, or when they ask to scan, triage, lint, profile performance, debug a UI in the browser, or review design and accessibility. Covers lint, accessibility, performance, bundle size, and architecture.
+version: "1.5.0"
 ---
 
 # React Doctor
 
-Scans React codebases for security, performance, correctness, and architecture issues. Outputs a 0–100 health score.
+One skill that makes your agent good at React. It writes better React by default, checks your changes in the background, and opens a real browser to profile performance, reproduce bugs, and review design.
 
-## After making React code changes:
+## Baseline rules (always on)
 
-Run `npx react-doctor@latest --verbose --scope changed` and check the score did not regress.
+Apply these on every React edit, before any tool runs. They shape how you write code, not only what you flag:
 
-If the score dropped, fix the regressions before committing.
+1. Derive state during render, don't duplicate it in another `useState`.
+2. Skip effects for values you can compute while rendering and for logic that belongs in an event handler.
+3. Compose components instead of piling on boolean props.
+4. Lift state only as far as it needs to go, no higher.
+5. Keep one source of truth for each piece of state.
+6. Render without side effects; keep the render pass pure.
+7. Use stable keys in lists, never the array index.
+8. Fetch independent data in parallel, not in a waterfall.
+9. Skip manual `useMemo`, `useCallback`, and `memo`; let the React Compiler handle it.
+10. Handle the loading, error, and empty states, not only the happy path.
 
-## For general cleanup or code improvement:
+## Routing
 
-Run `npx react-doctor@latest --verbose` (the default `--scope full`) to scan the full codebase. Fix issues by severity — errors first, then warnings.
+`/react-doctor` picks the job from what you're doing. Name a job (`/react-doctor perf`) to force it. When the request is genuinely unclear, ask which one rather than guessing.
 
-## /doctor — full local triage workflow
+| Signal                                                  | Job        | What it does                    |
+| ------------------------------------------------------- | ---------- | ------------------------------- |
+| "review", "before commit", "clean up", or changed files | **doctor** | static scan plus 0 to 100 score |
+| "slow", "laggy", "janky", "re-rendering"                | **perf**   | React DevTools profiler harness |
+| "broken", "crashes", "doesn't work" in the UI           | **debug**  | reproduce in a real browser     |
+| "looks off", "polish", a screenshot or pasted element   | **design** | measured UI review              |
 
-When the user types `/doctor`, says "run react doctor", or asks for a full triage / cleanup pass (not just a regression check), fetch the canonical local-triage playbook and follow every step in it:
+doctor runs from code alone, so it is the one that fires in the background. The browser jobs (perf, debug, design) need a live page and are slower, so they run only when asked.
+
+## Which browser to drive
+
+debug, design, and perf need a real Chrome. Two ways to get one:
+
+1. **A browser MCP already in your tools.** Prefer [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) or similar for console, network, and snapshots. It adds full performance traces and Lighthouse on top.
+2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. It covers `open`, `eval`, `snapshot`, `screenshot`, `console`, `network`, an axe-core `audit`, and `perf` (long animation frames with per-script attribution).
+
+It is the same Chrome either way, so the playbooks apply to both: `browser open`, `eval`, `snapshot`, and `screenshot` map onto the MCP's `navigate_page`, `evaluate_script`, `take_snapshot`, and `take_screenshot`.
+
+## doctor: scan and triage
+
+After making React changes, run a regression check and confirm the score did not drop:
+
+```bash
+npx react-doctor@latest --verbose --scope changed
+```
+
+If the score dropped, fix the regressions before committing. For a cleanup of the whole codebase, drop `--scope changed` (the default is `--scope full`) and fix by severity: errors first, then warnings.
+
+When the user types `/react-doctor`, `/doctor`, says "run react doctor", or asks for a full triage or cleanup pass (not a regression check), fetch the canonical local-triage playbook and follow every step in it:
 
 ```bash
 curl --fail --silent --show-error \
@@ -28,13 +63,23 @@ curl --fail --silent --show-error \
   https://www.react.doctor/prompts/react-doctor-agent.md
 ```
 
-The playbook is the single source of truth — a scan → filter → triage → fix → validate loop that edits the working tree directly (never commits, never opens PRs). Updating the prompt at its source updates every agent on its next fetch — no skill reinstall needed.
+The playbook is the single source of truth: a scan, filter, triage, fix, validate loop that edits the working tree directly and never commits or opens PRs. Updating the prompt at its source updates every agent on its next fetch, no reinstall needed. Pair it with the per-rule prompts at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md` (fetched on demand inside the playbook) so each fix uses the reviewer-tested recipe.
+
+## perf: profile performance
+
+When the user reports jank, slow interactions, dropped frames, excessive re-renders, or asks to profile or optimize render performance, read [references/performance.md](references/performance.md) and follow it. It runs an evidence-driven profile, analyze, fix, re-profile loop against the real React DevTools profiler export, never guessing from code alone.
+
+## debug: reproduce in a real browser
+
+When the user says something is broken, crashes, throws, or behaves wrong in the running app, read [references/debug.md](references/debug.md) and follow it. It runs the [debug-agent](https://github.com/millionco/debug-agent) loop: generate hypotheses, instrument the code with runtime NDJSON logs, reproduce the bug in the live browser, and fix only once the logs prove the cause.
+
+## design: review and improve UI
 
-Pair it with the matching per-rule prompts at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md` (fetched on demand inside the playbook) so each fix uses the canonical, reviewer-tested recipe.
+When the user wants to build, polish, or review an interface ("looks off", "make this nicer", a pasted screenshot or element), read [references/design.md](references/design.md) and follow it. It opens the page, takes a screenshot, and reports what it can measure (contrast, line length, spacing, tap-target size), not only taste.
 
 ## Configuring or explaining rules
 
-When the user wants to understand a rule, disagrees with one, or wants to disable / tune which rules run (not fix code), read [references/explain.md](references/explain.md) and follow it. Start with `npx react-doctor@latest rules explain <rule>`, then apply the narrowest control via `npx react-doctor@latest rules disable|set|category|ignore-tag …`, which edits your `doctor.config.*` (or `package.json#reactDoctor`).
+When the user wants to understand a rule, disagrees with one, or wants to disable or tune which rules run (not fix code), read [references/explain.md](references/explain.md) and follow it. Start with `npx react-doctor@latest rules explain <rule>`, then apply the narrowest control via `npx react-doctor@latest rules disable|set|category|ignore-tag …`.
 
 ## Command
 
diff --git a/skills/react-doctor/references/debug.md b/skills/react-doctor/references/debug.md
new file mode 100644
index 000000000..afd5431e2
--- /dev/null
+++ b/skills/react-doctor/references/debug.md
@@ -0,0 +1,87 @@
+# Debugging with runtime evidence
+
+Reproduce and fix UI bugs with runtime evidence, never by guessing from code alone. Use this when the user says something is broken, crashes, throws, hangs, or behaves wrong in the running app.
+
+This is the [debug-agent](https://github.com/millionco/debug-agent) loop, built into React Doctor: hypothesize, instrument with logs, reproduce, analyze the logs, fix only once the logs prove the cause, verify, clean up.
+
+## 0. Start the logging server (before any instrumentation)
+
+The server is long-running. Start it once and keep it up for the whole session. `--daemon` prints the server info and returns, leaving the server running in the background:
+
+```bash
+npx react-doctor debug serve --daemon
+```
+
+It prints one JSON line. Capture and remember:
+
+- `endpoint`: POST your logs here from JS or TS at runtime
+- `logPath`: the NDJSON log file you read after each run
+- `sessionId`: include it in every log payload
+
+The server is idempotent: a second start returns the running server's info. If it fails to start, stop and tell the user. Do not instrument without it.
+
+## 1. Generate hypotheses
+
+Write 3 to 5 precise hypotheses about why the bug happens: a thrown error in a specific component, a failed or duplicated request, a null or undefined access, a state update after unmount, a missing loading or error branch. Aim for more, not fewer. Each hypothesis gets an id (A, B, C, …).
+
+## 2. Instrument the code
+
+Add 2 to 6 logs (never more than 10) at the points that confirm or reject each hypothesis: function entry and exit, values before and after a critical operation, which branch ran. In JS or TS, POST to the server `endpoint`:
+
+```js
+// #region debug log
+fetch("ENDPOINT", {
+  method: "POST",
+  headers: { "Content-Type": "application/json" },
+  body: JSON.stringify({
+    sessionId: "SESSION_ID",
+    hypothesisId: "A",
+    location: "cart.tsx:42",
+    message: "cart total before render",
+    data: { total },
+    timestamp: Date.now(),
+  }),
+}).catch(() => {});
+// #endregion
+```
+
+Wrap every debug log in `// #region debug log` and `// #endregion` so cleanup later is deterministic. Each log maps to at least one `hypothesisId`. Never log secrets or PII.
+
+## 3. Reproduce
+
+Clear the log file (delete the file at `logPath`) before each run, then trigger the exact behavior the user described:
+
+- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser console` and `browser network` hand you the runtime console (with uncaught errors) and the request waterfall with failures flagged, often the evidence you need before instrumenting at all. To get the whole picture in one pass, `browser report` captures console, network, performance, and accessibility in a single page load instead of reloading once per command; prefer it over running the four separately. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
+
+```bash
+npx react-doctor browser open http://localhost:3000           # attach + open the page
+npx react-doctor browser report http://localhost:3000         # console + network + perf + a11y in one load
+npx react-doctor browser console http://localhost:3000        # console output + uncaught errors
+npx react-doctor browser network http://localhost:3000        # request waterfall, failures flagged
+npx react-doctor browser snapshot                             # what rendered, by role + name
+npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()'
+npx react-doctor browser eval 'page.evaluate(() => document.title)'   # raw DOM when you need it
+```
+
+`snapshot` and `eval` are a pair. `snapshot` lists the rendered elements by role and accessible name. `eval` runs an expression with the Playwright `page` in scope, so you act on what you saw using Playwright's own selectors: `page.locator("text=Login").click()`, `page.getByRole(...)`, `page.fill(...)`, `page.waitForSelector(...)`. For raw DOM, reach through `page.evaluate(() => …)`. No separate ref scheme to track.
+
+- **Backend or CLI bugs:** write and run a small repro script (Node, shell) yourself.
+- Otherwise ask the user for numbered steps, and remind them to restart any app or service whose instrumented files are bundled or cached.
+
+Reuse the same repro pathway for every iteration.
+
+## 4. Analyze the logs
+
+Read the NDJSON at `logPath`. Mark each hypothesis CONFIRMED, REJECTED, or INCONCLUSIVE, citing the specific log lines. If the file is empty, the repro likely did not run the instrumented path, so try again. If every hypothesis is rejected, revert any code changes made for the rejected hypotheses, generate new hypotheses from a different subsystem, and add more instrumentation.
+
+## 5. Fix, only with proof
+
+Apply the smallest change that addresses the proven cause. Cross-check it against the baseline rules in `SKILL.md` (derive don't duplicate, effects, single source of truth). Do not remove the instrumentation yet. Never use `setTimeout` or `sleep` as a fix.
+
+## 6. Verify
+
+Clear the log file, re-run the same reproduction (tag the logs `runId:"post-fix"` if helpful), and compare before and after with cited lines. Re-run a couple of times to rule out races. No fix is confirmed without log proof.
+
+## 7. Clean up
+
+Once verified, search every file for `#region debug log`, delete each block through its `#endregion`, grep again to confirm none remain, and `git diff` to confirm only the intentional fix is left.
diff --git a/skills/react-doctor/references/design.md b/skills/react-doctor/references/design.md
new file mode 100644
index 000000000..74f50928f
--- /dev/null
+++ b/skills/react-doctor/references/design.md
@@ -0,0 +1,52 @@
+# Reviewing and improving UI
+
+Improve interfaces with measured evidence from the rendered page, not taste alone. Use this when the user wants to build, polish, or review a UI: "looks off", "make this nicer", or a pasted screenshot.
+
+The value here is what a screenshot and the live DOM let you measure that reading code cannot: contrast ratios, line length, the spacing scale, and tap-target size. Lead with those, then apply craft.
+
+## Review against the live page
+
+```bash
+npx react-doctor browser open http://localhost:3000
+npx react-doctor browser screenshot --out review.png   # what the user actually sees
+npx react-doctor browser audit                          # axe-core: contrast, names, landmarks
+```
+
+Review responsive breakpoints with `--viewport WIDTHxHEIGHT` (for example `--viewport 390x844` for a phone) on `screenshot`, `snapshot`, `audit`, or `perf`. It emulates the size for that one command via a CDP override, so it never resizes your real browser window:
+
+```bash
+npx react-doctor browser screenshot --viewport 390x844 --out mobile.png
+```
+
+Look at the screenshot, then measure specifics with `eval` (computed styles, bounding boxes, color values) to get objective numbers rather than opinions:
+
+```bash
+npx react-doctor browser eval 'page.evaluate(() => getComputedStyle(document.querySelector("button")).fontSize)'
+```
+
+`browser audit` runs axe-core against the live page and reports accessibility violations (color contrast, missing button or SVG names, heading order, landmarks) with the failing selectors. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, its `lighthouse_audit` adds performance and best-practice findings on top. Lead with the measured issues; a smarter model cannot dismiss them as opinion.
+
+## What to check
+
+Measured, in priority order:
+
+1. **Contrast**: body text at least 4.5:1, large text at least 3:1. Report the actual ratio.
+2. **Tap targets**: interactive elements at least 24 × 24 px (ideally 44 × 44 on touch).
+3. **Line length**: body copy roughly 45 to 75 characters per line.
+4. **Spacing**: spacing values come from one consistent scale, not ad-hoc px.
+
+Then craft, drawing on the bundled design rules:
+
+5. **Type**: one clear hierarchy; avoid default system-only stacks for brand surfaces; consistent line-height.
+6. **Color**: a committed palette, not arbitrary hexes; check both light and dark.
+7. **Layout**: alignment, rhythm, and a deliberate focal point.
+8. **State**: hover, focus-visible, disabled, loading, and empty states exist.
+
+## The loop
+
+Build or fix, screenshot, re-audit, compare. Confirm the measured issue you targeted actually moved (the ratio crossed the threshold, the target grew), and compare the before and after screenshots to confirm the result looks right.
+
+## Working rules
+
+- Always look at the screenshot; do not review UI from JSX alone.
+- Report measured findings with their numbers; keep taste suggestions short and clearly separate from the measured ones.
diff --git a/skills/react-doctor/references/explain.md b/skills/react-doctor/references/explain.md
index 8e4defe4a..18cd0cea2 100644
--- a/skills/react-doctor/references/explain.md
+++ b/skills/react-doctor/references/explain.md
@@ -1,15 +1,12 @@
 # Explaining and configuring rules
 
-Explain React Doctor rules and edit `doctor.config.*` safely. Use this when a user
-wants to understand a rule or change which rules run — not for fixing diagnostics
-(that is the main `react-doctor` skill / `/doctor`).
+Explain React Doctor rules and edit `doctor.config.*` safely. Use this when a user wants to understand a rule or change which rules run, not for fixing diagnostics (that is the main `react-doctor` skill, `/doctor`).
 
-Triggers: "why did this rule fire", "I disagree with this rule", "turn this rule off",
-"stop flagging X", "too noisy", "disable design rules".
+Triggers: "why did this rule fire", "I disagree with this rule", "turn this rule off", "stop flagging X", "too noisy", "disable design rules".
 
 ## Workflow
 
-1. Identify the rule key from the diagnostic (e.g. `react-doctor/no-array-index-as-key`).
+1. Identify the rule key from the diagnostic (for example `react-doctor/no-array-index-as-key`).
 2. Explain it before changing anything:
 
 ```bash
@@ -17,7 +14,7 @@ npx react-doctor@latest rules explain react-doctor/no-array-index-as-key
 ```
 
 3. Pick the narrowest control that matches the user's intent (see decision guide).
-4. Apply it with a `rules` subcommand (edits your `doctor.config.*` or `package.json#reactDoctor` in place, preserving other fields and formatting).
+4. Apply it with a `rules` subcommand. It edits your `doctor.config.*` or `package.json#reactDoctor` in place, preserving other fields and formatting.
 5. Validate the change did what they wanted:
 
 ```bash
@@ -27,15 +24,15 @@ npx react-doctor@latest --verbose --diff
 ## Commands
 
 ```bash
-npx react-doctor@latest rules list                         # every rule + its effective severity
-npx react-doctor@latest rules list --configured            # only what your config changed
-npx react-doctor@latest rules list --category Performance   # filter by category
-npx react-doctor@latest rules explain <rule>               # why it matters + how to configure
-npx react-doctor@latest rules disable <rule>               # rule never runs
-npx react-doctor@latest rules enable <rule>                # turn back on at its recommended severity
-npx react-doctor@latest rules set <rule> warn              # off | warn | error
-npx react-doctor@latest rules category "React Native" off   # whole category
-npx react-doctor@latest rules ignore-tag design            # skip a rule family (design, test-noise, …)
+npx react-doctor@latest rules list                          # every rule + its effective severity
+npx react-doctor@latest rules list --configured             # only what your config changed
+npx react-doctor@latest rules list --category Performance    # filter by category
+npx react-doctor@latest rules explain <rule>                # why it matters + how to configure
+npx react-doctor@latest rules disable <rule>                # rule never runs
+npx react-doctor@latest rules enable <rule>                 # turn back on at its recommended severity
+npx react-doctor@latest rules set <rule> warn               # off | warn | error
+npx react-doctor@latest rules category "React Native" off    # whole category
+npx react-doctor@latest rules ignore-tag design             # skip a rule family (design, test-noise, …)
 npx react-doctor@latest rules unignore-tag design
 ```
 
@@ -43,20 +40,20 @@ Rule references accept the full key (`react-doctor/no-danger`), the bare id (`no
 
 ## Decision guide
 
-Match the control to the intent — prefer the narrowest one:
+Match the control to the intent, and prefer the narrowest one:
 
-- **User disagrees with one rule / it's a false positive for them** → `rules disable <rule>` (sets `rules.<key> = "off"`; the rule stops running everywhere). This is the default for "I don't want this rule".
-- **Rule is fine but wrong severity** → `rules set <rule> warn` or `rules set <rule> error`.
-- **A disabled-by-default rule they want on** → `rules enable <rule>`.
-- **A whole area is unwanted** (e.g. all React Native rules) → `rules category "<Category>" off`.
-- **A behavioral family is noisy** (`design`, `test-noise`, `migration-hint`) → `rules ignore-tag <tag>`.
-- **Keep it locally but hide from PR comment / score / CI gate only** → do NOT disable. Edit `surfaces` in your config (`surfaces.prComment.excludeRules`, `surfaces.score.excludeTags`, `surfaces.ciFailure.excludeCategories`). The rule still shows in local `cli` output.
+- **User disagrees with one rule, or it is a false positive for them**: `rules disable <rule>` (sets `rules.<key> = "off"`; the rule stops running everywhere). This is the default for "I don't want this rule".
+- **Rule is fine but wrong severity**: `rules set <rule> warn` or `rules set <rule> error`.
+- **A disabled-by-default rule they want on**: `rules enable <rule>`.
+- **A whole area is unwanted** (for example all React Native rules): `rules category "<Category>" off`.
+- **A behavioral family is noisy** (`design`, `test-noise`, `migration-hint`): `rules ignore-tag <tag>`.
+- **Keep it locally but hide from PR comment, score, or CI gate only**: do not disable. Edit `surfaces` in your config (`surfaces.prComment.excludeRules`, `surfaces.score.excludeTags`, `surfaces.ciFailure.excludeCategories`). The rule still shows in local `cli` output.
 
-How the layers combine: `ignore.tags` disables every rule carrying that tag **before** linting, so a tagged rule stays off even if `rules`/`categories` set it to `warn`/`error` (a rule-level override cannot re-enable a tag-ignored rule). For rules that aren't tag-disabled, `rules` overrides `categories` overrides the rule's default. `surfaces` is visibility-only and never changes whether a rule runs.
+How the layers combine: `ignore.tags` disables every rule carrying that tag before linting, so a tagged rule stays off even if `rules` or `categories` set it to `warn` or `error` (a rule-level override cannot re-enable a tag-ignored rule). For rules that are not tag-disabled, `rules` overrides `categories` overrides the rule's default. `surfaces` is visibility-only and never changes whether a rule runs.
 
 ## Config shape
 
-Config lives in `doctor.config.ts` (or `.js`/`.mjs`/`.cjs`/`.json`/`.jsonc`), or the `reactDoctor` key in `package.json`. The `rules` commands edit whichever exists — TS/JS edits preserve formatting (via magicast) — and create `doctor.config.json` when none does, stamping `$schema`:
+Config lives in `doctor.config.ts` (or `.js`, `.mjs`, `.cjs`, `.json`, `.jsonc`), or the `reactDoctor` key in `package.json`. The `rules` commands edit whichever exists (TS and JS edits preserve formatting via magicast) and create `doctor.config.json` when none does, stamping `$schema`:
 
 ```ts
 // doctor.config.ts
@@ -69,4 +66,4 @@ export default {
 
 ## Educating the user
 
-When explaining a rule, lead with the "Why it matters" guidance from `rules explain` and, when they want depth, the per-rule recipe at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md`. Only after they understand it should you offer to disable it — many "bad" rules are catching real issues.
+When explaining a rule, lead with the "Why it matters" guidance from `rules explain` and, when they want depth, the per-rule recipe at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md`. Only after they understand it should you offer to disable it: many "bad" rules are catching real issues.
diff --git a/skills/react-doctor/references/performance.md b/skills/react-doctor/references/performance.md
new file mode 100644
index 000000000..e0dc85c79
--- /dev/null
+++ b/skills/react-doctor/references/performance.md
@@ -0,0 +1,55 @@
+# Performance engineering (runtime-evidence loop)
+
+Find and fix jank with runtime evidence, never code reading alone. The primary signal is the long animation frame (LoAF): a frame longer than 50 ms, captured with `PerformanceObserver` and attributed to the exact script that blocked it (its `sourceURL`, `sourceFunctionName`, and how much of that time was synchronous layout). That attribution is what `performance.now()` and reading code cannot give you. Use this when the user reports jank, dropped frames, janky scroll, slow click or typing response, poor INP, slow LCP, or layout shift, or asks to make something faster.
+
+Same discipline as [debug](./debug.md): hypothesize, capture, analyze the worst frame, fix the top evidence-backed cause, re-capture to verify, repeat. A change that does not make the offending script's frame time drop is not a fix.
+
+## 1. Hypothesize (3 to 5)
+
+Why is it slow, and where? Common React causes: unstable callback or object props, a missing `memo` or `useMemo`, a context provider that is too broad, large unvirtualized lists, expensive children re-rendering on every parent commit, or a sync layout read interleaved with writes (layout thrashing).
+
+## 2. Capture (no app changes)
+
+`browser perf` arms the LoAF, LCP, and CLS observers, loads the page, watches briefly past load, then reports the worst frames first with per-script attribution:
+
+```bash
+npx react-doctor browser perf http://localhost:3000   # measures the current page if URL omitted
+```
+
+It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The output leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
+
+To attribute interaction jank (a slow click, scroll, or keypress), drive the repro between load and the read: `browser open`, then `browser eval` the interaction, then `browser perf` with no URL. Without a URL it does not reload; it reads the long frames already buffered in the timeline, so the jank from your interaction is included.
+
+## 3. Analyze the worst frame first
+
+The output is already sorted worst-first. The script with the largest duration inside the worst frame is your culprit. If a script's sync-layout time is a large share of its duration, that is layout thrashing: sync reads (`offsetHeight`, `getBoundingClientRect`, `scrollTop`, `getComputedStyle`) interleaved with DOM writes. A minified `sourceURL` is meaningless on its own, so resolve it through your sourcemap. Cite the specific script when you conclude:
+
+> CONFIRMED: 128 ms frame, script `app.js` `drawSeries` ran 84 ms with 42 ms sync layout. The chart redraw forces layout inside the scroll handler.
+
+## 4. Zoom into React renders (optional)
+
+When the worst frame's script is your own React bundle and you need per-component render counts and why each rendered, profile React directly. `browser open` injects the real DevTools profiler before the page loads, so there are no app changes, no Chrome extension, and no manual record or stop:
+
+```bash
+npx react-doctor browser open http://localhost:3000
+```
+
+For trustworthy timings, run against React's profiling build (alias `react-dom` to `react-dom/profiling` in your bundler) in a dev or non-prod build. Dev timings work but are inflated.
+
+Drive it through `browser eval` (the Playwright `page` is in scope). `stop()` returns a JSON profiling export and resolves to `null` when nothing was recorded (a production React build records no profiling data):
+
+```bash
+npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'
+# drive the exact repro with more `browser eval`: page.locator("...").click(), page.keyboard.type("...")
+npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.stop())'
+```
+
+Aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates).
+
+## 5. Fix, only with proof
+
+Apply the smallest change that addresses the proven cause. Cross-check it against the baseline rules in [`SKILL.md`](../SKILL.md) (derive don't duplicate, effects, single source of truth). Never fix by wrapping work in `setTimeout`: that defers the work to a later frame, it does not remove it.
+
+## 6. Verify
+
+Re-run the same capture and diff before and after: the offending frame and its script time must drop, and no other frame may regress. For the React profiler, re-run the scenario a few times and compare medians (dev timings are noisy; StrictMode double-renders on mount). Never claim a performance win without before-and-after evidence. The profiler leaves nothing behind in your app to clean up; it lives only in the injected browser session.

From 5b1dc39cca1587a8b0279a2238ce6b95954912f8 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Fri, 19 Jun 2026 01:27:57 -0700
Subject: [PATCH 02/38] feat(react): add `react-doctor mcp` Model Context
 Protocol server
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the internal @react-doctor/mcp package and surfaces it as a `react-doctor
mcp` subcommand (bundled into the CLI and bin-fast-pathed like experimental-lsp,
since the stdio transport owns stdin/stdout). It exposes the skill's jobs as MCP
tools over stdio: doctor_scan (diagnose() score + diagnostics), the browser
tools (open with React profiler, eval, snapshot, screenshot, audit, console,
network, perf, report — each attaching a fresh CDP session per call), and the
debug log server (debug_serve / debug_read_logs / debug_clear_logs). Tools reuse
the existing @react-doctor/api and @react-doctor/browser/debug surfaces; failures
return isError results instead of throwing. Records one cli.invoked{command:mcp}
metric on start.
---
 .changeset/react-browser-debug-skill.md   |   2 +-
 packages/mcp/package.json                 |  34 ++
 packages/mcp/src/constants.ts             |  14 +
 packages/mcp/src/index.ts                 |   2 +
 packages/mcp/src/server.ts                |  28 +
 packages/mcp/src/tools/browser.ts         | 207 +++++++
 packages/mcp/src/tools/debug.ts           | 109 ++++
 packages/mcp/src/tools/doctor.ts          |  68 +++
 packages/mcp/src/utils/load-browser.ts    |  20 +
 packages/mcp/src/utils/parse-viewport.ts  |  17 +
 packages/mcp/src/utils/tool-result.ts     |  21 +
 packages/mcp/src/utils/with-session.ts    |  28 +
 packages/mcp/tests/server.test.ts         |  53 ++
 packages/mcp/tsconfig.json                |   8 +
 packages/mcp/vite.config.ts               |  18 +
 packages/react-doctor/bin/react-doctor.js |   5 +
 packages/react-doctor/package.json        |   1 +
 packages/react-doctor/src/cli/index.ts    |  11 +
 packages/react-doctor/src/mcp.ts          |  18 +
 packages/react-doctor/vite.config.ts      |  24 +
 pnpm-lock.yaml                            | 668 ++++++++++++++++++++++
 21 files changed, 1355 insertions(+), 1 deletion(-)
 create mode 100644 packages/mcp/package.json
 create mode 100644 packages/mcp/src/constants.ts
 create mode 100644 packages/mcp/src/index.ts
 create mode 100644 packages/mcp/src/server.ts
 create mode 100644 packages/mcp/src/tools/browser.ts
 create mode 100644 packages/mcp/src/tools/debug.ts
 create mode 100644 packages/mcp/src/tools/doctor.ts
 create mode 100644 packages/mcp/src/utils/load-browser.ts
 create mode 100644 packages/mcp/src/utils/parse-viewport.ts
 create mode 100644 packages/mcp/src/utils/tool-result.ts
 create mode 100644 packages/mcp/src/utils/with-session.ts
 create mode 100644 packages/mcp/tests/server.test.ts
 create mode 100644 packages/mcp/tsconfig.json
 create mode 100644 packages/mcp/vite.config.ts
 create mode 100644 packages/react-doctor/src/mcp.ts

diff --git a/.changeset/react-browser-debug-skill.md b/.changeset/react-browser-debug-skill.md
index ddb941b42..032461179 100644
--- a/.changeset/react-browser-debug-skill.md
+++ b/.changeset/react-browser-debug-skill.md
@@ -2,4 +2,4 @@
 "react-doctor": minor
 ---
 
-Add the `browser` and `debug` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback) for accessibility audits, console/network capture, performance traces with React DevTools profiling, snapshots, and screenshots. `debug` runs an NDJSON logging server the debug job posts runtime evidence to.
+Add the `browser`, `debug`, and `mcp` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback) for accessibility audits, console/network capture, performance traces with React DevTools profiling, snapshots, and screenshots. `debug` runs an NDJSON logging server the debug job posts runtime evidence to. `mcp` runs a Model Context Protocol server over stdio that exposes the doctor scan and the browser/debug jobs as MCP tools, so any MCP-capable agent can run `react-doctor mcp` and call `doctor_scan`, the `browser_*` tools, and the `debug_*` log server directly.
diff --git a/packages/mcp/package.json b/packages/mcp/package.json
new file mode 100644
index 000000000..7ab29fe11
--- /dev/null
+++ b/packages/mcp/package.json
@@ -0,0 +1,34 @@
+{
+  "name": "@react-doctor/mcp",
+  "version": "0.0.1",
+  "private": true,
+  "description": "Internal: React Doctor's Model Context Protocol server. Exposes the doctor scan and the browser and debug jobs as MCP tools over stdio. Not published directly; bundled into the react-doctor CLI as `react-doctor mcp`.",
+  "license": "MIT",
+  "type": "module",
+  "sideEffects": false,
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "build": "node -e \"require('node:fs').rmSync('dist', { recursive: true, force: true })\" && cross-env NODE_ENV=production vp pack",
+    "typecheck": "tsc --noEmit",
+    "test": "vp test run"
+  },
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.29.0",
+    "@react-doctor/api": "workspace:*",
+    "@react-doctor/browser": "workspace:*",
+    "@react-doctor/core": "workspace:*",
+    "@react-doctor/debug": "workspace:*",
+    "zod": "^4.4.3"
+  },
+  "devDependencies": {
+    "@types/node": "^25.6.0"
+  },
+  "engines": {
+    "node": "^20.19.0 || >=22.13.0"
+  }
+}
diff --git a/packages/mcp/src/constants.ts b/packages/mcp/src/constants.ts
new file mode 100644
index 000000000..586b4fb03
--- /dev/null
+++ b/packages/mcp/src/constants.ts
@@ -0,0 +1,14 @@
+export const MCP_SERVER_NAME = "react-doctor";
+
+// Loopback CDP endpoint the browser tools attach to by default, surfaced in
+// tool descriptions so an agent knows what the `cdp` argument overrides.
+export const DEFAULT_CDP_ENDPOINT_HINT = "http://127.0.0.1:9222";
+
+// Cap on diagnostics returned inline by `doctor_scan` so a large codebase's
+// scan stays a readable tool result rather than a multi-megabyte dump; the
+// summary still reports the full counts and a `truncated` flag.
+export const MAX_INLINE_DIAGNOSTICS = 100;
+
+// Upper bound on an emulated viewport dimension, so a typo can't push an
+// absurd device-metrics override into CDP (mirrors the CLI's --viewport guard).
+export const MAX_VIEWPORT_PX = 10_000;
diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts
new file mode 100644
index 000000000..a8ca07a3b
--- /dev/null
+++ b/packages/mcp/src/index.ts
@@ -0,0 +1,2 @@
+export { createMcpServer, startMcpServer } from "./server.js";
+export type { StartMcpServerOptions } from "./server.js";
diff --git a/packages/mcp/src/server.ts b/packages/mcp/src/server.ts
new file mode 100644
index 000000000..0615e1e1b
--- /dev/null
+++ b/packages/mcp/src/server.ts
@@ -0,0 +1,28 @@
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { MCP_SERVER_NAME } from "./constants.js";
+import { registerBrowserTools } from "./tools/browser.js";
+import { registerDebugTools } from "./tools/debug.js";
+import { registerDoctorTools } from "./tools/doctor.js";
+
+export interface StartMcpServerOptions {
+  version: string;
+}
+
+// Build the server with every tool registered, but without a transport — so
+// tests can introspect it and the entry point owns the stdio wiring.
+export const createMcpServer = (options: StartMcpServerOptions): McpServer => {
+  const server = new McpServer({ name: MCP_SERVER_NAME, version: options.version });
+  registerDoctorTools(server);
+  registerBrowserTools(server);
+  registerDebugTools(server);
+  return server;
+};
+
+// Run the server over stdio. The transport owns stdout for the JSON-RPC stream,
+// so nothing on this path may write to stdout (tool handlers return content;
+// diagnostics go to stderr).
+export const startMcpServer = async (options: StartMcpServerOptions): Promise<void> => {
+  const server = createMcpServer(options);
+  await server.connect(new StdioServerTransport());
+};
diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
new file mode 100644
index 000000000..cac5e4f2e
--- /dev/null
+++ b/packages/mcp/src/tools/browser.ts
@@ -0,0 +1,207 @@
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { z } from "zod";
+import { DEFAULT_CDP_ENDPOINT_HINT } from "../constants.js";
+import { parseViewport } from "../utils/parse-viewport.js";
+import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
+import { withSession, type BrowserToolConnection } from "../utils/with-session.js";
+
+const connectionShape = {
+  cdp: z
+    .string()
+    .optional()
+    .describe(`CDP endpoint to attach to (default ${DEFAULT_CDP_ENDPOINT_HINT})`),
+  noLaunch: z
+    .boolean()
+    .optional()
+    .describe("Fail instead of launching Chrome when no attach target exists"),
+};
+
+const viewportShape = {
+  viewport: z
+    .string()
+    .optional()
+    .describe("Emulate a viewport for this call, WIDTHxHEIGHT in pixels (e.g. 390x844)"),
+};
+
+const urlShape = {
+  url: z
+    .string()
+    .optional() // omit-behavior varies per tool: console/network reload, perf does not
+    .describe("URL to load; omit to use the current page (console/network captures reload it)"),
+};
+
+interface ConnectionArgs {
+  cdp?: string;
+  noLaunch?: boolean;
+  viewport?: string;
+}
+
+const toConnection = (args: ConnectionArgs): BrowserToolConnection => ({
+  cdp: args.cdp,
+  noLaunch: args.noLaunch,
+  viewport: args.viewport ? parseViewport(args.viewport) : undefined,
+});
+
+export const registerBrowserTools = (server: McpServer): void => {
+  server.registerTool(
+    "browser_open",
+    {
+      title: "Open a URL with the React profiler",
+      description:
+        "Open a URL in the attached Chrome and keep the page, injecting the React DevTools profiler so browser_eval can drive window.__REACT_PERF__ (start()/stop()) for render profiling. Attaches to your running Chrome over CDP, launching a dedicated one only as a fallback.",
+      inputSchema: { url: z.string().describe("URL to open"), ...connectionShape },
+      annotations: { openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () => {
+        await withSession(toConnection(args), (session) => session.openWithReactProfiler(args.url));
+        return textResult(
+          `Opened ${args.url}. React profiler ready: call browser_eval with "page.evaluate(() => window.__REACT_PERF__.start())", drive a scenario, then stop() for the DevTools profiling export.`,
+        );
+      }),
+  );
+
+  server.registerTool(
+    "browser_eval",
+    {
+      title: "Evaluate Playwright code on the page",
+      description:
+        'Run an async expression with the Playwright `page` in scope (e.g. page.locator("text=Login").click()) against the attached page. Use to drive the exact repro between opening a page and measuring it.',
+      inputSchema: {
+        expression: z.string().describe("Async expression with the Playwright `page` in scope"),
+        ...connectionShape,
+      },
+      annotations: { openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () => {
+        const result = await withSession(toConnection(args), (session) =>
+          session.evaluate(args.expression),
+        );
+        if (typeof result === "string") return textResult(result);
+        return textResult(JSON.stringify(result, null, 2) ?? "(no value)"); // undefined/function/symbol
+      }),
+  );
+
+  server.registerTool(
+    "browser_snapshot",
+    {
+      title: "Snapshot the page's accessibility tree",
+      description:
+        "Return the attached page's accessibility tree — a stable, text view of what is rendered, useful for locating elements before driving them with browser_eval.",
+      inputSchema: { ...connectionShape, ...viewportShape },
+      annotations: { readOnlyHint: true, openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () =>
+        textResult(await withSession(toConnection(args), (session) => session.snapshot())),
+      ),
+  );
+
+  server.registerTool(
+    "browser_screenshot",
+    {
+      title: "Screenshot the page",
+      description: "Capture a screenshot of the attached page as a PNG image.",
+      inputSchema: { ...connectionShape, ...viewportShape },
+      annotations: { readOnlyHint: true, openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () => {
+        const bytes = await withSession(toConnection(args), (session) => session.screenshot());
+        return {
+          content: [
+            { type: "image", data: Buffer.from(bytes).toString("base64"), mimeType: "image/png" },
+          ],
+        };
+      }),
+  );
+
+  server.registerTool(
+    "browser_audit",
+    {
+      title: "Run an accessibility audit",
+      description:
+        "Run an axe-core accessibility audit on the attached page (or a URL) and return the violations with impact, help text, and affected element targets.",
+      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
+      annotations: { readOnlyHint: true, openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () => {
+        const violations = await withSession(toConnection(args), (session) =>
+          session.audit(args.url),
+        );
+        return jsonResult({ violationCount: violations.length, violations });
+      }),
+  );
+
+  server.registerTool(
+    "browser_console",
+    {
+      title: "Capture console output",
+      description:
+        "Capture console messages and page errors during a load of the attached page (or a URL; reloads when no URL is given).",
+      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
+      annotations: { readOnlyHint: true, openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () => {
+        const messages = await withSession(toConnection(args), (session) =>
+          session.captureConsole(args.url),
+        );
+        return jsonResult({ messageCount: messages.length, messages });
+      }),
+  );
+
+  server.registerTool(
+    "browser_network",
+    {
+      title: "Capture network requests",
+      description:
+        "Capture network requests during a load of the attached page (or a URL; reloads when no URL is given), flagging failures and non-2xx/3xx responses.",
+      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
+      annotations: { readOnlyHint: true, openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () => {
+        const requests = await withSession(toConnection(args), (session) =>
+          session.captureNetwork(args.url),
+        );
+        return jsonResult({ requestCount: requests.length, requests });
+      }),
+  );
+
+  server.registerTool(
+    "browser_perf",
+    {
+      title: "Measure runtime performance (jank)",
+      description:
+        "Capture long animation frames (>50ms main-thread jank) with per-script attribution, plus LCP and CLS. Loads a URL when given; omit the URL to measure the current page without reloading (so a browser_eval interaction's jank is included).",
+      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
+      annotations: { readOnlyHint: true, openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () =>
+        jsonResult(
+          await withSession(toConnection(args), (session) => session.measurePerformance(args.url)),
+        ),
+      ),
+  );
+
+  server.registerTool(
+    "browser_report",
+    {
+      title: "Capture a full page report",
+      description:
+        "Capture console, network, performance, and accessibility for the attached page (or a URL) in a single load — the efficient path when you want the whole runtime picture at once.",
+      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
+      annotations: { readOnlyHint: true, openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () =>
+        jsonResult(
+          await withSession(toConnection(args), (session) => session.inspectPage(args.url)),
+        ),
+      ),
+  );
+};
diff --git a/packages/mcp/src/tools/debug.ts b/packages/mcp/src/tools/debug.ts
new file mode 100644
index 000000000..267f1c075
--- /dev/null
+++ b/packages/mcp/src/tools/debug.ts
@@ -0,0 +1,109 @@
+import type { Server } from "node:http";
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { createLogServer, DEFAULT_HOST } from "@react-doctor/debug";
+import { z } from "zod";
+import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
+
+// Log servers started via `debug_serve` must outlive the tool call (the agent
+// instruments the app, then reads them back), so they live for the MCP process
+// and are closed when it exits. A stale lock from a hard kill self-heals: the
+// reuse path pings for liveness and clears a dead lock before binding.
+const liveServers = new Set<Server>();
+let cleanupRegistered = false;
+const trackLogServer = (logServer: Server): void => {
+  liveServers.add(logServer);
+  logServer.on("close", () => liveServers.delete(logServer));
+  if (cleanupRegistered) return;
+  cleanupRegistered = true;
+  process.once("exit", () => {
+    for (const server of liveServers) server.close();
+  });
+};
+
+export const registerDebugTools = (server: McpServer): void => {
+  server.registerTool(
+    "debug_serve",
+    {
+      title: "Start the runtime debug log server",
+      description:
+        "Start the NDJSON logging server for the debug job. Instrument your app to POST runtime logs to the returned endpoint, reproduce the bug, then read them with debug_read_logs. Reuses an already-running server for this project, returning its endpoint.",
+      inputSchema: {
+        sessionId: z
+          .string()
+          .optional()
+          .describe("Session id to write under (default: random hex)"),
+        port: z.number().int().optional().describe("Port to listen on (default: random)"),
+        host: z
+          .string()
+          .optional()
+          .describe(`Host to bind (default ${DEFAULT_HOST}; keep it loopback)`),
+      },
+      annotations: { openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () => {
+        const {
+          server: logServer,
+          info,
+          reused,
+        } = await createLogServer({
+          sessionId: args.sessionId,
+          port: args.port,
+          host: args.host,
+          cwd: process.cwd(),
+        });
+        if (logServer) trackLogServer(logServer);
+        return jsonResult({ ...info, reused });
+      }),
+  );
+
+  server.registerTool(
+    "debug_read_logs",
+    {
+      title: "Read captured runtime logs",
+      description:
+        "Fetch the NDJSON runtime logs the debug log server has captured at the given endpoint (returned by debug_serve).",
+      inputSchema: {
+        endpoint: z
+          .string()
+          .describe("The endpoint from debug_serve, e.g. http://127.0.0.1:PORT/ingest/<sessionId>"),
+      },
+      annotations: { readOnlyHint: true, openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () => {
+        const response = await fetch(args.endpoint);
+        if (!response.ok) {
+          return {
+            content: [{ type: "text", text: `Log server returned ${response.status}` }],
+            isError: true,
+          };
+        }
+        const logs = await response.text();
+        return textResult(logs.length > 0 ? logs : "(no logs captured yet)");
+      }),
+  );
+
+  server.registerTool(
+    "debug_clear_logs",
+    {
+      title: "Clear captured runtime logs",
+      description:
+        "Delete the runtime logs captured so far at the given endpoint (returned by debug_serve), so the next reproduction starts from a clean slate.",
+      inputSchema: {
+        endpoint: z.string().describe("The endpoint from debug_serve to clear"),
+      },
+      annotations: { openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () => {
+        const response = await fetch(args.endpoint, { method: "DELETE" });
+        return response.ok
+          ? textResult("Cleared logs")
+          : {
+              content: [{ type: "text", text: `Log server returned ${response.status}` }],
+              isError: true,
+            };
+      }),
+  );
+};
diff --git a/packages/mcp/src/tools/doctor.ts b/packages/mcp/src/tools/doctor.ts
new file mode 100644
index 000000000..9024a3cfd
--- /dev/null
+++ b/packages/mcp/src/tools/doctor.ts
@@ -0,0 +1,68 @@
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { diagnose, type DiagnoseResult } from "@react-doctor/api";
+import { z } from "zod";
+import { MAX_INLINE_DIAGNOSTICS } from "../constants.js";
+import { jsonResult, runTool } from "../utils/tool-result.js";
+
+// Condenses a scan into a JSON-friendly summary; non-error diagnostics count as warnings.
+const summarizeScan = (result: DiagnoseResult) => {
+  const errorCount = result.diagnostics.filter(({ severity }) => severity === "error").length;
+  const affectedFileCount = new Set(result.diagnostics.map(({ filePath }) => filePath)).size;
+  const inlined = result.diagnostics.slice(0, MAX_INLINE_DIAGNOSTICS).map((entry) => ({
+    rule: `${entry.plugin}/${entry.rule}`,
+    severity: entry.severity,
+    category: entry.category,
+    title: entry.title ?? null,
+    message: entry.message,
+    file: entry.filePath,
+    line: entry.line,
+    column: entry.column,
+  }));
+  return {
+    score: result.score?.score ?? null,
+    scoreLabel: result.score?.label ?? null,
+    totalDiagnostics: result.diagnostics.length,
+    errorCount,
+    warningCount: result.diagnostics.length - errorCount,
+    affectedFileCount,
+    skippedChecks: result.skippedChecks,
+    truncated: result.diagnostics.length > MAX_INLINE_DIAGNOSTICS,
+    diagnostics: inlined,
+  };
+};
+
+// Registers `doctor_scan`: runs `diagnose` on the requested directory (default:
+// the process working directory) and returns the `summarizeScan` JSON summary.
+export const registerDoctorTools = (server: McpServer): void => {
+  server.registerTool(
+    "doctor_scan",
+    {
+      title: "Scan React project health",
+      description:
+        "Run React Doctor's static analysis on a project directory and return a 0–100 health score plus diagnostics across lint, accessibility, performance, security, and architecture. Use after writing or before committing React / React Native code, or to triage a codebase.",
+      inputSchema: {
+        directory: z
+          .string()
+          .optional()
+          .describe("Project directory to scan (default: the server's working directory)"),
+        deadCode: z
+          .boolean()
+          .optional()
+          .describe(
+            "Run dead-code analysis — unused files/exports/dependencies, circular imports (default true)",
+          ),
+        warnings: z
+          .boolean()
+          .optional()
+          .describe("Include warning-severity diagnostics (default true)"),
+      },
+      annotations: { readOnlyHint: true, openWorldHint: false },
+    },
+    (args) =>
+      runTool(async () => {
+        const { directory, deadCode, warnings } = args;
+        const scan = await diagnose(directory ?? process.cwd(), { deadCode, warnings });
+        return jsonResult(summarizeScan(scan));
+      }),
+  );
+};
diff --git a/packages/mcp/src/utils/load-browser.ts b/packages/mcp/src/utils/load-browser.ts
new file mode 100644
index 000000000..4aad547ad
--- /dev/null
+++ b/packages/mcp/src/utils/load-browser.ts
@@ -0,0 +1,20 @@
+import type * as BrowserModule from "@react-doctor/browser";
+
+const isModuleNotFoundError = (error: unknown): boolean => {
+  if (!(error instanceof Error) || !("code" in error)) return false;
+  return error.code === "ERR_MODULE_NOT_FOUND" || error.code === "MODULE_NOT_FOUND";
+};
+
+// Lazy loader for the browser package. playwright-core is heavy and optional,
+// so nothing is imported until a browser tool needs it; `doctor_scan` works
+// without it. A module-resolution failure is rethrown as an actionable
+// install hint instead of a stack trace; any other error propagates as-is.
+// NOTE(review): every module-not-found error is attributed to playwright-core,
+// including one raised for a different missing dependency.
+export const loadBrowser = (): Promise<typeof BrowserModule> =>
+  import("@react-doctor/browser").catch((error: unknown) => {
+    if (!isModuleNotFoundError(error)) throw error;
+    throw new Error(
+      "The browser tools need playwright-core, which isn't installed. Install it with `npm i -D playwright-core`, then retry.",
+    );
+  });
diff --git a/packages/mcp/src/utils/parse-viewport.ts b/packages/mcp/src/utils/parse-viewport.ts
new file mode 100644
index 000000000..1c7950494
--- /dev/null
+++ b/packages/mcp/src/utils/parse-viewport.ts
@@ -0,0 +1,17 @@
+import type { Viewport } from "@react-doctor/browser";
+import { MAX_VIEWPORT_PX } from "../constants.js";
+
+// Turns a `WIDTHxHEIGHT` string (the `x` may be either case, surrounding
+// whitespace is ignored) into a viewport; throws on a malformed, zero, or oversized value.
+export const parseViewport = (value: string): Viewport => {
+  const dimensions = value.trim().match(/^(\d+)x(\d+)$/i);
+  const width = Number(dimensions?.[1] ?? 0);
+  const height = Number(dimensions?.[2] ?? 0);
+  if (dimensions === null || width === 0 || height === 0) {
+    throw new Error(`Use WIDTHxHEIGHT in pixels, e.g. 390x844 (got "${value}").`);
+  }
+  if (Math.max(width, height) > MAX_VIEWPORT_PX) {
+    throw new Error(`Viewport dimensions must be at most ${MAX_VIEWPORT_PX}px.`);
+  }
+  return { width, height };
+};
diff --git a/packages/mcp/src/utils/tool-result.ts b/packages/mcp/src/utils/tool-result.ts
new file mode 100644
index 000000000..9bf4e2d4d
--- /dev/null
+++ b/packages/mcp/src/utils/tool-result.ts
@@ -0,0 +1,21 @@
+import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
+
+export const textResult = (text: string): CallToolResult => {
+  return { content: [{ type: "text", text }] };
+};
+// `JSON.stringify` returns undefined for undefined/functions; fall back to "null".
+export const jsonResult = (value: unknown): CallToolResult =>
+  textResult(JSON.stringify(value, null, 2) ?? "null");
+
+// MCP convention: a failing tool returns a result flagged `isError: true`
+// instead of throwing, so the model can read the message and react. Every
+// handler runs through this wrapper: whatever `run` throws or rejects with is
+// converted into that shape (an `Error` contributes its message, else `String`).
+export const runTool = async (run: () => Promise<CallToolResult>): Promise<CallToolResult> => {
+  try {
+    return await run();
+  } catch (thrown: unknown) {
+    const reason = thrown instanceof Error ? thrown.message : String(thrown);
+    return { ...textResult(reason), isError: true };
+  }
+};
diff --git a/packages/mcp/src/utils/with-session.ts b/packages/mcp/src/utils/with-session.ts
new file mode 100644
index 000000000..0933d9a96
--- /dev/null
+++ b/packages/mcp/src/utils/with-session.ts
@@ -0,0 +1,28 @@
+import type { BrowserSession, Viewport } from "@react-doctor/browser";
+import { loadBrowser } from "./load-browser.js";
+
+export interface BrowserToolConnection {
+  cdp?: string; // passed to `attach` as `cdpEndpoint`
+  noLaunch?: boolean; // `true` passes `launch: false` to `attach`
+  viewport?: Viewport; // when set, applied with `setViewport` before use
+}
+
+// Attaches a fresh session for a single tool call, applies the optional
+// viewport, runs `useSession`, and always disposes the session afterwards. The
+// page lives in the browser (attached over CDP), so it persists across calls.
+export const withSession = async <ResultType>(
+  connection: BrowserToolConnection,
+  useSession: (session: BrowserSession) => Promise<ResultType>,
+): Promise<ResultType> => {
+  const browser = await loadBrowser();
+  const { cdp: cdpEndpoint, noLaunch, viewport } = connection;
+  const launch = noLaunch === true ? false : undefined;
+  const session = await browser.BrowserSession.attach({ cdpEndpoint, launch });
+  try {
+    if (viewport) await session.setViewport(viewport);
+    return await useSession(session);
+  } finally {
+    await session.dispose();
+  }
+};
diff --git a/packages/mcp/tests/server.test.ts b/packages/mcp/tests/server.test.ts
new file mode 100644
index 000000000..e35a38dc2
--- /dev/null
+++ b/packages/mcp/tests/server.test.ts
@@ -0,0 +1,53 @@
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
+import { expect, test } from "vite-plus/test";
+import { createMcpServer } from "../src/server.js";
+
+const listToolNames = async (): Promise<string[]> => {
+  const mcpServer = createMcpServer({ version: "0.0.0-test" });
+  const [clientSide, serverSide] = InMemoryTransport.createLinkedPair();
+  const mcpClient = new Client({ name: "test", version: "0.0.0" });
+  await Promise.all([mcpServer.connect(serverSide), mcpClient.connect(clientSide)]);
+  try {
+    const listing = await mcpClient.listTools();
+    return listing.tools.map(({ name }) => name).sort();
+  } finally {
+    await mcpClient.close();
+    await mcpServer.close();
+  }
+};
+
+test("registers the doctor, browser, and debug tools", async () => {
+  await expect(listToolNames()).resolves.toEqual([
+    "browser_audit",
+    "browser_console",
+    "browser_eval",
+    "browser_network",
+    "browser_open",
+    "browser_perf",
+    "browser_report",
+    "browser_screenshot",
+    "browser_snapshot",
+    "debug_clear_logs",
+    "debug_read_logs",
+    "debug_serve",
+    "doctor_scan",
+  ]);
+});
+
+test("each tool exposes a description and input schema", async () => {
+  const mcpServer = createMcpServer({ version: "0.0.0-test" });
+  const [clientSide, serverSide] = InMemoryTransport.createLinkedPair();
+  const mcpClient = new Client({ name: "test", version: "0.0.0" });
+  await Promise.all([mcpServer.connect(serverSide), mcpClient.connect(clientSide)]);
+  try {
+    const listing = await mcpClient.listTools();
+    for (const { name, description, inputSchema } of listing.tools) {
+      expect(description, `${name} description`).toBeTruthy();
+      expect(inputSchema, `${name} inputSchema`).toBeTruthy();
+    }
+  } finally {
+    await mcpClient.close();
+    await mcpServer.close();
+  }
+});
diff --git a/packages/mcp/tsconfig.json b/packages/mcp/tsconfig.json
new file mode 100644
index 000000000..9ef507c84
--- /dev/null
+++ b/packages/mcp/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true,
+    "types": ["node"]
+  },
+  "include": ["src", "tests"]
+}
diff --git a/packages/mcp/vite.config.ts b/packages/mcp/vite.config.ts
new file mode 100644
index 000000000..3eaf4fb15
--- /dev/null
+++ b/packages/mcp/vite.config.ts
@@ -0,0 +1,18 @@
+import { defineConfig } from "vite-plus";
+
+export default defineConfig({
+  pack: [
+    {
+      entry: {
+        index: "./src/index.ts",
+      },
+      dts: true,
+      target: "es2022",
+      platform: "node",
+      fixedExtension: false,
+    },
+  ],
+  test: {
+    testTimeout: 10_000, // milliseconds
+  },
+});
diff --git a/packages/react-doctor/bin/react-doctor.js b/packages/react-doctor/bin/react-doctor.js
index 24835733b..783c8bc81 100755
--- a/packages/react-doctor/bin/react-doctor.js
+++ b/packages/react-doctor/bin/react-doctor.js
@@ -16,6 +16,11 @@ if (module.enableCompileCache && !process.env.NODE_DISABLE_COMPILE_CACHE) {
 if (process.argv[2] === "experimental-lsp") {
   const { startLanguageServer } = await import("../dist/lsp.js");
   startLanguageServer();
+} else if (process.argv[2] === "mcp") {
+  // Same fast-path for the MCP server: its stdio transport owns stdin/stdout,
+  // so the CLI's commander / prompts / ora layer must never load first.
+  const { startReactDoctorMcp } = await import("../dist/mcp.js");
+  await startReactDoctorMcp();
 } else {
   await import("../dist/cli.js");
 }
diff --git a/packages/react-doctor/package.json b/packages/react-doctor/package.json
index c90ffa711..c2fb9a0c8 100644
--- a/packages/react-doctor/package.json
+++ b/packages/react-doctor/package.json
@@ -78,6 +78,7 @@
     "@react-doctor/core": "workspace:*",
     "@react-doctor/debug": "workspace:*",
     "@react-doctor/language-server": "workspace:*",
+    "@react-doctor/mcp": "workspace:*",
     "@types/babel__code-frame": "^7.27.0",
     "@types/prompts": "^2.4.9",
     "@xterm/headless": "^6.0.0",
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
index ef365ed65..4e536a248 100644
--- a/packages/react-doctor/src/cli/index.ts
+++ b/packages/react-doctor/src/cli/index.ts
@@ -431,6 +431,17 @@ program
   .allowUnknownOption()
   .action(() => {});
 
+// NOTE: like `experimental-lsp`, `react-doctor mcp` is fast-pathed by the bin
+// shim (bin/react-doctor.js) to a dedicated stdio entry, so the CLI layer
+// (commander / prompts / ora) never touches process.stdin before the MCP
+// transport attaches. It is registered here, with a no-op action, only so that
+// `--help` lists it; the action never runs in practice.
+program
+  .command("mcp")
+  .description("Run the React Doctor MCP server over stdio (doctor scan + browser jobs as tools)")
+  .allowUnknownOption()
+  .action(() => {});
+
 // HACK: when stdout is piped into a process that closes early (e.g.
 // `react-doctor . | head`), Node throws an uncaught EPIPE on the next
 // write. Exit cleanly instead of dumping a stack trace.
diff --git a/packages/react-doctor/src/mcp.ts b/packages/react-doctor/src/mcp.ts
new file mode 100644
index 000000000..0bfc44d9b
--- /dev/null
+++ b/packages/react-doctor/src/mcp.ts
@@ -0,0 +1,18 @@
+/**
+ * Dedicated entry for `react-doctor mcp`. The bin shim fast-paths to this
+ * module so the MCP server runs without loading the CLI (commander / prompts /
+ * ora), which would otherwise touch `process.stdin` before the stdio transport
+ * attaches and break the JSON-RPC stream. Nothing on this path may write to
+ * stdout — the transport owns it.
+ */
+import { startMcpServer } from "@react-doctor/mcp";
+import { METRIC } from "./cli/utils/constants.js";
+import { recordCount } from "./cli/utils/record-metric.js";
+import { VERSION } from "./cli/utils/version.js";
+import { initializeSentry } from "./instrument.js";
+
+export const startReactDoctorMcp = (): Promise<void> => {
+  initializeSentry();
+  recordCount(METRIC.cliInvoked, 1, { command: "mcp" }); // tag this run as the `mcp` command
+  return startMcpServer({ version: VERSION }); // returned so the bin shim can await it
+};
diff --git a/packages/react-doctor/vite.config.ts b/packages/react-doctor/vite.config.ts
index b00f1cdc9..1488cc7e3 100644
--- a/packages/react-doctor/vite.config.ts
+++ b/packages/react-doctor/vite.config.ts
@@ -192,6 +192,30 @@ export default defineConfig({
       platform: "node",
       fixedExtension: false,
     },
+    {
+      // Dedicated MCP-server entry the bin shim fast-paths to for
+      // `react-doctor mcp`. Inlines @react-doctor/mcp + its api/core/browser/
+      // debug deps and the MCP SDK; keeps the heavy/native engines external
+      // (same set as the CLI pack) so playwright-core stays an optional install
+      // and oxlint/oxc/deslop resolve their native bindings at runtime.
+      entry: { mcp: "./src/mcp.ts" },
+      deps: {
+        neverBundle: [
+          "@sentry/node",
+          "playwright-core",
+          "deslop-js",
+          "oxc-parser",
+          "oxc-resolver",
+          "oxlint",
+          "oxlint-plugin-react-doctor",
+          "typescript",
+        ],
+      },
+      dts: false,
+      target: "node20",
+      platform: "node",
+      fixedExtension: false,
+    },
     {
       // Dedicated language-server entry the bin shim fast-paths to for
       // `react-doctor experimental-lsp`. Inlines @react-doctor/language-server + core;
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 6b0c0bd1d..3f302b27a 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -206,6 +206,31 @@ importers:
         specifier: ^25.6.0
         version: 25.6.0
 
+  packages/mcp:
+    dependencies:
+      '@modelcontextprotocol/sdk':
+        specifier: ^1.29.0
+        version: 1.29.0(zod@4.4.3)
+      '@react-doctor/api':
+        specifier: workspace:*
+        version: link:../api
+      '@react-doctor/browser':
+        specifier: workspace:*
+        version: link:../browser
+      '@react-doctor/core':
+        specifier: workspace:*
+        version: link:../core
+      '@react-doctor/debug':
+        specifier: workspace:*
+        version: link:../debug
+      zod:
+        specifier: ^4.4.3
+        version: 4.4.3
+    devDependencies:
+      '@types/node':
+        specifier: ^25.6.0
+        version: 25.6.0
+
   packages/oxlint-plugin-react-doctor:
     dependencies:
       '@typescript-eslint/types':
@@ -291,6 +316,9 @@ importers:
       '@react-doctor/language-server':
         specifier: workspace:*
         version: link:../language-server
+      '@react-doctor/mcp':
+        specifier: workspace:*
+        version: link:../mcp
       '@types/babel__code-frame':
         specifier: ^7.27.0
         version: 7.27.0
@@ -1058,6 +1086,12 @@ packages:
     resolution: {integrity: sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==}
     engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
 
+  '@hono/node-server@1.19.14':
+    resolution: {integrity: sha512-GwtvgtXxnWsucXvbQXkRgqksiH2Qed37H9xHZocE5sA3N8O8O8/8FA3uclQXxXVzc9XBZuEOMK7+r02FmSpHtw==}
+    engines: {node: '>=18.14.1'}
+    peerDependencies:
+      hono: ^4
+
   '@humanfs/core@0.19.1':
     resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==}
     engines: {node: '>=18.18.0'}
@@ -1264,6 +1298,16 @@ packages:
   '@manypkg/get-packages@1.1.3':
     resolution: {integrity: sha512-fo+QhuU3qE/2TQMQmbVMqaQ6EWbMhi4ABWP+O4AM1NqPBuy0OrApV5LO6BrrgnhtAHS2NH6RrVk9OL181tTi8A==}
 
+  '@modelcontextprotocol/sdk@1.29.0':
+    resolution: {integrity: sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==}
+    engines: {node: '>=18'}
+    peerDependencies:
+      '@cfworker/json-schema': ^4.1.1
+      zod: ^3.25 || ^4.0
+    peerDependenciesMeta:
+      '@cfworker/json-schema':
+        optional: true
+
   '@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3':
     resolution: {integrity: sha512-QZHtlVgbAdy2zAqNA9Gu1UpIuI8Xvsd1v8ic6B2pZmeFnFcMWiPLfWXh7TVw4eGEZ/C9TH281KwhVoeQUKbyjw==}
     cpu: [arm64]
@@ -2683,6 +2727,10 @@ packages:
   '@xterm/headless@6.0.0':
     resolution: {integrity: sha512-5Yj1QINYCyzrZtf8OFIHi47iQtI+0qYFPHmouEfG8dHNxbZ9Tb9YGSuLcsEwj9Z+OL75GJqPyJbyoFer80a2Hw==}
 
+  accepts@2.0.0:
+    resolution: {integrity: sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==}
+    engines: {node: '>= 0.6'}
+
   acorn-import-attributes@1.9.5:
     resolution: {integrity: sha512-n02Vykv5uA3eHGM/Z2dQrcD56kL8TyDb2p1+0P83PClMnC/nc+anbQRhIOWnSq4Ke/KvDPrY3C9hDtC/A3eHnQ==}
     peerDependencies:
@@ -2774,6 +2822,10 @@ packages:
     resolution: {integrity: sha512-pbnl5XzGBdrFU/wT4jqmJVPn2B6UHPBOhzMQkY/SPUPB6QtUXtmBHBIwCbXJol93mOpGMnQyP/+BB19q04xj7g==}
     engines: {node: '>=4'}
 
+  body-parser@2.2.2:
+    resolution: {integrity: sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==}
+    engines: {node: '>=18'}
+
   brace-expansion@1.1.13:
     resolution: {integrity: sha512-9ZLprWS6EENmhEOpjCYW2c8VkmOvckIJZfkr7rBW6dObmfgJ/L1GpSYW5Hpo9lDz4D1+n0Ckz8rU7FwHDQiG/w==}
 
@@ -2796,6 +2848,18 @@ packages:
   buffer-from@1.1.2:
     resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==}
 
+  bytes@3.1.2:
+    resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==}
+    engines: {node: '>= 0.8'}
+
+  call-bind-apply-helpers@1.0.2:
+    resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==}
+    engines: {node: '>= 0.4'}
+
+  call-bound@1.0.4:
+    resolution: {integrity: sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==}
+    engines: {node: '>= 0.4'}
+
   callsites@3.1.0:
     resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==}
     engines: {node: '>=6'}
@@ -2856,9 +2920,33 @@ packages:
   confbox@0.2.4:
     resolution: {integrity: sha512-ysOGlgTFbN2/Y6Cg3Iye8YKulHw+R2fNXHrgSmXISQdMnomY6eNDprVdW9R5xBguEqI954+S6709UyiO7B+6OQ==}
 
+  content-disposition@1.1.0:
+    resolution: {integrity: sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g==}
+    engines: {node: '>=18'}
+
+  content-type@1.0.5:
+    resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==}
+    engines: {node: '>= 0.6'}
+
+  content-type@2.0.0:
+    resolution: {integrity: sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ==}
+    engines: {node: '>=18'}
+
   convert-source-map@2.0.0:
     resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==}
 
+  cookie-signature@1.2.2:
+    resolution: {integrity: sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==}
+    engines: {node: '>=6.6.0'}
+
+  cookie@0.7.2:
+    resolution: {integrity: sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==}
+    engines: {node: '>= 0.6'}
+
+  cors@2.8.6:
+    resolution: {integrity: sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==}
+    engines: {node: '>= 0.10'}
+
   cross-env@10.1.0:
     resolution: {integrity: sha512-GsYosgnACZTADcmEyJctkJIoqAhHjttw7RsFrVoJNXbsWWqaq6Ym+7kZjq6mS45O0jij6vtiReppKQEtqWy6Dw==}
     engines: {node: '>=20'}
@@ -2890,6 +2978,10 @@ packages:
   deep-is@0.1.4:
     resolution: {integrity: sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==}
 
+  depd@2.0.0:
+    resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==}
+    engines: {node: '>= 0.8'}
+
   detect-indent@6.1.0:
     resolution: {integrity: sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA==}
     engines: {node: '>=8'}
@@ -2910,12 +3002,23 @@ packages:
     resolution: {integrity: sha512-IrPdXQsk2BbzvCBGBOTmmSH5SodmqZNt4ERAZDmW4CT+tL8VtvinqywuANaFu4bOMWki16nqf0e4oC0QIaDr/g==}
     engines: {node: '>=10'}
 
+  dunder-proto@1.0.1:
+    resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==}
+    engines: {node: '>= 0.4'}
+
+  ee-first@1.1.1:
+    resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
+
   effect@4.0.0-beta.70:
     resolution: {integrity: sha512-8AwGTRiNriirHGEYHrOS0E9fzdhIqCdZjiHP1YXmNo2UyPGS43ILsymsSHT7V0DJS+8dvlKq2RxnrDBUhDNZHg==}
 
   electron-to-chromium@1.5.286:
     resolution: {integrity: sha512-9tfDXhJ4RKFNerfjdCcZfufu49vg620741MNs26a9+bhLThdB+plgMeou98CAaHu/WATj2iHOOHTp1hWtABj2A==}
 
+  encodeurl@2.0.0:
+    resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==}
+    engines: {node: '>= 0.8'}
+
   enhanced-resolve@5.19.0:
     resolution: {integrity: sha512-phv3E1Xl4tQOShqSte26C7Fl84EwUdZsyOuSSk9qtAGyyQs2s3jJzComh+Abf4g187lUUAvH+H26omrqia2aGg==}
     engines: {node: '>=10.13.0'}
@@ -2928,12 +3031,24 @@ packages:
     resolution: {integrity: sha512-dtJUTepzMW3Lm/NPxRf3wP4642UWhjL2sQxc+ym2YMj1m/H2zDNQOlezafzkHwn6sMstjHTwG6iQQsctDW/b1A==}
     engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
 
+  es-define-property@1.0.1:
+    resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==}
+    engines: {node: '>= 0.4'}
+
+  es-errors@1.3.0:
+    resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==}
+    engines: {node: '>= 0.4'}
+
   es-module-lexer@1.7.0:
     resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==}
 
   es-module-lexer@2.1.0:
     resolution: {integrity: sha512-n27zTYMjYu1aj4MjCWzSP7G9r75utsaoc8m61weK+W8JMBGGQybd43GstCXZ3WNmSFtGT9wi59qQTW6mhTR5LQ==}
 
+  es-object-atoms@1.1.2:
+    resolution: {integrity: sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw==}
+    engines: {node: '>= 0.4'}
+
   esbuild@0.25.12:
     resolution: {integrity: sha512-bbPBYYrtZbkt6Os6FiTLCTFxvq4tt3JKall1vRwshA3fdVztsLAatFaZobhkBC8/BrPetoa0oksYoKXoG4ryJg==}
     engines: {node: '>=18'}
@@ -2953,6 +3068,9 @@ packages:
     resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==}
     engines: {node: '>=6'}
 
+  escape-html@1.0.3:
+    resolution: {integrity: sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==}
+
   escape-string-regexp@4.0.0:
     resolution: {integrity: sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==}
     engines: {node: '>=10'}
@@ -3021,10 +3139,32 @@ packages:
     resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==}
     engines: {node: '>=0.10.0'}
 
+  etag@1.8.1:
+    resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==}
+    engines: {node: '>= 0.6'}
+
+  eventsource-parser@3.1.0:
+    resolution: {integrity: sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg==}
+    engines: {node: '>=18.0.0'}
+
+  eventsource@3.0.7:
+    resolution: {integrity: sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==}
+    engines: {node: '>=18.0.0'}
+
   expect-type@1.3.0:
     resolution: {integrity: sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==}
     engines: {node: '>=12.0.0'}
 
+  express-rate-limit@8.5.2:
+    resolution: {integrity: sha512-5Kb34ipNX694DH48vN9irak1Qx30nb0PLYHXfJgw4YEjiC3ZEmZJhwOp+VfiCYwFzvFTdB9QkArYS5kXa2cx2A==}
+    engines: {node: '>= 16'}
+    peerDependencies:
+      express: '>= 4.11'
+
+  express@5.2.1:
+    resolution: {integrity: sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==}
+    engines: {node: '>= 18'}
+
   extendable-error@0.1.7:
     resolution: {integrity: sha512-UOiS2in6/Q0FK0R0q6UY9vYpQ21mr/Qn1KOnte7vsACuNJf514WvCCUHSRCPcgjPT2bAhNIJdlE6bVap1GKmeg==}
 
@@ -3068,6 +3208,10 @@ packages:
     resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==}
     engines: {node: '>=8'}
 
+  finalhandler@2.1.1:
+    resolution: {integrity: sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==}
+    engines: {node: '>= 18.0.0'}
+
   find-my-way-ts@0.1.6:
     resolution: {integrity: sha512-a85L9ZoXtNAey3Y6Z+eBWW658kO/MwR7zIafkIUPUMf3isZG0NCs2pjW2wtjxAKuJPxMAsHUIP4ZPGv0o5gyTA==}
 
@@ -3086,6 +3230,14 @@ packages:
   flatted@3.4.2:
     resolution: {integrity: sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==}
 
+  forwarded@0.2.0:
+    resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==}
+    engines: {node: '>= 0.6'}
+
+  fresh@2.0.0:
+    resolution: {integrity: sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==}
+    engines: {node: '>= 0.8'}
+
   fs-extra@7.0.1:
     resolution: {integrity: sha512-YJDaCJZEnBmcbw13fvdAM9AwNOJwOzrE4pqMqBq5nFiEqXUqHwlK4B+3pUw6JNvfSPtX05xFHtYy/1ni01eGCw==}
     engines: {node: '>=6 <7 || >=8'}
@@ -3099,6 +3251,9 @@ packages:
     engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
     os: [darwin]
 
+  function-bind@1.1.2:
+    resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==}
+
   gensync@1.0.0-beta.2:
     resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==}
     engines: {node: '>=6.9.0'}
@@ -3107,6 +3262,14 @@ packages:
     resolution: {integrity: sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q==}
     engines: {node: '>=18'}
 
+  get-intrinsic@1.3.0:
+    resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==}
+    engines: {node: '>= 0.4'}
+
+  get-proto@1.0.1:
+    resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==}
+    engines: {node: '>= 0.4'}
+
   glob-parent@5.1.2:
     resolution: {integrity: sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==}
     engines: {node: '>= 6'}
@@ -3127,6 +3290,10 @@ packages:
     resolution: {integrity: sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==}
     engines: {node: '>=10'}
 
+  gopd@1.2.0:
+    resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==}
+    engines: {node: '>= 0.4'}
+
   graceful-fs@4.2.11:
     resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
 
@@ -3134,12 +3301,28 @@ packages:
     resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==}
     engines: {node: '>=8'}
 
+  has-symbols@1.1.0:
+    resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==}
+    engines: {node: '>= 0.4'}
+
+  hasown@2.0.4:
+    resolution: {integrity: sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A==}
+    engines: {node: '>= 0.4'}
+
   hermes-estree@0.25.1:
     resolution: {integrity: sha512-0wUoCcLp+5Ev5pDW2OriHC2MJCbwLwuRx+gAqMTOkGKJJiBCLjtrvy4PWUGn6MIVefecRpzoOZ/UV6iGdOr+Cw==}
 
   hermes-parser@0.25.1:
     resolution: {integrity: sha512-6pEjquH3rqaI6cYAXYPcz9MS4rY6R4ngRgrgfDshRptUZIc3lw0MCIJIGDj9++mfySOuPTHB4nrSW99BCvOPIA==}
 
+  hono@4.12.25:
+    resolution: {integrity: sha512-2NFaIyNVgJmBs/ecmtGzlmluTFs5cHEWGTdu0t1HBwYzoGXOL5nUQBRMXsXWla5i4KkG//QMzVP88m1+I3fdAQ==}
+    engines: {node: '>=16.9.0'}
+
+  http-errors@2.0.1:
+    resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==}
+    engines: {node: '>= 0.8'}
+
   human-id@4.1.3:
     resolution: {integrity: sha512-tsYlhAYpjCKa//8rXZ9DqKEawhPoSytweBC2eNvcaDK+57RZLHGqNs3PZTQO6yekLFSuvA6AlnAfrw1uBvtb+Q==}
     hasBin: true
@@ -3164,10 +3347,21 @@ packages:
     resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==}
     engines: {node: '>=0.8.19'}
 
+  inherits@2.0.4:
+    resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==}
+
   ini@7.0.0:
     resolution: {integrity: sha512-ifK0CgjALofS5bkrcTy4RaQ9Vx2Knf/eLeIO+NaswQEpH1UblrtTSCIvN71qQDMq0PeQ/SSPojvEJp9vvvfr+w==}
     engines: {node: ^22.22.2 || ^24.15.0 || >=26.0.0}
 
+  ip-address@10.2.0:
+    resolution: {integrity: sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==}
+    engines: {node: '>= 12'}
+
+  ipaddr.js@1.9.1:
+    resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==}
+    engines: {node: '>= 0.10'}
+
   is-extglob@2.1.1:
     resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==}
     engines: {node: '>=0.10.0'}
@@ -3184,6 +3378,9 @@ packages:
     resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==}
     engines: {node: '>=0.12.0'}
 
+  is-promise@4.0.0:
+    resolution: {integrity: sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==}
+
   is-subdir@1.2.0:
     resolution: {integrity: sha512-2AT6j+gXe/1ueqbW6fLZJiIw3F8iXGJtt0yDrZaBhAZEG1raiTxKWU+IPqMCzQAXOUCKdA4UDMgacKH25XG2Cw==}
     engines: {node: '>=4'}
@@ -3203,6 +3400,9 @@ packages:
     resolution: {integrity: sha512-AC/7JofJvZGrrneWNaEnJeOLUx+JlGt7tNa0wZiRPT4MY1wmfKjt2+6O2p2uz2+skll8OZZmJMNqeke7kKbNgQ==}
     hasBin: true
 
+  jose@6.2.3:
+    resolution: {integrity: sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==}
+
   js-tokens@4.0.0:
     resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==}
 
@@ -3369,6 +3569,18 @@ packages:
   magicast@0.5.3:
     resolution: {integrity: sha512-pVKE4UdSQ7DvHzivsCIFx2BJn1mHG6KsyrFcaxFx6tONdneEuThrDx0Cj3AMg58KyN4pzYT+LHOotxDQDjNvkw==}
 
+  math-intrinsics@1.1.0:
+    resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
+    engines: {node: '>= 0.4'}
+
+  media-typer@1.1.0:
+    resolution: {integrity: sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==}
+    engines: {node: '>= 0.8'}
+
+  merge-descriptors@2.0.0:
+    resolution: {integrity: sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==}
+    engines: {node: '>=18'}
+
   merge2@1.4.1:
     resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==}
     engines: {node: '>= 8'}
@@ -3377,6 +3589,14 @@ packages:
     resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==}
     engines: {node: '>=8.6'}
 
+  mime-db@1.54.0:
+    resolution: {integrity: sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==}
+    engines: {node: '>= 0.6'}
+
+  mime-types@3.0.2:
+    resolution: {integrity: sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==}
+    engines: {node: '>=18'}
+
   mimic-function@5.0.1:
     resolution: {integrity: sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA==}
     engines: {node: '>=18'}
@@ -3428,6 +3648,10 @@ packages:
   natural-compare@1.4.0:
     resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==}
 
+  negotiator@1.0.0:
+    resolution: {integrity: sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==}
+    engines: {node: '>= 0.6'}
+
   next@16.2.4:
     resolution: {integrity: sha512-kPvz56wF5frc+FxlHI5qnklCzbq53HTwORaWBGdT0vNoKh1Aya9XC8aPauH4NJxqtzbWsS5mAbctm4cr+EkQ2Q==}
     engines: {node: '>=20.9.0'}
@@ -3469,9 +3693,24 @@ packages:
     resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==}
     engines: {node: '>=0.10.0'}
 
+  object-assign@4.1.1:
+    resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
+    engines: {node: '>=0.10.0'}
+
+  object-inspect@1.13.4:
+    resolution: {integrity: sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==}
+    engines: {node: '>= 0.4'}
+
   obug@2.1.1:
     resolution: {integrity: sha512-uTqF9MuPraAQ+IsnPf366RG4cP9RtUi7MLO1N3KEc+wb0a6yKpeL0lmk2IB1jY5KHPAlTc6T/JRdC/YqxHNwkQ==}
 
+  on-finished@2.4.1:
+    resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==}
+    engines: {node: '>= 0.8'}
+
+  once@1.4.0:
+    resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==}
+
   onetime@7.0.0:
     resolution: {integrity: sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ==}
     engines: {node: '>=18'}
@@ -3552,6 +3791,10 @@ packages:
     resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==}
     engines: {node: '>=6'}
 
+  parseurl@1.3.3:
+    resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
+    engines: {node: '>= 0.8'}
+
   path-exists@4.0.0:
     resolution: {integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==}
     engines: {node: '>=8'}
@@ -3564,6 +3807,9 @@ packages:
     resolution: {integrity: sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==}
     engines: {node: 18 || 20 || >=22}
 
+  path-to-regexp@8.4.2:
+    resolution: {integrity: sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==}
+
   path-type@4.0.0:
     resolution: {integrity: sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==}
     engines: {node: '>=8'}
@@ -3590,6 +3836,10 @@ packages:
     resolution: {integrity: sha512-xhcb4yHu9sM/G7foGzoLtXYcC0zHEaOXXjRKhGup0fw78Nf2Tkiapv4EQyMzrbcmQPsllAI7DbFY2UT7PlI9Pg==}
     hasBin: true
 
+  pkce-challenge@5.0.1:
+    resolution: {integrity: sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==}
+    engines: {node: '>=16.20.0'}
+
   playwright-core@1.60.0:
     resolution: {integrity: sha512-9bW6zvX/m0lEbgTKJ6YppOKx8H3VOPBMOCFh2irXFOT4BbHgrx5hPjwJYLT40Lu+4qtD36qKc/Hn56StUW57IA==}
     engines: {node: '>=18'}
@@ -3624,6 +3874,10 @@ packages:
     resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==}
     engines: {node: '>= 6'}
 
+  proxy-addr@2.0.7:
+    resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==}
+    engines: {node: '>= 0.10'}
+
   proxy-from-env@1.1.0:
     resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==}
 
@@ -3634,12 +3888,24 @@ packages:
   pure-rand@8.4.0:
     resolution: {integrity: sha512-IoM8YF/jY0hiugFo/wOWqfmarlE6J0wc6fDK1PhftMk7MGhVZl88sZimmqBBFomLOCSmcCCpsfj7wXASCpvK9A==}
 
+  qs@6.15.2:
+    resolution: {integrity: sha512-Rzq0KEyX/w/tEybncDgdkZrJgVUsUMk3xjh3t5bv3S1HTAtg+uOYt72+ZfwiQwKdysThkTBdL/rTi6HDmX9Ddw==}
+    engines: {node: '>=0.6'}
+
   quansync@0.2.11:
     resolution: {integrity: sha512-AifT7QEbW9Nri4tAwR5M/uzpBuqfZf+zwaEM/QkzEjj7NBuFD2rBuy0K3dE+8wltbezDV7JMA0WfnCPYRSYbXA==}
 
   queue-microtask@1.2.3:
     resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==}
 
+  range-parser@1.2.1:
+    resolution: {integrity: sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==}
+    engines: {node: '>= 0.6'}
+
+  raw-body@3.0.2:
+    resolution: {integrity: sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==}
+    engines: {node: '>= 0.10'}
+
   react-devtools-inline@6.1.5:
     resolution: {integrity: sha512-8FbBqZrOk4k4uWgkDNj7CVb975oKOMuPYZMQi4UHVW1RhbnEFOVZ7cdKvv6tbzbhy2D1aFwfj1T58atSoedEKQ==}
 
@@ -3685,6 +3951,10 @@ packages:
     engines: {node: '>=18.0.0', npm: '>=8.0.0'}
     hasBin: true
 
+  router@2.2.0:
+    resolution: {integrity: sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==}
+    engines: {node: '>= 18'}
+
   run-parallel@1.2.0:
     resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==}
 
@@ -3711,6 +3981,17 @@ packages:
     engines: {node: '>=10'}
     hasBin: true
 
+  send@1.2.1:
+    resolution: {integrity: sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==}
+    engines: {node: '>= 18'}
+
+  serve-static@2.2.1:
+    resolution: {integrity: sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==}
+    engines: {node: '>= 18'}
+
+  setprototypeof@1.2.0:
+    resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==}
+
   sharp@0.34.5:
     resolution: {integrity: sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==}
     engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
@@ -3723,6 +4004,22 @@ packages:
     resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==}
     engines: {node: '>=8'}
 
+  side-channel-list@1.0.1:
+    resolution: {integrity: sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==}
+    engines: {node: '>= 0.4'}
+
+  side-channel-map@1.0.1:
+    resolution: {integrity: sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==}
+    engines: {node: '>= 0.4'}
+
+  side-channel-weakmap@1.0.2:
+    resolution: {integrity: sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==}
+    engines: {node: '>= 0.4'}
+
+  side-channel@1.1.1:
+    resolution: {integrity: sha512-6x6dK6zJdpTzF4sQeNYxwtvBzf6Eg4GtlesS94HOvTudUeyK2WXAaIfmDgsyslYrRBeFIlsi54AYsFGUuhmvrQ==}
+    engines: {node: '>= 0.4'}
+
   siginfo@2.0.0:
     resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==}
 
@@ -3769,6 +4066,10 @@ packages:
   stackback@0.0.2:
     resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==}
 
+  statuses@2.0.2:
+    resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==}
+    engines: {node: '>= 0.8'}
+
   std-env@4.0.0:
     resolution: {integrity: sha512-zUMPtQ/HBY3/50VbpkupYHbRroTRZJPRLvreamgErJVys0ceuzMkD44J/QjqhHjOzK42GQ3QZIeFG1OYfOtKqQ==}
 
@@ -3866,6 +4167,10 @@ packages:
     resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==}
     engines: {node: '>=8.0'}
 
+  toidentifier@1.0.1:
+    resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==}
+    engines: {node: '>=0.6'}
+
   toml@4.1.1:
     resolution: {integrity: sha512-EBJnVBr3dTXdA89WVFoAIPUqkBjxPMwRqsfuo1r240tKFHXv3zgca4+NJib/h6TyvGF7vOawz0jGuryJCdNHrw==}
     engines: {node: '>=20'}
@@ -3902,6 +4207,10 @@ packages:
     resolution: {integrity: sha512-8ZiHFm91orbSAe2PSAiSVBVko18pbhbiB3U9GglSzF/zCGkR+rxpHx6sEMCUm4kxY4LjDIUGgCfUMtwfZfjfUA==}
     engines: {node: '>=20'}
 
+  type-is@2.1.0:
+    resolution: {integrity: sha512-faYHw0anBbc/kWF3zFTEnxSFOAGUX9GFbOBthvDdLsIlEoWOFOtS0zgCiQYwIskL9iGXZL3kAXD8OoZ4GmMATA==}
+    engines: {node: '>= 18'}
+
   typescript@5.9.3:
     resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==}
     engines: {node: '>=14.17'}
@@ -3927,6 +4236,10 @@ packages:
     resolution: {integrity: sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==}
     engines: {node: '>= 4.0.0'}
 
+  unpipe@1.0.0:
+    resolution: {integrity: sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==}
+    engines: {node: '>= 0.8'}
+
   update-browserslist-db@1.2.3:
     resolution: {integrity: sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==}
     hasBin: true
@@ -3940,6 +4253,10 @@ packages:
     resolution: {integrity: sha512-Qo+uWgilfSmAhXCMav1uYFynlQO7fMFiMVZsQqZRMIXp0O7rR7qjkj+cPvBHLgBqi960QCoo/PH2/6ZtVqKvrg==}
     hasBin: true
 
+  vary@1.1.2:
+    resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
+    engines: {node: '>= 0.8'}
+
   vite-plus@0.1.20:
     resolution: {integrity: sha512-hxJqXTxiiFhszwAeD0MvKlztVuXE4TztTdJ64BPxGqgY67F0PDa5eZkUsrN91Ae8aYUMfweW6V/J57OUO9/0zw==}
     engines: {node: ^20.19.0 || >=22.12.0}
@@ -4073,6 +4390,9 @@ packages:
     resolution: {integrity: sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==}
     engines: {node: '>=0.10.0'}
 
+  wrappy@1.0.2:
+    resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==}
+
   ws@8.20.0:
     resolution: {integrity: sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==}
     engines: {node: '>=10.0.0'}
@@ -4106,6 +4426,11 @@ packages:
     resolution: {integrity: sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug==}
     engines: {node: '>=18'}
 
+  zod-to-json-schema@3.25.2:
+    resolution: {integrity: sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA==}
+    peerDependencies:
+      zod: ^3.25.28 || ^4
+
   zod-validation-error@4.0.2:
     resolution: {integrity: sha512-Q6/nZLe6jxuU80qb/4uJ4t5v2VEZ44lzQjPDhYJNztRQ4wyWc6VF3D3Kb/fAuPetZQnhS3hnajCf9CsWesghLQ==}
     engines: {node: '>=18.0.0'}
@@ -4115,6 +4440,9 @@ packages:
   zod@4.3.6:
     resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==}
 
+  zod@4.4.3:
+    resolution: {integrity: sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ==}
+
 snapshots:
 
   '@alloc/quick-lru@5.2.0': {}
@@ -4710,6 +5038,10 @@ snapshots:
       '@eslint/core': 0.17.0
       levn: 0.4.1
 
+  '@hono/node-server@1.19.14(hono@4.12.25)':
+    dependencies:
+      hono: 4.12.25
+
   '@humanfs/core@0.19.1': {}
 
   '@humanfs/node@0.16.7':
@@ -4868,6 +5200,28 @@ snapshots:
       globby: 11.1.0
       read-yaml-file: 1.1.0
 
+  '@modelcontextprotocol/sdk@1.29.0(zod@4.4.3)':
+    dependencies:
+      '@hono/node-server': 1.19.14(hono@4.12.25)
+      ajv: 8.20.0
+      ajv-formats: 3.0.1(ajv@8.20.0)
+      content-type: 1.0.5
+      cors: 2.8.6
+      cross-spawn: 7.0.6
+      eventsource: 3.0.7
+      eventsource-parser: 3.1.0
+      express: 5.2.1
+      express-rate-limit: 8.5.2(express@5.2.1)
+      hono: 4.12.25
+      jose: 6.2.3
+      json-schema-typed: 8.0.2
+      pkce-challenge: 5.0.1
+      raw-body: 3.0.2
+      zod: 4.4.3
+      zod-to-json-schema: 3.25.2(zod@4.4.3)
+    transitivePeerDependencies:
+      - supports-color
+
   '@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3':
     optional: true
 
@@ -5736,6 +6090,11 @@ snapshots:
 
   '@xterm/headless@6.0.0': {}
 
+  accepts@2.0.0:
+    dependencies:
+      mime-types: 3.0.2
+      negotiator: 1.0.0
+
   acorn-import-attributes@1.9.5(acorn@8.16.0):
     dependencies:
       acorn: 8.16.0
@@ -5820,6 +6179,20 @@ snapshots:
     dependencies:
       is-windows: 1.0.2
 
+  body-parser@2.2.2:
+    dependencies:
+      bytes: 3.1.2
+      content-type: 1.0.5
+      debug: 4.4.3
+      http-errors: 2.0.1
+      iconv-lite: 0.7.2
+      on-finished: 2.4.1
+      qs: 6.15.2
+      raw-body: 3.0.2
+      type-is: 2.1.0
+    transitivePeerDependencies:
+      - supports-color
+
   brace-expansion@1.1.13:
     dependencies:
       balanced-match: 1.0.2
@@ -5848,6 +6221,18 @@ snapshots:
   buffer-from@1.1.2:
     optional: true
 
+  bytes@3.1.2: {}
+
+  call-bind-apply-helpers@1.0.2:
+    dependencies:
+      es-errors: 1.3.0
+      function-bind: 1.1.2
+
+  call-bound@1.0.4:
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      get-intrinsic: 1.3.0
+
   callsites@3.1.0: {}
 
   caniuse-lite@1.0.30001769: {}
@@ -5900,8 +6285,23 @@ snapshots:
 
   confbox@0.2.4: {}
 
+  content-disposition@1.1.0: {}
+
+  content-type@1.0.5: {}
+
+  content-type@2.0.0: {}
+
   convert-source-map@2.0.0: {}
 
+  cookie-signature@1.2.2: {}
+
+  cookie@0.7.2: {}
+
+  cors@2.8.6:
+    dependencies:
+      object-assign: 4.1.1
+      vary: 1.1.2
+
   cross-env@10.1.0:
     dependencies:
       '@epic-web/invariant': 1.0.0
@@ -5927,6 +6327,8 @@ snapshots:
 
   deep-is@0.1.4: {}
 
+  depd@2.0.0: {}
+
   detect-indent@6.1.0: {}
 
   detect-libc@2.1.2: {}
@@ -5941,6 +6343,14 @@ snapshots:
 
   dotenv@8.6.0: {}
 
+  dunder-proto@1.0.1:
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      es-errors: 1.3.0
+      gopd: 1.2.0
+
+  ee-first@1.1.1: {}
+
   effect@4.0.0-beta.70:
     dependencies:
       '@standard-schema/spec': 1.1.0
@@ -5956,6 +6366,8 @@ snapshots:
 
   electron-to-chromium@1.5.286: {}
 
+  encodeurl@2.0.0: {}
+
   enhanced-resolve@5.19.0:
     dependencies:
       graceful-fs: 4.2.11
@@ -5968,10 +6380,18 @@ snapshots:
 
   env-paths@3.0.0: {}
 
+  es-define-property@1.0.1: {}
+
+  es-errors@1.3.0: {}
+
   es-module-lexer@1.7.0: {}
 
   es-module-lexer@2.1.0: {}
 
+  es-object-atoms@1.1.2:
+    dependencies:
+      es-errors: 1.3.0
+
   esbuild@0.25.12:
     optionalDependencies:
       '@esbuild/aix-ppc64': 0.25.12
@@ -6061,6 +6481,8 @@ snapshots:
 
   escalade@3.2.0: {}
 
+  escape-html@1.0.3: {}
+
   escape-string-regexp@4.0.0: {}
 
   eslint-plugin-react-hooks@7.1.1(eslint@9.39.2(jiti@2.7.0)):
@@ -6157,8 +6579,54 @@ snapshots:
 
   esutils@2.0.3: {}
 
+  etag@1.8.1: {}
+
+  eventsource-parser@3.1.0: {}
+
+  eventsource@3.0.7:
+    dependencies:
+      eventsource-parser: 3.1.0
+
   expect-type@1.3.0: {}
 
+  express-rate-limit@8.5.2(express@5.2.1):
+    dependencies:
+      express: 5.2.1
+      ip-address: 10.2.0
+
+  express@5.2.1:
+    dependencies:
+      accepts: 2.0.0
+      body-parser: 2.2.2
+      content-disposition: 1.1.0
+      content-type: 1.0.5
+      cookie: 0.7.2
+      cookie-signature: 1.2.2
+      debug: 4.4.3
+      depd: 2.0.0
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      etag: 1.8.1
+      finalhandler: 2.1.1
+      fresh: 2.0.0
+      http-errors: 2.0.1
+      merge-descriptors: 2.0.0
+      mime-types: 3.0.2
+      on-finished: 2.4.1
+      once: 1.4.0
+      parseurl: 1.3.3
+      proxy-addr: 2.0.7
+      qs: 6.15.2
+      range-parser: 1.2.1
+      router: 2.2.0
+      send: 1.2.1
+      serve-static: 2.2.1
+      statuses: 2.0.2
+      type-is: 2.1.0
+      vary: 1.1.2
+    transitivePeerDependencies:
+      - supports-color
+
   extendable-error@0.1.7: {}
 
   fast-check@4.8.0:
@@ -6197,6 +6665,17 @@ snapshots:
     dependencies:
       to-regex-range: 5.0.1
 
+  finalhandler@2.1.1:
+    dependencies:
+      debug: 4.4.3
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      on-finished: 2.4.1
+      parseurl: 1.3.3
+      statuses: 2.0.2
+    transitivePeerDependencies:
+      - supports-color
+
   find-my-way-ts@0.1.6: {}
 
   find-up@4.1.0:
@@ -6216,6 +6695,10 @@ snapshots:
 
   flatted@3.4.2: {}
 
+  forwarded@0.2.0: {}
+
+  fresh@2.0.0: {}
+
   fs-extra@7.0.1:
     dependencies:
       graceful-fs: 4.2.11
@@ -6231,10 +6714,30 @@ snapshots:
   fsevents@2.3.3:
     optional: true
 
+  function-bind@1.1.2: {}
+
   gensync@1.0.0-beta.2: {}
 
   get-east-asian-width@1.4.0: {}
 
+  get-intrinsic@1.3.0:
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      es-define-property: 1.0.1
+      es-errors: 1.3.0
+      es-object-atoms: 1.1.2
+      function-bind: 1.1.2
+      get-proto: 1.0.1
+      gopd: 1.2.0
+      has-symbols: 1.1.0
+      hasown: 2.0.4
+      math-intrinsics: 1.1.0
+
+  get-proto@1.0.1:
+    dependencies:
+      dunder-proto: 1.0.1
+      es-object-atoms: 1.1.2
+
   glob-parent@5.1.2:
     dependencies:
       is-glob: 4.0.3
@@ -6260,16 +6763,34 @@ snapshots:
       merge2: 1.4.1
       slash: 3.0.0
 
+  gopd@1.2.0: {}
+
   graceful-fs@4.2.11: {}
 
   has-flag@4.0.0: {}
 
+  has-symbols@1.1.0: {}
+
+  hasown@2.0.4:
+    dependencies:
+      function-bind: 1.1.2
+
   hermes-estree@0.25.1: {}
 
   hermes-parser@0.25.1:
     dependencies:
       hermes-estree: 0.25.1
 
+  hono@4.12.25: {}
+
+  http-errors@2.0.1:
+    dependencies:
+      depd: 2.0.0
+      inherits: 2.0.4
+      setprototypeof: 1.2.0
+      statuses: 2.0.2
+      toidentifier: 1.0.1
+
   human-id@4.1.3: {}
 
   iconv-lite@0.7.2:
@@ -6292,8 +6813,14 @@ snapshots:
 
   imurmurhash@0.1.4: {}
 
+  inherits@2.0.4: {}
+
   ini@7.0.0: {}
 
+  ip-address@10.2.0: {}
+
+  ipaddr.js@1.9.1: {}
+
   is-extglob@2.1.1: {}
 
   is-glob@4.0.3:
@@ -6304,6 +6831,8 @@ snapshots:
 
   is-number@7.0.0: {}
 
+  is-promise@4.0.0: {}
+
   is-subdir@1.2.0:
     dependencies:
       better-path-resolve: 1.0.0
@@ -6316,6 +6845,8 @@ snapshots:
 
   jiti@2.7.0: {}
 
+  jose@6.2.3: {}
+
   js-tokens@4.0.0: {}
 
   js-yaml@3.14.2:
@@ -6446,6 +6977,12 @@ snapshots:
       '@babel/types': 7.29.0
       source-map-js: 1.2.1
 
+  math-intrinsics@1.1.0: {}
+
+  media-typer@1.1.0: {}
+
+  merge-descriptors@2.0.0: {}
+
   merge2@1.4.1: {}
 
   micromatch@4.0.8:
@@ -6453,6 +6990,12 @@ snapshots:
       braces: 3.0.3
       picomatch: 2.3.1
 
+  mime-db@1.54.0: {}
+
+  mime-types@3.0.2:
+    dependencies:
+      mime-db: 1.54.0
+
   mimic-function@5.0.1: {}
 
   minimatch@10.2.5:
@@ -6499,6 +7042,8 @@ snapshots:
 
   natural-compare@1.4.0: {}
 
+  negotiator@1.0.0: {}
+
   next@16.2.4(@opentelemetry/api@1.9.1)(react-dom@19.2.5(react@19.2.5))(react@19.2.5):
     dependencies:
       '@next/env': 16.2.4
@@ -6537,8 +7082,20 @@ snapshots:
 
   normalize-path@3.0.0: {}
 
+  object-assign@4.1.1: {}
+
+  object-inspect@1.13.4: {}
+
   obug@2.1.1: {}
 
+  on-finished@2.4.1:
+    dependencies:
+      ee-first: 1.1.1
+
+  once@1.4.0:
+    dependencies:
+      wrappy: 1.0.2
+
   onetime@7.0.0:
     dependencies:
       mimic-function: 5.0.1
@@ -6729,6 +7286,8 @@ snapshots:
     dependencies:
       callsites: 3.1.0
 
+  parseurl@1.3.3: {}
+
   path-exists@4.0.0: {}
 
   path-key@3.1.1: {}
@@ -6738,6 +7297,8 @@ snapshots:
       lru-cache: 11.5.0
       minipass: 7.1.3
 
+  path-to-regexp@8.4.2: {}
+
   path-type@4.0.0: {}
 
   pathe@2.0.3: {}
@@ -6754,6 +7315,8 @@ snapshots:
     dependencies:
       pngjs: 7.0.0
 
+  pkce-challenge@5.0.1: {}
+
   playwright-core@1.60.0: {}
 
   pngjs@7.0.0: {}
@@ -6781,16 +7344,34 @@ snapshots:
       kleur: 3.0.3
       sisteransi: 1.0.5
 
+  proxy-addr@2.0.7:
+    dependencies:
+      forwarded: 0.2.0
+      ipaddr.js: 1.9.1
+
   proxy-from-env@1.1.0: {}
 
   punycode@2.3.1: {}
 
   pure-rand@8.4.0: {}
 
+  qs@6.15.2:
+    dependencies:
+      side-channel: 1.1.1
+
   quansync@0.2.11: {}
 
   queue-microtask@1.2.3: {}
 
+  range-parser@1.2.1: {}
+
+  raw-body@3.0.2:
+    dependencies:
+      bytes: 3.1.2
+      http-errors: 2.0.1
+      iconv-lite: 0.7.2
+      unpipe: 1.0.0
+
   react-devtools-inline@6.1.5:
     dependencies:
       source-map-js: 0.6.2
@@ -6861,6 +7442,16 @@ snapshots:
       '@rollup/rollup-win32-x64-msvc': 4.57.1
       fsevents: 2.3.3
 
+  router@2.2.0:
+    dependencies:
+      debug: 4.4.3
+      depd: 2.0.0
+      is-promise: 4.0.0
+      parseurl: 1.3.3
+      path-to-regexp: 8.4.2
+    transitivePeerDependencies:
+      - supports-color
+
   run-parallel@1.2.0:
     dependencies:
       queue-microtask: 1.2.3
@@ -6879,6 +7470,33 @@ snapshots:
 
   semver@7.7.4: {}
 
+  send@1.2.1:
+    dependencies:
+      debug: 4.4.3
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      etag: 1.8.1
+      fresh: 2.0.0
+      http-errors: 2.0.1
+      mime-types: 3.0.2
+      ms: 2.1.3
+      on-finished: 2.4.1
+      range-parser: 1.2.1
+      statuses: 2.0.2
+    transitivePeerDependencies:
+      - supports-color
+
+  serve-static@2.2.1:
+    dependencies:
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      parseurl: 1.3.3
+      send: 1.2.1
+    transitivePeerDependencies:
+      - supports-color
+
+  setprototypeof@1.2.0: {}
+
   sharp@0.34.5:
     dependencies:
       '@img/colour': 1.0.0
@@ -6917,6 +7535,34 @@ snapshots:
 
   shebang-regex@3.0.0: {}
 
+  side-channel-list@1.0.1:
+    dependencies:
+      es-errors: 1.3.0
+      object-inspect: 1.13.4
+
+  side-channel-map@1.0.1:
+    dependencies:
+      call-bound: 1.0.4
+      es-errors: 1.3.0
+      get-intrinsic: 1.3.0
+      object-inspect: 1.13.4
+
+  side-channel-weakmap@1.0.2:
+    dependencies:
+      call-bound: 1.0.4
+      es-errors: 1.3.0
+      get-intrinsic: 1.3.0
+      object-inspect: 1.13.4
+      side-channel-map: 1.0.1
+
+  side-channel@1.1.1:
+    dependencies:
+      es-errors: 1.3.0
+      object-inspect: 1.13.4
+      side-channel-list: 1.0.1
+      side-channel-map: 1.0.1
+      side-channel-weakmap: 1.0.2
+
   siginfo@2.0.0: {}
 
   signal-exit@4.1.0: {}
@@ -6955,6 +7601,8 @@ snapshots:
 
   stackback@0.0.2: {}
 
+  statuses@2.0.2: {}
+
   std-env@4.0.0: {}
 
   stdin-discarder@0.3.2: {}
@@ -7029,6 +7677,8 @@ snapshots:
     dependencies:
       is-number: 7.0.0
 
+  toidentifier@1.0.1: {}
+
   toml@4.1.1: {}
 
   totalist@3.0.1: {}
@@ -7071,6 +7721,12 @@ snapshots:
     dependencies:
       tagged-tag: 1.0.0
 
+  type-is@2.1.0:
+    dependencies:
+      content-type: 2.0.0
+      media-typer: 1.1.0
+      mime-types: 3.0.2
+
   typescript@5.9.3: {}
 
   typescript@6.0.3: {}
@@ -7083,6 +7739,8 @@ snapshots:
 
   universalify@0.1.2: {}
 
+  unpipe@1.0.0: {}
+
   update-browserslist-db@1.2.3(browserslist@4.28.1):
     dependencies:
       browserslist: 4.28.1
@@ -7095,6 +7753,8 @@ snapshots:
 
   uuid@14.0.0: {}
 
+  vary@1.1.2: {}
+
   vite-plus@0.1.20(@opentelemetry/api@1.9.1)(@types/node@25.6.0)(esbuild@0.28.1)(jiti@2.7.0)(terser@5.46.0)(tsx@4.22.4)(typescript@6.0.3)(vite@7.3.1(@types/node@25.6.0)(jiti@2.7.0)(lightningcss@1.30.2)(terser@5.46.0)(tsx@4.22.4)(yaml@2.9.0))(yaml@2.9.0):
     dependencies:
       '@oxc-project/types': 0.127.0
@@ -7230,6 +7890,8 @@ snapshots:
 
   word-wrap@1.2.5: {}
 
+  wrappy@1.0.2: {}
+
   ws@8.20.0: {}
 
   yallist@3.1.1: {}
@@ -7242,8 +7904,14 @@ snapshots:
 
   yoctocolors@2.1.2: {}
 
+  zod-to-json-schema@3.25.2(zod@4.4.3):
+    dependencies:
+      zod: 4.4.3
+
   zod-validation-error@4.0.2(zod@4.3.6):
     dependencies:
       zod: 4.3.6
 
   zod@4.3.6: {}
+
+  zod@4.4.3: {}

From a3484a9ffdfdda24510ec2d8a3229f48a24bf73d Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Fri, 19 Jun 2026 01:48:54 -0700
Subject: [PATCH 03/38] refactor(mcp): collapse the read-only browser tools
 into a registration table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The five read-only page tools (browser_audit/console/network/perf/report) shared
the same url+connection+viewport input schema, read-only annotations, and
runTool/withSession lifecycle. Register them through a small registerPageTool
table so each is just its name, title, description, and a session action — no
behavior change (verified by the tool-registration tests).
---
 packages/mcp/src/tools/browser.ts | 151 ++++++++++++++----------------
 1 file changed, 71 insertions(+), 80 deletions(-)

diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
index cac5e4f2e..09a5f73d9 100644
--- a/packages/mcp/src/tools/browser.ts
+++ b/packages/mcp/src/tools/browser.ts
@@ -1,4 +1,6 @@
+import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import type { BrowserSession } from "@react-doctor/browser";
 import { z } from "zod";
 import { DEFAULT_CDP_ENDPOINT_HINT } from "../constants.js";
 import { parseViewport } from "../utils/parse-viewport.js";
@@ -42,6 +44,34 @@ const toConnection = (args: ConnectionArgs): BrowserToolConnection => ({
   viewport: args.viewport ? parseViewport(args.viewport) : undefined,
 });
 
+interface PageToolDefinition {
+  name: string;
+  title: string;
+  description: string;
+  // Build the result inside the session scope (the session is disposed once
+  // this resolves), against the optional `url` to load.
+  run: (session: BrowserSession, url: string | undefined) => Promise<CallToolResult>;
+}
+
+// The read-only "load a page (or read the current one) and report" tools all
+// share the same url + connection + viewport inputs and session lifecycle, so
+// they register through this table rather than repeating the scaffolding.
+const registerPageTool = (server: McpServer, definition: PageToolDefinition): void => {
+  server.registerTool(
+    definition.name,
+    {
+      title: definition.title,
+      description: definition.description,
+      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
+      annotations: { readOnlyHint: true, openWorldHint: true },
+    },
+    (args) =>
+      runTool(() =>
+        withSession(toConnection(args), (session) => definition.run(session, args.url)),
+      ),
+  );
+};
+
 export const registerBrowserTools = (server: McpServer): void => {
   server.registerTool(
     "browser_open",
@@ -117,91 +147,52 @@ export const registerBrowserTools = (server: McpServer): void => {
       }),
   );
 
-  server.registerTool(
-    "browser_audit",
-    {
-      title: "Run an accessibility audit",
-      description:
-        "Run an axe-core accessibility audit on the attached page (or a URL) and return the violations with impact, help text, and affected element targets.",
-      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
-      annotations: { readOnlyHint: true, openWorldHint: true },
+  registerPageTool(server, {
+    name: "browser_audit",
+    title: "Run an accessibility audit",
+    description:
+      "Run an axe-core accessibility audit on the attached page (or a URL) and return the violations with impact, help text, and affected element targets.",
+    run: async (session, url) => {
+      const violations = await session.audit(url);
+      return jsonResult({ violationCount: violations.length, violations });
     },
-    (args) =>
-      runTool(async () => {
-        const violations = await withSession(toConnection(args), (session) =>
-          session.audit(args.url),
-        );
-        return jsonResult({ violationCount: violations.length, violations });
-      }),
-  );
+  });
 
-  server.registerTool(
-    "browser_console",
-    {
-      title: "Capture console output",
-      description:
-        "Capture console messages and page errors during a load of the attached page (or a URL; reloads when no URL is given).",
-      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
-      annotations: { readOnlyHint: true, openWorldHint: true },
+  registerPageTool(server, {
+    name: "browser_console",
+    title: "Capture console output",
+    description:
+      "Capture console messages and page errors during a load of the attached page (or a URL; reloads when no URL is given).",
+    run: async (session, url) => {
+      const messages = await session.captureConsole(url);
+      return jsonResult({ messageCount: messages.length, messages });
     },
-    (args) =>
-      runTool(async () => {
-        const messages = await withSession(toConnection(args), (session) =>
-          session.captureConsole(args.url),
-        );
-        return jsonResult({ messageCount: messages.length, messages });
-      }),
-  );
+  });
 
-  server.registerTool(
-    "browser_network",
-    {
-      title: "Capture network requests",
-      description:
-        "Capture network requests during a load of the attached page (or a URL; reloads when no URL is given), flagging failures and non-2xx/3xx responses.",
-      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
-      annotations: { readOnlyHint: true, openWorldHint: true },
+  registerPageTool(server, {
+    name: "browser_network",
+    title: "Capture network requests",
+    description:
+      "Capture network requests during a load of the attached page (or a URL; reloads when no URL is given), flagging failures and non-2xx/3xx responses.",
+    run: async (session, url) => {
+      const requests = await session.captureNetwork(url);
+      return jsonResult({ requestCount: requests.length, requests });
     },
-    (args) =>
-      runTool(async () => {
-        const requests = await withSession(toConnection(args), (session) =>
-          session.captureNetwork(args.url),
-        );
-        return jsonResult({ requestCount: requests.length, requests });
-      }),
-  );
+  });
 
-  server.registerTool(
-    "browser_perf",
-    {
-      title: "Measure runtime performance (jank)",
-      description:
-        "Capture long animation frames (>50ms main-thread jank) with per-script attribution, plus LCP and CLS. Loads a URL when given; omit the URL to measure the current page without reloading (so a browser_eval interaction's jank is included).",
-      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
-      annotations: { readOnlyHint: true, openWorldHint: true },
-    },
-    (args) =>
-      runTool(async () =>
-        jsonResult(
-          await withSession(toConnection(args), (session) => session.measurePerformance(args.url)),
-        ),
-      ),
-  );
+  registerPageTool(server, {
+    name: "browser_perf",
+    title: "Measure runtime performance (jank)",
+    description:
+      "Capture long animation frames (>50ms main-thread jank) with per-script attribution, plus LCP and CLS. Loads a URL when given; omit the URL to measure the current page without reloading (so a browser_eval interaction's jank is included).",
+    run: async (session, url) => jsonResult(await session.measurePerformance(url)),
+  });
 
-  server.registerTool(
-    "browser_report",
-    {
-      title: "Capture a full page report",
-      description:
-        "Capture console, network, performance, and accessibility for the attached page (or a URL) in a single load — the efficient path when you want the whole runtime picture at once.",
-      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
-      annotations: { readOnlyHint: true, openWorldHint: true },
-    },
-    (args) =>
-      runTool(async () =>
-        jsonResult(
-          await withSession(toConnection(args), (session) => session.inspectPage(args.url)),
-        ),
-      ),
-  );
+  registerPageTool(server, {
+    name: "browser_report",
+    title: "Capture a full page report",
+    description:
+      "Capture console, network, performance, and accessibility for the attached page (or a URL) in a single load — the efficient path when you want the whole runtime picture at once.",
+    run: async (session, url) => jsonResult(await session.inspectPage(url)),
+  });
 };

From 29178bac261e737812ae8861dc3b544b870b7f9a Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Fri, 19 Jun 2026 02:07:11 -0700
Subject: [PATCH 04/38] fix(mcp): harden debug fetch + clear viewport override
 (thermos review)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- security: validate the model-supplied debug endpoint is a loopback /ingest/<id>
  URL before debug_read_logs/debug_clear_logs fetch it, and bound those fetches
  with a timeout — closes a prompt-injection SSRF (incl. a destructive DELETE).
- browser: BrowserSession.setViewport now holds its CDP session, and dispose()
  clears the device-metrics override and detaches it explicitly rather than
  relying on disconnect semantics, so an emulated viewport can't linger on the
  persistent Chrome.
- privacy: scrub the user's home directory out of tool error messages (Chrome /
  profile / CDP paths) before they go back to the model.
- docs: browser_audit/browser_report descriptions now state they reload the
  current page when no URL is given (use browser_perf to measure post-interaction).
---
 packages/browser/src/session.ts               | 17 +++++++++---
 packages/mcp/src/constants.ts                 |  4 +++
 packages/mcp/src/tools/browser.ts             |  4 +--
 packages/mcp/src/tools/debug.ts               | 13 ++++++++--
 packages/mcp/src/utils/parse-log-endpoint.ts  | 22 ++++++++++++++++
 packages/mcp/src/utils/tool-result.ts         | 11 +++++++-
 packages/mcp/tests/parse-log-endpoint.test.ts | 26 +++++++++++++++++++
 7 files changed, 89 insertions(+), 8 deletions(-)
 create mode 100644 packages/mcp/src/utils/parse-log-endpoint.ts
 create mode 100644 packages/mcp/tests/parse-log-endpoint.test.ts

diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 03ad5319d..12712302f 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -1,7 +1,7 @@
 import { readFile } from "node:fs/promises";
 import { fileURLToPath } from "node:url";
 import axe from "axe-core";
-import type { Browser, ConsoleMessage, Page, Request, Response } from "playwright-core";
+import type { Browser, CDPSession, ConsoleMessage, Page, Request, Response } from "playwright-core";
 import { connectToBrowser, type BrowserConnection } from "./connect.js";
 import {
   MAX_VIOLATION_TARGETS,
@@ -53,6 +53,11 @@ const resolveActivePage = async (browser: Browser): Promise<Page> => {
 // A live handle to the attached page. The page state lives in the browser, so a
 // session is cheap to create per command and there is no server to keep alive.
 export class BrowserSession {
+  // Held so the device-metrics override is cleared on dispose rather than
+  // relying on the override happening to reset when the CDP client disconnects
+  // — otherwise an emulated viewport could linger on the persistent Chrome.
+  private viewportOverride: CDPSession | null = null;
+
   private constructor(
     private readonly connection: BrowserConnection,
     readonly page: Page,
@@ -109,8 +114,8 @@ export class BrowserSession {
   }
 
   // A CDP device-metrics override, not page.setViewportSize, so it works on a
-  // page we only attached to and clears on disconnect — it never resizes the
-  // user's real window.
+  // page we only attached to — it never resizes the user's real window. The
+  // session is kept and cleared in dispose() so the override doesn't linger.
   async setViewport(viewport: Viewport): Promise<void> {
     const cdpSession = await this.page.context().newCDPSession(this.page);
     await cdpSession.send("Emulation.setDeviceMetricsOverride", {
@@ -119,6 +124,7 @@ export class BrowserSession {
       deviceScaleFactor: 1,
       mobile: false,
     });
+    this.viewportOverride = cdpSession;
   }
 
   // The expression runs here in Node with the Playwright `page` in scope (the
@@ -324,6 +330,11 @@ export class BrowserSession {
   // whether the user had it open or we launched it — so the page stays alive and
   // the next `browser` command reattaches to the same live session.
   async dispose(): Promise<void> {
+    if (this.viewportOverride) {
+      await this.viewportOverride.send("Emulation.clearDeviceMetricsOverride").catch(() => {});
+      await this.viewportOverride.detach().catch(() => {});
+      this.viewportOverride = null;
+    }
     await this.connection.browser.close().catch(() => {});
   }
 }
diff --git a/packages/mcp/src/constants.ts b/packages/mcp/src/constants.ts
index 586b4fb03..ee00c36c2 100644
--- a/packages/mcp/src/constants.ts
+++ b/packages/mcp/src/constants.ts
@@ -12,3 +12,7 @@ export const MAX_INLINE_DIAGNOSTICS = 100;
 // Upper bound on an emulated viewport dimension, so a typo can't push an
 // absurd device-metrics override into CDP (mirrors the CLI's --viewport guard).
 export const MAX_VIEWPORT_PX = 10_000;
+
+// Cap on the debug tools' HTTP calls to the local log server, so a hung or
+// unresponsive endpoint can't block the MCP tool turn indefinitely.
+export const DEBUG_FETCH_TIMEOUT_MS = 5000;
diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
index 09a5f73d9..371dbf2cc 100644
--- a/packages/mcp/src/tools/browser.ts
+++ b/packages/mcp/src/tools/browser.ts
@@ -151,7 +151,7 @@ export const registerBrowserTools = (server: McpServer): void => {
     name: "browser_audit",
     title: "Run an accessibility audit",
     description:
-      "Run an axe-core accessibility audit on the attached page (or a URL) and return the violations with impact, help text, and affected element targets.",
+      "Run an axe-core accessibility audit on the attached page (or a URL; reloads the current page when no URL is given) and return the violations with impact, help text, and affected element targets.",
     run: async (session, url) => {
       const violations = await session.audit(url);
       return jsonResult({ violationCount: violations.length, violations });
@@ -192,7 +192,7 @@ export const registerBrowserTools = (server: McpServer): void => {
     name: "browser_report",
     title: "Capture a full page report",
     description:
-      "Capture console, network, performance, and accessibility for the attached page (or a URL) in a single load — the efficient path when you want the whole runtime picture at once.",
+      "Capture console, network, performance, and accessibility in a single load — the efficient path when you want the whole runtime picture at once. Always loads (a URL when given, otherwise reloads the current page); to measure after a browser_eval interaction without reloading, use browser_perf.",
     run: async (session, url) => jsonResult(await session.inspectPage(url)),
   });
 };
diff --git a/packages/mcp/src/tools/debug.ts b/packages/mcp/src/tools/debug.ts
index 267f1c075..3f8ff8512 100644
--- a/packages/mcp/src/tools/debug.ts
+++ b/packages/mcp/src/tools/debug.ts
@@ -2,6 +2,8 @@ import type { Server } from "node:http";
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { createLogServer, DEFAULT_HOST } from "@react-doctor/debug";
 import { z } from "zod";
+import { DEBUG_FETCH_TIMEOUT_MS } from "../constants.js";
+import { parseLogEndpoint } from "../utils/parse-log-endpoint.js";
 import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
 
 // Log servers started via `debug_serve` must outlive the tool call (the agent
@@ -72,7 +74,10 @@ export const registerDebugTools = (server: McpServer): void => {
     },
     (args) =>
       runTool(async () => {
-        const response = await fetch(args.endpoint);
+        const endpoint = parseLogEndpoint(args.endpoint);
+        const response = await fetch(endpoint, {
+          signal: AbortSignal.timeout(DEBUG_FETCH_TIMEOUT_MS),
+        });
         if (!response.ok) {
           return {
             content: [{ type: "text", text: `Log server returned ${response.status}` }],
@@ -97,7 +102,11 @@ export const registerDebugTools = (server: McpServer): void => {
     },
     (args) =>
       runTool(async () => {
-        const response = await fetch(args.endpoint, { method: "DELETE" });
+        const endpoint = parseLogEndpoint(args.endpoint);
+        const response = await fetch(endpoint, {
+          method: "DELETE",
+          signal: AbortSignal.timeout(DEBUG_FETCH_TIMEOUT_MS),
+        });
         return response.ok
           ? textResult("Cleared logs")
           : {
diff --git a/packages/mcp/src/utils/parse-log-endpoint.ts b/packages/mcp/src/utils/parse-log-endpoint.ts
new file mode 100644
index 000000000..92e741ee0
--- /dev/null
+++ b/packages/mcp/src/utils/parse-log-endpoint.ts
@@ -0,0 +1,22 @@
+const LOOPBACK_HOSTNAMES = new Set(["127.0.0.1", "localhost", "::1", "[::1]"]);
+
+// The debug tools fetch a model-supplied endpoint, so confine it to a loopback
+// `/ingest/<id>` URL — the exact shape `debug_serve` returns — so a prompt
+// injection can't turn `debug_read_logs`/`debug_clear_logs` into an SSRF
+// primitive against an arbitrary host. Throws (surfaced as a tool error by
+// `runTool`) on anything else.
+export const parseLogEndpoint = (endpoint: string): URL => {
+  let url: URL;
+  try {
+    url = new URL(endpoint);
+  } catch {
+    throw new Error(`Invalid endpoint URL: ${endpoint}`);
+  }
+  if (!LOOPBACK_HOSTNAMES.has(url.hostname)) {
+    throw new Error(`Refusing to fetch a non-loopback endpoint: ${url.hostname}`);
+  }
+  if (!url.pathname.startsWith("/ingest/")) {
+    throw new Error(`Not a debug log endpoint (expected /ingest/<id>): ${url.pathname}`);
+  }
+  return url;
+};
diff --git a/packages/mcp/src/utils/tool-result.ts b/packages/mcp/src/utils/tool-result.ts
index 9bf4e2d4d..3c747e62d 100644
--- a/packages/mcp/src/utils/tool-result.ts
+++ b/packages/mcp/src/utils/tool-result.ts
@@ -1,9 +1,18 @@
+import { homedir } from "node:os";
 import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
 
 export const textResult = (text: string): CallToolResult => ({
   content: [{ type: "text", text }],
 });
 
+// Tool output is sent to the model (and may be logged), so keep the user's home
+// directory out of error messages — Chrome/profile/CDP errors otherwise carry an
+// absolute path that includes the username.
+const scrubHomePath = (text: string): string => {
+  const home = homedir();
+  return home ? text.split(home).join("~") : text;
+};
+
 export const jsonResult = (value: unknown): CallToolResult =>
   textResult(JSON.stringify(value, null, 2));
 
@@ -16,6 +25,6 @@ export const runTool = async (run: () => Promise<CallToolResult>): Promise<CallT
     return await run();
   } catch (error: unknown) {
     const message = error instanceof Error ? error.message : String(error);
-    return { content: [{ type: "text", text: message }], isError: true };
+    return { content: [{ type: "text", text: scrubHomePath(message) }], isError: true };
   }
 };
diff --git a/packages/mcp/tests/parse-log-endpoint.test.ts b/packages/mcp/tests/parse-log-endpoint.test.ts
new file mode 100644
index 000000000..18319cb33
--- /dev/null
+++ b/packages/mcp/tests/parse-log-endpoint.test.ts
@@ -0,0 +1,26 @@
+import { expect, test } from "vite-plus/test";
+import { parseLogEndpoint } from "../src/utils/parse-log-endpoint.js";
+
+test("accepts a loopback /ingest/<id> endpoint", () => {
+  const url = parseLogEndpoint("http://127.0.0.1:7331/ingest/abc123");
+  expect(url.hostname).toBe("127.0.0.1");
+  expect(url.pathname).toBe("/ingest/abc123");
+});
+
+test("accepts localhost and ::1", () => {
+  expect(() => parseLogEndpoint("http://localhost:7331/ingest/x")).not.toThrow();
+  expect(() => parseLogEndpoint("http://[::1]:7331/ingest/x")).not.toThrow();
+});
+
+test("rejects a non-loopback host", () => {
+  expect(() => parseLogEndpoint("http://example.com/ingest/x")).toThrow(/non-loopback/);
+  expect(() => parseLogEndpoint("http://169.254.169.254/ingest/x")).toThrow(/non-loopback/);
+});
+
+test("rejects a non-ingest path", () => {
+  expect(() => parseLogEndpoint("http://127.0.0.1:7331/admin")).toThrow(/expected \/ingest/);
+});
+
+test("rejects a malformed URL", () => {
+  expect(() => parseLogEndpoint("not a url")).toThrow(/Invalid endpoint/);
+});

From bbf744e0713cca76497c4f55b046f84d894a91a9 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Fri, 19 Jun 2026 09:41:53 -0700
Subject: [PATCH 05/38] refactor(cli): reuse the shared Viewport type for
 --viewport

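Replace the CLI's inline { width; height } shape with @react-doctor/browser's
Viewport (type-only import, erased at build, so the lazy playwright-core boundary
is unaffected). Removes a duplicated type per AGENTS.md DRY.

Also extract an errorResult helper in the MCP tool-result utils and use it in
runTool and the debug tools in place of the hand-built isError results; the
debug tools' "Log server returned <status>" errors now pass through the same
home-directory scrub as thrown errors.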
---
 packages/mcp/src/tools/debug.ts                    | 14 +++-----------
 packages/mcp/src/utils/tool-result.ts              | 12 ++++++++----
 packages/react-doctor/src/cli/commands/browser.ts  |  3 ++-
 .../react-doctor/src/cli/utils/parse-viewport.ts   |  3 ++-
 4 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/packages/mcp/src/tools/debug.ts b/packages/mcp/src/tools/debug.ts
index 3f8ff8512..3d4c1f8d1 100644
--- a/packages/mcp/src/tools/debug.ts
+++ b/packages/mcp/src/tools/debug.ts
@@ -4,7 +4,7 @@ import { createLogServer, DEFAULT_HOST } from "@react-doctor/debug";
 import { z } from "zod";
 import { DEBUG_FETCH_TIMEOUT_MS } from "../constants.js";
 import { parseLogEndpoint } from "../utils/parse-log-endpoint.js";
-import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
+import { errorResult, jsonResult, runTool, textResult } from "../utils/tool-result.js";
 
 // Log servers started via `debug_serve` must outlive the tool call (the agent
 // instruments the app, then reads them back), so they live for the MCP process
@@ -78,12 +78,7 @@ export const registerDebugTools = (server: McpServer): void => {
         const response = await fetch(endpoint, {
           signal: AbortSignal.timeout(DEBUG_FETCH_TIMEOUT_MS),
         });
-        if (!response.ok) {
-          return {
-            content: [{ type: "text", text: `Log server returned ${response.status}` }],
-            isError: true,
-          };
-        }
+        if (!response.ok) return errorResult(`Log server returned ${response.status}`);
         const logs = await response.text();
         return textResult(logs.length > 0 ? logs : "(no logs captured yet)");
       }),
@@ -109,10 +104,7 @@ export const registerDebugTools = (server: McpServer): void => {
         });
         return response.ok
           ? textResult("Cleared logs")
-          : {
-              content: [{ type: "text", text: `Log server returned ${response.status}` }],
-              isError: true,
-            };
+          : errorResult(`Log server returned ${response.status}`);
       }),
   );
 };
diff --git a/packages/mcp/src/utils/tool-result.ts b/packages/mcp/src/utils/tool-result.ts
index 3c747e62d..6a7a363d5 100644
--- a/packages/mcp/src/utils/tool-result.ts
+++ b/packages/mcp/src/utils/tool-result.ts
@@ -5,6 +5,9 @@ export const textResult = (text: string): CallToolResult => ({
   content: [{ type: "text", text }],
 });
 
+export const jsonResult = (value: unknown): CallToolResult =>
+  textResult(JSON.stringify(value, null, 2));
+
 // Tool output is sent to the model (and may be logged), so keep the user's home
 // directory out of error messages — Chrome/profile/CDP errors otherwise carry an
 // absolute path that includes the username.
@@ -13,8 +16,10 @@ const scrubHomePath = (text: string): string => {
   return home ? text.split(home).join("~") : text;
 };
 
-export const jsonResult = (value: unknown): CallToolResult =>
-  textResult(JSON.stringify(value, null, 2));
+export const errorResult = (text: string): CallToolResult => ({
+  content: [{ type: "text", text: scrubHomePath(text) }],
+  isError: true,
+});
 
 // MCP convention: a tool reports a failure as a result with `isError: true` so
 // the model sees the message and can react, rather than throwing — which would
@@ -24,7 +29,6 @@ export const runTool = async (run: () => Promise<CallToolResult>): Promise<CallT
   try {
     return await run();
   } catch (error: unknown) {
-    const message = error instanceof Error ? error.message : String(error);
-    return { content: [{ type: "text", text: scrubHomePath(message) }], isError: true };
+    return errorResult(error instanceof Error ? error.message : String(error));
   }
 };
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index b01ca379f..7b1017c4e 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -4,6 +4,7 @@ import type {
   ConsoleMessageEntry,
   NetworkRequestEntry,
   PerformanceReport,
+  Viewport,
 } from "@react-doctor/browser";
 import { DEFAULT_SCREENSHOT_FILENAME, METRIC } from "../utils/constants.js";
 import { cliLogger as logger } from "../utils/cli-logger.js";
@@ -13,7 +14,7 @@ export interface BrowserCommandOptions {
   cdp?: string;
   launch?: boolean;
   out?: string;
-  viewport?: { width: number; height: number };
+  viewport?: Viewport;
 }
 
 const isModuleNotFoundError = (error: unknown): boolean =>
diff --git a/packages/react-doctor/src/cli/utils/parse-viewport.ts b/packages/react-doctor/src/cli/utils/parse-viewport.ts
index 4c8405c37..c60265dae 100644
--- a/packages/react-doctor/src/cli/utils/parse-viewport.ts
+++ b/packages/react-doctor/src/cli/utils/parse-viewport.ts
@@ -1,9 +1,10 @@
+import type { Viewport } from "@react-doctor/browser";
 import { InvalidArgumentError } from "commander";
 import { MAX_VIEWPORT_PX } from "./constants.js";
 
 // Throws Commander's InvalidArgumentError so a bad `--viewport WIDTHxHEIGHT`
 // value renders as a clean usage error rather than a crash report.
-export const parseViewport = (value: string): { width: number; height: number } => {
+export const parseViewport = (value: string): Viewport => {
   const match = /^(\d+)x(\d+)$/i.exec(value.trim());
   const width = match ? Number(match[1]) : 0;
   const height = match ? Number(match[2]) : 0;

From 068f3b216c0bd00dcbf3a529f128c55ffc7746cd Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Fri, 19 Jun 2026 11:26:17 -0700
Subject: [PATCH 06/38] fix(mcp): allowlist debug endpoints + guard
 non-loopback bind (thermos review)

- debug_serve rejects non-loopback hosts (loopback-only bind for the
  agent-driven path; CLI --host keeps its on-device flexibility)
- debug_read_logs/debug_clear_logs accept only endpoints debug_serve minted,
  replacing the loopback/path check with a stronger SSRF allowlist
- debug server appendLog rejects JSON array bodies
---
 packages/debug/src/server.ts                  |  2 +-
 packages/mcp/src/tools/debug.ts               | 27 +++++++++++++++----
 packages/mcp/src/utils/is-loopback-host.ts    |  8 ++++++
 packages/mcp/src/utils/parse-log-endpoint.ts  | 22 ---------------
 packages/mcp/tests/is-loopback-host.test.ts   | 17 ++++++++++++
 packages/mcp/tests/parse-log-endpoint.test.ts | 26 ------------------
 6 files changed, 48 insertions(+), 54 deletions(-)
 create mode 100644 packages/mcp/src/utils/is-loopback-host.ts
 delete mode 100644 packages/mcp/src/utils/parse-log-endpoint.ts
 create mode 100644 packages/mcp/tests/is-loopback-host.test.ts
 delete mode 100644 packages/mcp/tests/parse-log-endpoint.test.ts

diff --git a/packages/debug/src/server.ts b/packages/debug/src/server.ts
index 2060cb495..bc9719dfa 100644
--- a/packages/debug/src/server.ts
+++ b/packages/debug/src/server.ts
@@ -95,7 +95,7 @@ export const createLogServer = async (options: LogServerOptions = {}): Promise<L
     requestBody: string,
   ): boolean => {
     const logEntry: LogEntry = JSON.parse(requestBody);
-    if (typeof logEntry !== "object" || logEntry === null)
+    if (typeof logEntry !== "object" || logEntry === null || Array.isArray(logEntry))
       throw new Error("Body must be an object");
     if (logEntry.id && sessionState.processedEntryIds.has(logEntry.id)) return true;
 
diff --git a/packages/mcp/src/tools/debug.ts b/packages/mcp/src/tools/debug.ts
index 3d4c1f8d1..734c1503d 100644
--- a/packages/mcp/src/tools/debug.ts
+++ b/packages/mcp/src/tools/debug.ts
@@ -1,9 +1,10 @@
 import type { Server } from "node:http";
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
 import { createLogServer, DEFAULT_HOST } from "@react-doctor/debug";
 import { z } from "zod";
 import { DEBUG_FETCH_TIMEOUT_MS } from "../constants.js";
-import { parseLogEndpoint } from "../utils/parse-log-endpoint.js";
+import { isLoopbackHost } from "../utils/is-loopback-host.js";
 import { errorResult, jsonResult, runTool, textResult } from "../utils/tool-result.js";
 
 // Log servers started via `debug_serve` must outlive the tool call (the agent
@@ -11,6 +12,16 @@ import { errorResult, jsonResult, runTool, textResult } from "../utils/tool-resu
 // and are closed when it exits. A stale lock from a hard kill self-heals: the
 // reuse path pings for liveness and clears a dead lock before binding.
 const liveServers = new Set<Server>();
+
+// Exact endpoints `debug_serve` has handed out this process. `debug_read_logs`
+// and `debug_clear_logs` fetch a model-supplied endpoint, so they accept only
+// one of these — a tighter SSRF guard than a loopback+path check, which would
+// still let an injected call hit any other loopback service exposing /ingest.
+const mintedEndpoints = new Set<string>();
+
+const unknownEndpointError = (): CallToolResult =>
+  errorResult("Unknown log endpoint — call debug_serve first to get the endpoint to read or clear.");
+
 let cleanupRegistered = false;
 const trackLogServer = (logServer: Server): void => {
   liveServers.add(logServer);
@@ -44,6 +55,11 @@ export const registerDebugTools = (server: McpServer): void => {
     },
     (args) =>
       runTool(async () => {
+        if (args.host !== undefined && !isLoopbackHost(args.host)) {
+          return errorResult(
+            `Refusing to bind a non-loopback host: ${args.host}. The log server must stay on loopback.`,
+          );
+        }
         const {
           server: logServer,
           info,
@@ -55,6 +71,7 @@ export const registerDebugTools = (server: McpServer): void => {
           cwd: process.cwd(),
         });
         if (logServer) trackLogServer(logServer);
+        mintedEndpoints.add(info.endpoint);
         return jsonResult({ ...info, reused });
       }),
   );
@@ -74,8 +91,8 @@ export const registerDebugTools = (server: McpServer): void => {
     },
     (args) =>
       runTool(async () => {
-        const endpoint = parseLogEndpoint(args.endpoint);
-        const response = await fetch(endpoint, {
+        if (!mintedEndpoints.has(args.endpoint)) return unknownEndpointError();
+        const response = await fetch(args.endpoint, {
           signal: AbortSignal.timeout(DEBUG_FETCH_TIMEOUT_MS),
         });
         if (!response.ok) return errorResult(`Log server returned ${response.status}`);
@@ -97,8 +114,8 @@ export const registerDebugTools = (server: McpServer): void => {
     },
     (args) =>
       runTool(async () => {
-        const endpoint = parseLogEndpoint(args.endpoint);
-        const response = await fetch(endpoint, {
+        if (!mintedEndpoints.has(args.endpoint)) return unknownEndpointError();
+        const response = await fetch(args.endpoint, {
           method: "DELETE",
           signal: AbortSignal.timeout(DEBUG_FETCH_TIMEOUT_MS),
         });
diff --git a/packages/mcp/src/utils/is-loopback-host.ts b/packages/mcp/src/utils/is-loopback-host.ts
new file mode 100644
index 000000000..ff97514fc
--- /dev/null
+++ b/packages/mcp/src/utils/is-loopback-host.ts
@@ -0,0 +1,8 @@
+const LOOPBACK_HOSTS = new Set(["127.0.0.1", "localhost", "::1", "[::1]"]);
+
+// `debug_serve` is agent-driven, so a prompt injection could ask it to bind the
+// log server to a routable interface (e.g. 0.0.0.0) and expose captured runtime
+// logs to the LAN. Confine the MCP bind to loopback; the CLI's `debug serve`
+// keeps its `--host` flexibility for on-device debugging.
+export const isLoopbackHost = (host: string): boolean =>
+  LOOPBACK_HOSTS.has(host.trim().toLowerCase());
diff --git a/packages/mcp/src/utils/parse-log-endpoint.ts b/packages/mcp/src/utils/parse-log-endpoint.ts
deleted file mode 100644
index 92e741ee0..000000000
--- a/packages/mcp/src/utils/parse-log-endpoint.ts
+++ /dev/null
@@ -1,22 +0,0 @@
-const LOOPBACK_HOSTNAMES = new Set(["127.0.0.1", "localhost", "::1", "[::1]"]);
-
-// The debug tools fetch a model-supplied endpoint, so confine it to a loopback
-// `/ingest/<id>` URL — the exact shape `debug_serve` returns — so a prompt
-// injection can't turn `debug_read_logs`/`debug_clear_logs` into an SSRF
-// primitive against an arbitrary host. Throws (surfaced as a tool error by
-// `runTool`) on anything else.
-export const parseLogEndpoint = (endpoint: string): URL => {
-  let url: URL;
-  try {
-    url = new URL(endpoint);
-  } catch {
-    throw new Error(`Invalid endpoint URL: ${endpoint}`);
-  }
-  if (!LOOPBACK_HOSTNAMES.has(url.hostname)) {
-    throw new Error(`Refusing to fetch a non-loopback endpoint: ${url.hostname}`);
-  }
-  if (!url.pathname.startsWith("/ingest/")) {
-    throw new Error(`Not a debug log endpoint (expected /ingest/<id>): ${url.pathname}`);
-  }
-  return url;
-};
diff --git a/packages/mcp/tests/is-loopback-host.test.ts b/packages/mcp/tests/is-loopback-host.test.ts
new file mode 100644
index 000000000..7702376c0
--- /dev/null
+++ b/packages/mcp/tests/is-loopback-host.test.ts
@@ -0,0 +1,17 @@
+import { expect, test } from "vite-plus/test";
+import { isLoopbackHost } from "../src/utils/is-loopback-host.js";
+
+test("accepts loopback hosts", () => {
+  expect(isLoopbackHost("127.0.0.1")).toBe(true);
+  expect(isLoopbackHost("localhost")).toBe(true);
+  expect(isLoopbackHost("::1")).toBe(true);
+  expect(isLoopbackHost("[::1]")).toBe(true);
+  expect(isLoopbackHost("LOCALHOST")).toBe(true);
+});
+
+test("rejects routable and all-interface hosts", () => {
+  expect(isLoopbackHost("0.0.0.0")).toBe(false);
+  expect(isLoopbackHost("example.com")).toBe(false);
+  expect(isLoopbackHost("169.254.169.254")).toBe(false);
+  expect(isLoopbackHost("192.168.1.5")).toBe(false);
+});
diff --git a/packages/mcp/tests/parse-log-endpoint.test.ts b/packages/mcp/tests/parse-log-endpoint.test.ts
deleted file mode 100644
index 18319cb33..000000000
--- a/packages/mcp/tests/parse-log-endpoint.test.ts
+++ /dev/null
@@ -1,26 +0,0 @@
-import { expect, test } from "vite-plus/test";
-import { parseLogEndpoint } from "../src/utils/parse-log-endpoint.js";
-
-test("accepts a loopback /ingest/<id> endpoint", () => {
-  const url = parseLogEndpoint("http://127.0.0.1:7331/ingest/abc123");
-  expect(url.hostname).toBe("127.0.0.1");
-  expect(url.pathname).toBe("/ingest/abc123");
-});
-
-test("accepts localhost and ::1", () => {
-  expect(() => parseLogEndpoint("http://localhost:7331/ingest/x")).not.toThrow();
-  expect(() => parseLogEndpoint("http://[::1]:7331/ingest/x")).not.toThrow();
-});
-
-test("rejects a non-loopback host", () => {
-  expect(() => parseLogEndpoint("http://example.com/ingest/x")).toThrow(/non-loopback/);
-  expect(() => parseLogEndpoint("http://169.254.169.254/ingest/x")).toThrow(/non-loopback/);
-});
-
-test("rejects a non-ingest path", () => {
-  expect(() => parseLogEndpoint("http://127.0.0.1:7331/admin")).toThrow(/expected \/ingest/);
-});
-
-test("rejects a malformed URL", () => {
-  expect(() => parseLogEndpoint("not a url")).toThrow(/Invalid endpoint/);
-});

From 7665dadad9b8773f3dba00088174f167c02e4edf Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Fri, 19 Jun 2026 11:38:30 -0700
Subject: [PATCH 07/38] refactor(browser): encapsulate playwright/axe laziness
 in the package
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the playwright-core and axe-core value imports behind dynamic
import() inside @react-doctor/browser (load-playwright.ts + runAxe), so
the barrel has no heavy static deps and consumers can import it
statically. This dissolves the duplicated loadBrowser/isModuleNotFoundError
dance in both the CLI and MCP (the missing-playwright hint now lives in
one place) and lets parseViewport + MAX_VIEWPORT_PX live once in the
browser package, reused by the CLI's Commander wrapper and the MCP tool.

Bundle boundary preserved: axe-core splits into lazy chunks and
playwright-core stays an external dynamic import — neither lands in the
main cli.js startup path.
---
 packages/browser/src/connect.ts               |  4 +-
 packages/browser/src/constants.ts             |  4 ++
 packages/browser/src/index.ts                 |  1 +
 .../utils => browser/src}/parse-viewport.ts   | 10 +++--
 packages/browser/src/session.ts               |  4 +-
 packages/browser/src/utils/load-playwright.ts | 22 +++++++++++
 packages/mcp/src/constants.ts                 |  4 --
 packages/mcp/src/tools/browser.ts             |  2 +-
 packages/mcp/src/tools/debug.ts               |  4 +-
 packages/mcp/src/utils/load-browser.ts        | 20 ----------
 packages/mcp/src/utils/with-session.ts        |  8 ++--
 .../react-doctor/src/cli/commands/browser.ts  | 37 +++++--------------
 .../react-doctor/src/cli/utils/constants.ts   |  4 --
 .../src/cli/utils/parse-viewport.ts           | 21 ++++-------
 14 files changed, 64 insertions(+), 81 deletions(-)
 rename packages/{mcp/src/utils => browser/src}/parse-viewport.ts (58%)
 create mode 100644 packages/browser/src/utils/load-playwright.ts
 delete mode 100644 packages/mcp/src/utils/load-browser.ts

diff --git a/packages/browser/src/connect.ts b/packages/browser/src/connect.ts
index 52d19770c..b6fb6b24c 100644
--- a/packages/browser/src/connect.ts
+++ b/packages/browser/src/connect.ts
@@ -1,9 +1,10 @@
-import { chromium, type Browser } from "playwright-core";
+import type { Browser } from "playwright-core";
 import { CONNECT_TIMEOUT_MS, DEFAULT_CDP_ENDPOINT } from "./constants.js";
 import { launchPersistentChrome } from "./launch.js";
 import type { BrowserConnectOptions } from "./types.js";
 import { cdpPortFromEndpoint } from "./utils/cdp-port.js";
 import { isLoopbackEndpoint } from "./utils/is-loopback-endpoint.js";
+import { loadPlaywright } from "./utils/load-playwright.js";
 
 export interface BrowserConnection {
   browser: Browser;
@@ -19,6 +20,7 @@ export const connectToBrowser = async (
   options: BrowserConnectOptions = {},
 ): Promise<BrowserConnection> => {
   const endpoint = options.cdpEndpoint ?? DEFAULT_CDP_ENDPOINT;
+  const { chromium } = await loadPlaywright();
   try {
     const browser = await chromium.connectOverCDP(endpoint, { timeout: CONNECT_TIMEOUT_MS });
     return { browser, launched: false };
diff --git a/packages/browser/src/constants.ts b/packages/browser/src/constants.ts
index b19da031e..288a00f84 100644
--- a/packages/browser/src/constants.ts
+++ b/packages/browser/src/constants.ts
@@ -42,6 +42,10 @@ export const PERFORMANCE_OBSERVE_WINDOW_MS = 1_000;
 // the problem without dumping every match on a busy page.
 export const MAX_VIOLATION_TARGETS = 5;
 
+// Upper bound on an emulated viewport dimension, so a typo can't push an absurd
+// device-metrics override into CDP.
+export const MAX_VIEWPORT_PX = 10_000;
+
 // Built React-profiler init script, relative to the bundle that imports it.
 // `react-profiler/inject.ts` is esbuilt into this self-contained IIFE at build
 // time (see vite.config.ts); the session injects it via `addInitScript`. The
diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts
index b01289f9b..e1628f2b3 100644
--- a/packages/browser/src/index.ts
+++ b/packages/browser/src/index.ts
@@ -1,4 +1,5 @@
 export { BrowserSession } from "./session.js";
 export { connectToBrowser } from "./connect.js";
 export type { BrowserConnection } from "./connect.js";
+export { parseViewport } from "./parse-viewport.js";
 export type * from "./types.js";
diff --git a/packages/mcp/src/utils/parse-viewport.ts b/packages/browser/src/parse-viewport.ts
similarity index 58%
rename from packages/mcp/src/utils/parse-viewport.ts
rename to packages/browser/src/parse-viewport.ts
index 1c7950494..93ca6d4c9 100644
--- a/packages/mcp/src/utils/parse-viewport.ts
+++ b/packages/browser/src/parse-viewport.ts
@@ -1,8 +1,10 @@
-import type { Viewport } from "@react-doctor/browser";
-import { MAX_VIEWPORT_PX } from "../constants.js";
+import { MAX_VIEWPORT_PX } from "./constants.js";
+import type { Viewport } from "./types.js";
 
-// Parse a `WIDTHxHEIGHT` string into a viewport, throwing a readable message
-// (surfaced as a tool error by `runTool`) on a malformed or out-of-range value.
+// Parse a `WIDTHxHEIGHT` string (e.g. 390x844) into a viewport, throwing a
+// readable Error on a malformed or out-of-range value. Pure (no playwright), so
+// both the CLI's `--viewport` parser and the MCP tool reuse it without dragging
+// the browser engine into their bundles.
 export const parseViewport = (value: string): Viewport => {
   const match = /^(\d+)x(\d+)$/i.exec(value.trim());
   const width = match ? Number(match[1]) : 0;
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 12712302f..3189a976b 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -1,6 +1,5 @@
 import { readFile } from "node:fs/promises";
 import { fileURLToPath } from "node:url";
-import axe from "axe-core";
 import type { Browser, CDPSession, ConsoleMessage, Page, Request, Response } from "playwright-core";
 import { connectToBrowser, type BrowserConnection } from "./connect.js";
 import {
@@ -182,8 +181,9 @@ export class BrowserSession {
   }
 
   // axe is injected with `evaluate`, not a <script> tag, so a strict CSP can't
-  // block it.
+  // block it. Loaded on demand so it stays out of bundles that don't audit.
   private async runAxe(): Promise<AccessibilityViolation[]> {
+    const { default: axe } = await import("axe-core");
     await this.page.evaluate(axe.source);
     return this.page.evaluate(async (maxTargets) => {
       const runner: typeof axe = (globalThis as unknown as { axe: typeof axe }).axe;
diff --git a/packages/browser/src/utils/load-playwright.ts b/packages/browser/src/utils/load-playwright.ts
new file mode 100644
index 000000000..7a67dfce2
--- /dev/null
+++ b/packages/browser/src/utils/load-playwright.ts
@@ -0,0 +1,22 @@
+import type * as PlaywrightCore from "playwright-core";
+
+const isModuleNotFoundError = (error: unknown): boolean =>
+  error instanceof Error &&
+  "code" in error &&
+  (error.code === "ERR_MODULE_NOT_FOUND" || error.code === "MODULE_NOT_FOUND");
+
+// playwright-core is heavy and optional, so it's imported on demand here — the
+// one place a missing install becomes an actionable hint instead of a raw
+// module-not-found stack. Keeping it dynamic also keeps it out of any bundle
+// that statically imports this package: it loads only when a browser command
+// actually attaches to Chrome.
+export const loadPlaywright = async (): Promise<typeof PlaywrightCore> => {
+  try {
+    return await import("playwright-core");
+  } catch (error: unknown) {
+    if (!isModuleNotFoundError(error)) throw error;
+    throw new Error(
+      "The browser tools need playwright-core, which isn't installed. Install it with `npm i -D playwright-core`, then retry.",
+    );
+  }
+};
diff --git a/packages/mcp/src/constants.ts b/packages/mcp/src/constants.ts
index ee00c36c2..63e2f0941 100644
--- a/packages/mcp/src/constants.ts
+++ b/packages/mcp/src/constants.ts
@@ -9,10 +9,6 @@ export const DEFAULT_CDP_ENDPOINT_HINT = "http://127.0.0.1:9222";
 // summary still reports the full counts and a `truncated` flag.
 export const MAX_INLINE_DIAGNOSTICS = 100;
 
-// Upper bound on an emulated viewport dimension, so a typo can't push an
-// absurd device-metrics override into CDP (mirrors the CLI's --viewport guard).
-export const MAX_VIEWPORT_PX = 10_000;
-
 // Cap on the debug tools' HTTP calls to the local log server, so a hung or
 // unresponsive endpoint can't block the MCP tool turn indefinitely.
 export const DEBUG_FETCH_TIMEOUT_MS = 5000;
diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
index 371dbf2cc..198afeca1 100644
--- a/packages/mcp/src/tools/browser.ts
+++ b/packages/mcp/src/tools/browser.ts
@@ -2,8 +2,8 @@ import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import type { BrowserSession } from "@react-doctor/browser";
 import { z } from "zod";
+import { parseViewport } from "@react-doctor/browser";
 import { DEFAULT_CDP_ENDPOINT_HINT } from "../constants.js";
-import { parseViewport } from "../utils/parse-viewport.js";
 import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
 import { withSession, type BrowserToolConnection } from "../utils/with-session.js";
 
diff --git a/packages/mcp/src/tools/debug.ts b/packages/mcp/src/tools/debug.ts
index 734c1503d..539768a91 100644
--- a/packages/mcp/src/tools/debug.ts
+++ b/packages/mcp/src/tools/debug.ts
@@ -20,7 +20,9 @@ const liveServers = new Set<Server>();
 const mintedEndpoints = new Set<string>();
 
 const unknownEndpointError = (): CallToolResult =>
-  errorResult("Unknown log endpoint — call debug_serve first to get the endpoint to read or clear.");
+  errorResult(
+    "Unknown log endpoint — call debug_serve first to get the endpoint to read or clear.",
+  );
 
 let cleanupRegistered = false;
 const trackLogServer = (logServer: Server): void => {
diff --git a/packages/mcp/src/utils/load-browser.ts b/packages/mcp/src/utils/load-browser.ts
deleted file mode 100644
index 4aad547ad..000000000
--- a/packages/mcp/src/utils/load-browser.ts
+++ /dev/null
@@ -1,20 +0,0 @@
-import type * as BrowserModule from "@react-doctor/browser";
-
-const isModuleNotFoundError = (error: unknown): boolean =>
-  error instanceof Error &&
-  "code" in error &&
-  (error.code === "ERR_MODULE_NOT_FOUND" || error.code === "MODULE_NOT_FOUND");
-
-// playwright-core is heavy and optional, so the browser package is loaded on
-// demand: `doctor_scan` works without it, and the browser tools turn a missing
-// install into an actionable message instead of a stack trace.
-export const loadBrowser = async (): Promise<typeof BrowserModule> => {
-  try {
-    return await import("@react-doctor/browser");
-  } catch (error: unknown) {
-    if (!isModuleNotFoundError(error)) throw error;
-    throw new Error(
-      "The browser tools need playwright-core, which isn't installed. Install it with `npm i -D playwright-core`, then retry.",
-    );
-  }
-};
diff --git a/packages/mcp/src/utils/with-session.ts b/packages/mcp/src/utils/with-session.ts
index 0933d9a96..2ca81615a 100644
--- a/packages/mcp/src/utils/with-session.ts
+++ b/packages/mcp/src/utils/with-session.ts
@@ -1,5 +1,4 @@
-import type { BrowserSession, Viewport } from "@react-doctor/browser";
-import { loadBrowser } from "./load-browser.js";
+import { BrowserSession, type Viewport } from "@react-doctor/browser";
 
 export interface BrowserToolConnection {
   cdp?: string;
@@ -10,12 +9,13 @@ export interface BrowserToolConnection {
 // Attach a fresh session per tool call, act, then disconnect. The page lives in
 // the browser (attached over CDP), so each call is cheap and the page persists
 // across calls — the same persistent model the CLI's `browser` commands use.
+// playwright-core loads lazily inside the session, so a missing install surfaces
+// the browser package's own actionable hint (caught by `runTool`).
 export const withSession = async <ResultType>(
   connection: BrowserToolConnection,
   useSession: (session: BrowserSession) => Promise<ResultType>,
 ): Promise<ResultType> => {
-  const { BrowserSession: Session } = await loadBrowser();
-  const session = await Session.attach({
+  const session = await BrowserSession.attach({
     cdpEndpoint: connection.cdp,
     launch: connection.noLaunch === true ? false : undefined,
   });
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index 7b1017c4e..e55f86315 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -1,10 +1,10 @@
-import type {
-  AccessibilityViolation,
+import {
   BrowserSession,
-  ConsoleMessageEntry,
-  NetworkRequestEntry,
-  PerformanceReport,
-  Viewport,
+  type AccessibilityViolation,
+  type ConsoleMessageEntry,
+  type NetworkRequestEntry,
+  type PerformanceReport,
+  type Viewport,
 } from "@react-doctor/browser";
 import { DEFAULT_SCREENSHOT_FILENAME, METRIC } from "../utils/constants.js";
 import { cliLogger as logger } from "../utils/cli-logger.js";
@@ -17,31 +17,14 @@ export interface BrowserCommandOptions {
   viewport?: Viewport;
 }
 
-const isModuleNotFoundError = (error: unknown): boolean =>
-  error instanceof Error &&
-  "code" in error &&
-  (error.code === "ERR_MODULE_NOT_FOUND" || error.code === "MODULE_NOT_FOUND");
-
-// playwright-core is heavy and only the browser jobs need it, so it's an
-// optional dependency loaded on demand. A missing install becomes an actionable
-// hint; any other failure (a real bug in the browser package) rethrows as-is.
-const loadBrowser = async (): Promise<typeof import("@react-doctor/browser")> => {
-  try {
-    return await import("@react-doctor/browser");
-  } catch (error: unknown) {
-    if (!isModuleNotFoundError(error)) throw error;
-    throw new Error(
-      "The browser tools need playwright-core, which isn't installed. Install it with `npm i -D playwright-core`, then retry.",
-    );
-  }
-};
-
+// playwright-core loads lazily inside @react-doctor/browser (only when a command
+// attaches to Chrome), so importing the session here costs nothing at startup
+// and a missing install surfaces the package's own actionable hint.
 const withSession = async (
   options: BrowserCommandOptions,
   useSession: (session: BrowserSession) => Promise<void>,
 ): Promise<void> => {
-  const { BrowserSession: Session } = await loadBrowser();
-  const session = await Session.attach({ cdpEndpoint: options.cdp, launch: options.launch });
+  const session = await BrowserSession.attach({ cdpEndpoint: options.cdp, launch: options.launch });
   try {
     if (options.viewport) await session.setViewport(options.viewport);
     await useSession(session);
diff --git a/packages/react-doctor/src/cli/utils/constants.ts b/packages/react-doctor/src/cli/utils/constants.ts
index 6ebf6efac..d76319f29 100644
--- a/packages/react-doctor/src/cli/utils/constants.ts
+++ b/packages/react-doctor/src/cli/utils/constants.ts
@@ -30,10 +30,6 @@ export const GIT_HOOK_EXECUTABLE_MODE = 0o755;
 // Default output path for `browser screenshot` when `--out` is omitted.
 export const DEFAULT_SCREENSHOT_FILENAME = "react-doctor-screenshot.png";
 
-// Upper bound on an emulated `--viewport` dimension, so a typo can't push an
-// absurd device-metrics override into CDP.
-export const MAX_VIEWPORT_PX = 10_000;
-
 export const AGENT_HOOK_TIMEOUT_SECONDS = 120;
 
 // Hard cap on the `gh repo view` default-branch probe. A healthy gh answers
diff --git a/packages/react-doctor/src/cli/utils/parse-viewport.ts b/packages/react-doctor/src/cli/utils/parse-viewport.ts
index c60265dae..2b73515b2 100644
--- a/packages/react-doctor/src/cli/utils/parse-viewport.ts
+++ b/packages/react-doctor/src/cli/utils/parse-viewport.ts
@@ -1,18 +1,13 @@
-import type { Viewport } from "@react-doctor/browser";
+import { parseViewport as parseViewportValue, type Viewport } from "@react-doctor/browser";
 import { InvalidArgumentError } from "commander";
-import { MAX_VIEWPORT_PX } from "./constants.js";
 
-// Throws Commander's InvalidArgumentError so a bad `--viewport WIDTHxHEIGHT`
-// value renders as a clean usage error rather than a crash report.
+// Reuse the browser package's pure parser, rethrowing as Commander's
+// InvalidArgumentError so a bad `--viewport WIDTHxHEIGHT` renders as a clean
+// usage error rather than a crash report.
 export const parseViewport = (value: string): Viewport => {
-  const match = /^(\d+)x(\d+)$/i.exec(value.trim());
-  const width = match ? Number(match[1]) : 0;
-  const height = match ? Number(match[2]) : 0;
-  if (!match || width === 0 || height === 0) {
-    throw new InvalidArgumentError(`Use WIDTHxHEIGHT in pixels, e.g. 390x844 (got "${value}").`);
+  try {
+    return parseViewportValue(value);
+  } catch (error: unknown) {
+    throw new InvalidArgumentError(error instanceof Error ? error.message : String(error));
   }
-  if (width > MAX_VIEWPORT_PX || height > MAX_VIEWPORT_PX) {
-    throw new InvalidArgumentError(`Viewport dimensions must be at most ${MAX_VIEWPORT_PX}px.`);
-  }
-  return { width, height };
 };

From c51eed33d635d45910fd1db9178c2120ab63a2f6 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Mon, 22 Jun 2026 17:18:18 -0700
Subject: [PATCH 08/38] feat(browser): add combined React + CPU profiler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Record one `browser profile` pass that returns both a React render
profile (slowest commits, hottest components by self time, unnecessary
re-render counts) and a V8 CPU profile (functions ranked by self time,
via the DevTools sampler over CDP) as JSON for agents to consume — no
files written. Navigation runs before the profiler attaches so it binds
to the final document's renderer; both lenses cover the post-load +
interaction window. Surfaced through `BrowserSession.profile()`, the
`browser_profile` MCP tool, and the `react-doctor browser profile` CLI
command.
---
 .agents/skills/react-doctor/SKILL.md          |  16 ++-
 .../react-doctor/references/performance.md    |  11 +-
 .changeset/react-browser-debug-skill.md       |   2 +-
 packages/browser/src/analyze-cpu-profile.ts   |  97 +++++++++++++
 packages/browser/src/constants.ts             |  17 +++
 .../src/react-profiler/analyze-profile.ts     | 113 +++++++++++++++
 packages/browser/src/session.ts               |  54 +++++++
 packages/browser/src/types.ts                 |  64 +++++++++
 packages/browser/src/utils/round.ts           |   3 +
 .../browser/tests/analyze-cpu-profile.test.ts |  75 ++++++++++
 .../react-profiler/analyze-profile.test.ts    | 134 ++++++++++++++++++
 packages/mcp/src/tools/browser.ts             |  36 ++++-
 packages/mcp/tests/server.test.ts             |   1 +
 .../react-doctor/src/cli/commands/browser.ts  |  62 +++++++-
 packages/react-doctor/src/cli/index.ts        |  13 ++
 skills/react-doctor/SKILL.md                  |  16 ++-
 skills/react-doctor/references/performance.md |  11 +-
 17 files changed, 712 insertions(+), 13 deletions(-)
 create mode 100644 packages/browser/src/analyze-cpu-profile.ts
 create mode 100644 packages/browser/src/react-profiler/analyze-profile.ts
 create mode 100644 packages/browser/src/utils/round.ts
 create mode 100644 packages/browser/tests/analyze-cpu-profile.test.ts
 create mode 100644 packages/browser/tests/react-profiler/analyze-profile.test.ts

diff --git a/.agents/skills/react-doctor/SKILL.md b/.agents/skills/react-doctor/SKILL.md
index e85b926fa..5778fc388 100644
--- a/.agents/skills/react-doctor/SKILL.md
+++ b/.agents/skills/react-doctor/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: react-doctor
 description: Use when writing, finishing, or committing React or React Native code, when the user types `/react-doctor`, or when they ask to scan, triage, lint, profile performance, debug a UI in the browser, or review design and accessibility. Covers lint, accessibility, performance, bundle size, and architecture.
-version: "1.5.0"
+version: "1.6.0"
 ---
 
 # React Doctor
@@ -30,7 +30,7 @@ Apply these on every React edit, before any tool runs. They shape how you write
 | Signal                                                  | Job        | What it does                    |
 | ------------------------------------------------------- | ---------- | ------------------------------- |
 | "review", "before commit", "clean up", or changed files | **doctor** | static scan plus 0 to 100 score |
-| "slow", "laggy", "janky", "re-rendering"                | **perf**   | React DevTools profiler harness |
+| "slow", "laggy", "janky", "re-rendering"                | **perf**   | React render + CPU profilers    |
 | "broken", "crashes", "doesn't work" in the UI           | **debug**  | reproduce in a real browser     |
 | "looks off", "polish", a screenshot or pasted element   | **design** | measured UI review              |
 
@@ -41,10 +41,20 @@ doctor runs from code alone, so it is the one that fires in the background. The
 debug, design, and perf need a real Chrome. Two ways to get one:
 
 1. **A browser MCP already in your tools.** Prefer [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) or similar for console, network, and snapshots. It adds full performance traces and Lighthouse on top.
-2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. It covers `open`, `eval`, `snapshot`, `screenshot`, `console`, `network`, an axe-core `audit`, and `perf` (long animation frames with per-script attribution).
+2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. It covers `open`, `eval`, `snapshot`, `screenshot`, `console`, `network`, an axe-core `audit`, `perf` (long animation frames with per-script attribution), `profile` (one recording with both lenses — a React render profile of slowest commits, hottest components, and unnecessary re-renders, plus a V8/DevTools CPU profile over CDP with the hottest JS functions ranked by self time), and `report` (every signal in one load).
 
 It is the same Chrome either way, so the playbooks apply to both: `browser open`, `eval`, `snapshot`, and `screenshot` map onto the MCP's `navigate_page`, `evaluate_script`, `take_snapshot`, and `take_screenshot`.
 
+## Run as an MCP server
+
+React Doctor ships its own Model Context Protocol server over stdio so any MCP-capable agent can call the jobs directly:
+
+```bash
+npx react-doctor@latest mcp
+```
+
+It exposes `doctor_scan` (the static scan), the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`, `browser_audit`, `browser_console`, `browser_network`, `browser_perf`, `browser_profile`, `browser_report`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_profile` records both a React render profile and a literal Chrome DevTools CPU profile in one pass.
+
 ## doctor: scan and triage
 
 After making React changes, run a regression check and confirm the score did not drop:
diff --git a/.agents/skills/react-doctor/references/performance.md b/.agents/skills/react-doctor/references/performance.md
index e0dc85c79..01ace8d41 100644
--- a/.agents/skills/react-doctor/references/performance.md
+++ b/.agents/skills/react-doctor/references/performance.md
@@ -36,7 +36,14 @@ npx react-doctor browser open http://localhost:3000
 
 For trustworthy timings, run against React's profiling build (alias `react-dom` to `react-dom/profiling` in your bundler) in a dev or non-prod build. Dev timings work but are inflated.
 
-Drive it through `browser eval` (the Playwright `page` is in scope). `stop()` returns a JSON profiling export and resolves to `null` when nothing was recorded (a production React build records no profiling data):
+The fastest path is `browser profile`: one recording, both lenses. It returns `react` (slowest commits, components that render most/cost the most self time, and the count of unnecessary re-renders — components that re-rendered with nothing they own changed, the memoization candidates) and `cpu` (a Chrome DevTools CPU profile via V8's sampling profiler over CDP, the hottest JS functions ranked by self time):
+
+```bash
+npx react-doctor browser profile http://localhost:3000 --interaction 'page.getByText("Next").click()'
+# omit the url to profile a page already opened with `browser open`
+```
+
+The `react` lens is null on a production React build (it records no profiling data); the `cpu` lens works on any build. For manual control of the React profiler, drive it through `browser eval` (the Playwright `page` is in scope):
 
 ```bash
 npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'
@@ -44,7 +51,7 @@ npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start()
 npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.stop())'
 ```
 
-Aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates).
+Reading the raw React export: aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates) — which is exactly what `browser profile` computes for you.
 
 ## 5. Fix, only with proof
 
diff --git a/.changeset/react-browser-debug-skill.md b/.changeset/react-browser-debug-skill.md
index 032461179..c38fa5aa7 100644
--- a/.changeset/react-browser-debug-skill.md
+++ b/.changeset/react-browser-debug-skill.md
@@ -2,4 +2,4 @@
 "react-doctor": minor
 ---
 
-Add the `browser`, `debug`, and `mcp` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback) for accessibility audits, console/network capture, performance traces with React DevTools profiling, snapshots, and screenshots. `debug` runs an NDJSON logging server the debug job posts runtime evidence to. `mcp` runs a Model Context Protocol server over stdio that exposes the doctor scan and the browser/debug jobs as MCP tools, so any MCP-capable agent can run `react-doctor mcp` and call `doctor_scan`, the `browser_*` tools, and the `debug_*` log server directly.
+Add the `browser`, `debug`, and `mcp` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback) for accessibility audits, console/network capture, performance traces, profiling (`browser profile` records both a React render profile — slowest commits, hottest components by self time, unnecessary re-render counts — and a Chrome DevTools CPU profile via V8's sampling profiler over CDP, the hottest JS functions ranked by self time, in one pass), snapshots, and screenshots. `debug` runs an NDJSON logging server the debug job posts runtime evidence to. `mcp` runs a Model Context Protocol server over stdio that exposes the doctor scan and the browser/debug jobs as MCP tools, so any MCP-capable agent can run `react-doctor mcp` and call `doctor_scan`, the `browser_*` tools (including `browser_profile`, which captures the React render profile and the CPU profile together), and the `debug_*` log server directly.
diff --git a/packages/browser/src/analyze-cpu-profile.ts b/packages/browser/src/analyze-cpu-profile.ts
new file mode 100644
index 000000000..b61999abc
--- /dev/null
+++ b/packages/browser/src/analyze-cpu-profile.ts
@@ -0,0 +1,97 @@
+import { MAX_PROFILE_FUNCTIONS } from "./constants.js";
+import type { CpuProfileAnalysis } from "./types.js";
+import { roundToHundredths } from "./utils/round.js";
+
+interface CpuProfileCallFrame {
+  functionName: string;
+  url: string;
+  lineNumber: number;
+}
+
+interface CpuProfileNode {
+  id: number;
+  callFrame: CpuProfileCallFrame;
+  hitCount?: number;
+}
+
+// The shape of `Profiler.stop`'s `profile` (a structural subset of CDP's
+// Protocol.Profiler.Profile), the same JSON DevTools writes to a `.cpuprofile`.
+export interface CdpCpuProfile {
+  nodes: CpuProfileNode[];
+  startTime: number;
+  endTime: number;
+  samples?: number[];
+  timeDeltas?: number[];
+}
+
+// A function's display key: V8's synthetic frames ("(idle)", "(program)",
+// "(garbage collector)", "(root)") have no url and are kept as-is so the
+// percentages still add up to the wall time the profile covered.
+const labelFor = (callFrame: CpuProfileCallFrame): { name: string; url: string | null } => {
+  const name = callFrame.functionName || "(anonymous)";
+  const url = callFrame.url ? `${callFrame.url}:${callFrame.lineNumber + 1}` : null;
+  return { name, url };
+};
+
+// Fold a CDP CPU profile into self-time-per-function: each sample attributes its
+// paired time delta to the function on top of the stack at that sample. This
+// approximates the self-time DevTools' bottom-up view shows (attribution can
+// shift by up to one sample interval). Returns the MAX_PROFILE_FUNCTIONS hottest
+// rows, ranked by unrounded self time; never-sampled functions get no row.
+export const analyzeCpuProfile = (profile: CdpCpuProfile): CpuProfileAnalysis => {
+  const durationMs = (profile.endTime - profile.startTime) / 1000;
+  const samples = profile.samples ?? [];
+  const timeDeltas = profile.timeDeltas ?? [];
+
+  const nodeById = new Map<number, CpuProfileNode>();
+  for (const node of profile.nodes) nodeById.set(node.id, node);
+
+  // Accumulate self time (microseconds) by function key, summing same-named
+  // frames so a function split across optimization tiers reads as one row.
+  interface SelfTimeAccumulator {
+    functionName: string;
+    url: string | null;
+    selfUs: number;
+  }
+  const accumulatorByKey = new Map<string, SelfTimeAccumulator>();
+  const addSelfTime = (node: CpuProfileNode | undefined, microseconds: number): void => {
+    if (!node) return;
+    const { name, url } = labelFor(node.callFrame);
+    const key = `${name}@${url ?? ""}`;
+    const existing = accumulatorByKey.get(key);
+    if (existing) {
+      existing.selfUs += microseconds;
+      return;
+    }
+    accumulatorByKey.set(key, { functionName: name, url, selfUs: microseconds });
+  };
+
+  if (samples.length > 0 && timeDeltas.length === samples.length) {
+    for (let index = 0; index < samples.length; index += 1) {
+      addSelfTime(nodeById.get(samples[index]), timeDeltas[index]);
+    }
+  } else {
+    // No sample stream (rare): fall back to hitCount, scaling each node's share of
+    // total hits across the measured duration. Zero-hit nodes add no row.
+    const totalHits = profile.nodes.reduce((sum, node) => sum + (node.hitCount ?? 0), 0) || 1;
+    const durationUs = durationMs * 1000;
+    for (const node of profile.nodes) {
+      if (node.hitCount) addSelfTime(node, (node.hitCount / totalHits) * durationUs);
+    }
+  }
+
+  const topFunctions = [...accumulatorByKey.values()]
+    .sort((a, b) => b.selfUs - a.selfUs)
+    .slice(0, MAX_PROFILE_FUNCTIONS)
+    .map((accumulator) => {
+      const selfMs = accumulator.selfUs / 1000;
+      return {
+        functionName: accumulator.functionName,
+        url: accumulator.url,
+        selfMs: roundToHundredths(selfMs),
+        selfPercent: durationMs > 0 ? roundToHundredths((selfMs / durationMs) * 100) : 0,
+      };
+    });
+
+  return { durationMs: roundToHundredths(durationMs), sampleCount: samples.length, topFunctions };
+};
diff --git a/packages/browser/src/constants.ts b/packages/browser/src/constants.ts
index 288a00f84..3bd86d5de 100644
--- a/packages/browser/src/constants.ts
+++ b/packages/browser/src/constants.ts
@@ -46,6 +46,23 @@ export const MAX_VIOLATION_TARGETS = 5;
 // device-metrics override into CDP.
 export const MAX_VIEWPORT_PX = 10_000;
 
+// After driving a profiled interaction, wait this long for React's commits to
+// flush (concurrent renders land asynchronously) before stopping the recording.
+export const REACT_PROFILE_FLUSH_MS = 500;
+
+// Caps on what a profile analysis returns inline, so a long recording stays a
+// readable result rather than a dump keyed by thousands of fibers. The summary
+// counts still reflect everything recorded.
+export const MAX_PROFILE_COMPONENTS = 20;
+export const MAX_PROFILE_COMMITS = 10;
+export const MAX_COMMIT_COMPONENTS = 8;
+
+// V8 CPU profiler sampling interval, matching Chrome DevTools' default (100us).
+export const DEFAULT_CPU_SAMPLING_INTERVAL_US = 100;
+
+// Functions returned inline by a CPU profile analysis, ranked by self time.
+export const MAX_PROFILE_FUNCTIONS = 20;
+
 // Built React-profiler init script, relative to the bundle that imports it.
 // `react-profiler/inject.ts` is esbuilt into this self-contained IIFE at build
 // time (see vite.config.ts); the session injects it via `addInitScript`. The
diff --git a/packages/browser/src/react-profiler/analyze-profile.ts b/packages/browser/src/react-profiler/analyze-profile.ts
new file mode 100644
index 000000000..894b34379
--- /dev/null
+++ b/packages/browser/src/react-profiler/analyze-profile.ts
@@ -0,0 +1,113 @@
+import {
+  MAX_COMMIT_COMPONENTS,
+  MAX_PROFILE_COMMITS,
+  MAX_PROFILE_COMPONENTS,
+} from "../constants.js";
+import type {
+  ReactComponentRenderStat,
+  ReactProfileAnalysis,
+  ReactProfileCommitStat,
+} from "../types.js";
+import { roundToHundredths } from "../utils/round.js";
+import type {
+  ReactProfilerChangeDescription,
+  ReactProfilerDataExport,
+} from "./types/profiling-export.js";
+
+// A render is "wasted" when the component re-rendered without anything it owns
+// changing — not its first mount, no hook/state/props/context change — i.e. it
+// rendered only because a parent did. These are the memo / useCallback targets.
+const isUnnecessaryRender = (change: ReactProfilerChangeDescription): boolean => {
+  if (change.isFirstMount || change.didHooksChange) return false;
+  const changedContext = Array.isArray(change.context)
+    ? change.context.length > 0
+    : Boolean(change.context);
+  const changedProps = change.props?.length ?? 0;
+  const changedState = change.state?.length ?? 0;
+  return changedProps === 0 && changedState === 0 && !changedContext;
+};
+
+// Fold the DevTools profiling export — per-root commits keyed by fiber id — into
+// a component-level summary an agent can act on: which components render most and
+// cost the most self-time, which commits were the slowest, and how many renders
+// were wasted (re-rendered with nothing they own changed).
+export const analyzeReactProfile = (data: ReactProfilerDataExport): ReactProfileAnalysis => {
+  const componentStats = new Map<string, ReactComponentRenderStat>();
+  const commitStats: ReactProfileCommitStat[] = [];
+  let totalCommitDurationMs = 0;
+  let unnecessaryRenderCount = 0;
+  let commitIndex = 0;
+
+  const statFor = (name: string): ReactComponentRenderStat => {
+    const existing = componentStats.get(name);
+    if (existing) return existing;
+    const created: ReactComponentRenderStat = {
+      name,
+      renderCount: 0,
+      totalSelfMs: 0,
+      totalActualMs: 0,
+      maxSelfMs: 0,
+      unnecessaryRenderCount: 0,
+    };
+    componentStats.set(name, created);
+    return created;
+  };
+
+  for (const root of data.dataForRoots) {
+    const nameByFiber = new Map<number, string>(root.elementNames);
+    const nameFor = (fiberId: number): string => nameByFiber.get(fiberId) ?? `#${fiberId}`;
+
+    for (const commit of root.commitData) {
+      totalCommitDurationMs += commit.duration;
+      const actualByFiber = new Map<number, number>(commit.fiberActualDurations);
+      const changeByFiber = new Map(commit.changeDescriptions ?? []);
+      const selfByName = new Map<string, number>();
+
+      for (const [fiberId, selfMs] of commit.fiberSelfDurations) {
+        const name = nameFor(fiberId);
+        const stat = statFor(name);
+        stat.renderCount += 1;
+        stat.totalSelfMs += selfMs;
+        stat.totalActualMs += actualByFiber.get(fiberId) ?? 0;
+        stat.maxSelfMs = Math.max(stat.maxSelfMs, selfMs);
+        selfByName.set(name, (selfByName.get(name) ?? 0) + selfMs);
+
+        const change = changeByFiber.get(fiberId);
+        if (change && isUnnecessaryRender(change)) {
+          stat.unnecessaryRenderCount += 1;
+          unnecessaryRenderCount += 1;
+        }
+      }
+
+      const components = [...selfByName.entries()]
+        .sort(([, a], [, b]) => b - a)
+        .slice(0, MAX_COMMIT_COMPONENTS)
+        .map(([name]) => name);
+      commitStats.push({ commitIndex, durationMs: roundToHundredths(commit.duration), components });
+      commitIndex += 1;
+    }
+  }
+
+  const topComponents = [...componentStats.values()]
+    .sort((a, b) => b.totalSelfMs - a.totalSelfMs)
+    .slice(0, MAX_PROFILE_COMPONENTS)
+    .map((stat) => ({
+      ...stat,
+      totalSelfMs: roundToHundredths(stat.totalSelfMs),
+      totalActualMs: roundToHundredths(stat.totalActualMs),
+      maxSelfMs: roundToHundredths(stat.maxSelfMs),
+    }));
+
+  const slowestCommits = [...commitStats]
+    .sort((a, b) => b.durationMs - a.durationMs)
+    .slice(0, MAX_PROFILE_COMMITS);
+
+  return {
+    rootCount: data.dataForRoots.length,
+    commitCount: commitIndex,
+    totalCommitDurationMs: roundToHundredths(totalCommitDurationMs),
+    unnecessaryRenderCount,
+    topComponents,
+    slowestCommits,
+  };
+};
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 3189a976b..59118705a 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -2,14 +2,18 @@ import { readFile } from "node:fs/promises";
 import { fileURLToPath } from "node:url";
 import type { Browser, CDPSession, ConsoleMessage, Page, Request, Response } from "playwright-core";
 import { connectToBrowser, type BrowserConnection } from "./connect.js";
+import { analyzeCpuProfile } from "./analyze-cpu-profile.js";
 import {
+  DEFAULT_CPU_SAMPLING_INTERVAL_US,
   MAX_VIOLATION_TARGETS,
   NAVIGATION_TIMEOUT_MS,
   PERFORMANCE_OBSERVE_WINDOW_MS,
+  REACT_PROFILE_FLUSH_MS,
   REACT_PROFILER_INJECT_FILE,
   SETTLE_TIMEOUT_MS,
 } from "./constants.js";
 import { collectPerformanceReport } from "./perf-observer.js";
+import { analyzeReactProfile } from "./react-profiler/analyze-profile.js";
 import type {
   AccessibilityViolation,
   BrowserConnectOptions,
@@ -17,8 +21,11 @@ import type {
   NetworkRequestEntry,
   PageInspection,
   PerformanceReport,
+  ProfileAnalysis,
+  ProfileOptions,
   Viewport,
 } from "./types.js";
+import { delay } from "./utils/delay.js";
 
 // Which signals to collect during a single capture load. Listeners and the perf
 // observers all attach before one navigation, so any combination costs one load.
@@ -312,6 +319,53 @@ export class BrowserSession {
     return this.measureCurrentPerformance();
   }
 
+  // Profile a page with both lenses in one recording: the V8 CPU profiler (the
+  // literal Chrome DevTools profiler, over CDP) and the React DevTools render
+  // profiler. Navigation happens first (it can swap renderer processes), so both
+  // cover the same window: post-load plus whatever `interaction` drives. React
+  // data is null on a production build or a page not opened with the profiler.
+  async profile(options: ProfileOptions = {}): Promise<ProfileAnalysis> {
+    if (options.url) {
+      await this.openWithReactProfiler(options.url);
+    } else {
+      await this.settle();
+    }
+
+    const cdpSession = await this.page.context().newCDPSession(this.page);
+    let reactRecording = false; // true only while the page's React profiler records
+    try {
+      await cdpSession.send("Profiler.enable");
+      await cdpSession.send("Profiler.setSamplingInterval", {
+        interval: options.samplingIntervalUs ?? DEFAULT_CPU_SAMPLING_INTERVAL_US,
+      });
+      await cdpSession.send("Profiler.start");
+      reactRecording = await this.page.evaluate(() => {
+        if (!globalThis.__REACT_PERF__) return false;
+        globalThis.__REACT_PERF__.start();
+        return true;
+      });
+      if (options.interaction) await this.evaluate(options.interaction);
+      // Let React's commits flush and interaction-triggered work run; else skip.
+      if (reactRecording || options.interaction) await delay(REACT_PROFILE_FLUSH_MS);
+      const reactExport = reactRecording
+        ? await this.page.evaluate(() => globalThis.__REACT_PERF__?.stop() ?? null)
+        : null;
+      reactRecording = false;
+      const { profile } = await cdpSession.send("Profiler.stop");
+      return {
+        react: reactExport ? analyzeReactProfile(reactExport) : null,
+        cpu: analyzeCpuProfile(profile),
+      };
+    } finally {
+      // A failed interaction must not leave the page's React profiler recording.
+      if (reactRecording) {
+        await this.page.evaluate(() => void globalThis.__REACT_PERF__?.stop()).catch(() => {});
+      }
+      await cdpSession.send("Profiler.disable").catch(() => {});
+      await cdpSession.detach().catch(() => {});
+    }
+  }
+
   async inspectPage(url?: string): Promise<PageInspection> {
     const capture = await this.runCapture(url, {
       console: true,
diff --git a/packages/browser/src/types.ts b/packages/browser/src/types.ts
index 865e5bb54..70272dd76 100644
--- a/packages/browser/src/types.ts
+++ b/packages/browser/src/types.ts
@@ -60,3 +60,67 @@ export interface PageInspection {
   performance: PerformanceReport;
   accessibility: AccessibilityViolation[];
 }
+
+export interface ProfileOptions {
+  // Load this URL with the React profiler injected before recording. Omit to
+  // profile a page already opened with the profiler (`browser open`).
+  url?: string;
+  // Playwright expression (the `page` is in scope) driven while recording, so the
+  // renders and CPU work it triggers are captured. Omit to profile post-load.
+  interaction?: string;
+  // V8 CPU sampling interval in microseconds. Defaults to 100us, like DevTools.
+  samplingIntervalUs?: number;
+}
+
+export interface ReactComponentRenderStat {
+  name: string;
+  renderCount: number;
+  totalSelfMs: number;
+  totalActualMs: number;
+  maxSelfMs: number;
+  // Renders where nothing this component owns changed — not a first mount, no
+  // hook/state/props/context change — so it re-rendered only because a parent
+  // did. These are the memo / useCallback / context-split targets.
+  unnecessaryRenderCount: number;
+}
+
+export interface ReactProfileCommitStat {
+  commitIndex: number;
+  durationMs: number;
+  // Names of components that rendered in this commit (capped), highest self time first.
+  components: string[];
+}
+
+export interface ReactProfileAnalysis {
+  rootCount: number;
+  commitCount: number;
+  totalCommitDurationMs: number;
+  // Total wasted renders across all components (see ReactComponentRenderStat).
+  unnecessaryRenderCount: number;
+  topComponents: ReactComponentRenderStat[];
+  slowestCommits: ReactProfileCommitStat[];
+}
+
+export interface CpuProfileFunctionStat {
+  functionName: string;
+  // Source `url:line` (1-based), or null for V8 synthetic frames ((idle), etc.).
+  url: string | null;
+  selfMs: number;
+  selfPercent: number;
+}
+
+export interface CpuProfileAnalysis {
+  durationMs: number;
+  sampleCount: number;
+  // Functions ranked by self time — where JS wall time actually went, the same
+  // signal as DevTools' bottom-up view.
+  topFunctions: CpuProfileFunctionStat[];
+}
+
+// One recording, both lenses: the React render profile (which components
+// re-rendered and why) and the V8 CPU profile (which JS functions cost time).
+// `react` is null on a production React build or a page without the profiler.
+export interface ProfileAnalysis {
+  react: ReactProfileAnalysis | null;
+  cpu: CpuProfileAnalysis;
+}
diff --git a/packages/browser/src/utils/round.ts b/packages/browser/src/utils/round.ts
new file mode 100644
index 000000000..84a4b459f
--- /dev/null
+++ b/packages/browser/src/utils/round.ts
@@ -0,0 +1,3 @@
+// Round a millisecond/percent value to two decimals, so profile analyses read
+// cleanly instead of carrying float noise like 3.1999999999998.
+export const roundToHundredths = (value: number): number => Math.round(value * 100) / 100;
diff --git a/packages/browser/tests/analyze-cpu-profile.test.ts b/packages/browser/tests/analyze-cpu-profile.test.ts
new file mode 100644
index 000000000..cd2a23455
--- /dev/null
+++ b/packages/browser/tests/analyze-cpu-profile.test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, it } from "vite-plus/test";
+import { analyzeCpuProfile, type CdpCpuProfile } from "../src/analyze-cpu-profile.js";
+
+const frame = (
+  functionName: string,
+  url = "",
+  lineNumber = 0,
+): CdpCpuProfile["nodes"][number]["callFrame"] => ({
+  functionName,
+  url,
+  lineNumber,
+});
+
+describe("analyzeCpuProfile", () => {
+  it("attributes self time per function from samples and time deltas", () => {
+    const profile: CdpCpuProfile = {
+      startTime: 0,
+      endTime: 10_000,
+      nodes: [
+        { id: 1, callFrame: frame("(root)") },
+        { id: 2, callFrame: frame("render", "app.js", 41) },
+        { id: 3, callFrame: frame("parse", "lib.js", 9) },
+      ],
+      samples: [2, 3, 2],
+      timeDeltas: [4000, 3000, 3000],
+    };
+
+    const analysis = analyzeCpuProfile(profile);
+    expect(analysis.durationMs).toBe(10);
+    expect(analysis.sampleCount).toBe(3);
+
+    const render = analysis.topFunctions[0];
+    expect(render?.functionName).toBe("render");
+    expect(render?.url).toBe("app.js:42");
+    expect(render?.selfMs).toBe(7); // 4000us + 3000us
+    expect(render?.selfPercent).toBe(70);
+
+    const parse = analysis.topFunctions[1];
+    expect(parse?.functionName).toBe("parse");
+    expect(parse?.selfMs).toBe(3);
+  });
+
+  it("labels anonymous functions and null urls for synthetic frames", () => {
+    const profile: CdpCpuProfile = {
+      startTime: 0,
+      endTime: 2000,
+      nodes: [
+        { id: 1, callFrame: frame("") },
+        { id: 2, callFrame: frame("(idle)") },
+      ],
+      samples: [1, 2],
+      timeDeltas: [1000, 1000],
+    };
+    const analysis = analyzeCpuProfile(profile);
+    const anon = analysis.topFunctions.find((stat) => stat.functionName === "(anonymous)");
+    const idle = analysis.topFunctions.find((stat) => stat.functionName === "(idle)");
+    expect(anon?.url).toBeNull();
+    expect(idle?.url).toBeNull();
+  });
+
+  it("falls back to hitCount when no sample stream is present", () => {
+    const profile: CdpCpuProfile = {
+      startTime: 0,
+      endTime: 4000,
+      nodes: [
+        { id: 1, callFrame: frame("a", "a.js", 0), hitCount: 3 },
+        { id: 2, callFrame: frame("b", "b.js", 0), hitCount: 1 },
+      ],
+    };
+    const analysis = analyzeCpuProfile(profile);
+    expect(analysis.sampleCount).toBe(0);
+    expect(analysis.topFunctions[0]?.functionName).toBe("a");
+    expect(analysis.topFunctions[0]?.selfMs).toBe(3); // 3/4 of 4ms
+  });
+});
diff --git a/packages/browser/tests/react-profiler/analyze-profile.test.ts b/packages/browser/tests/react-profiler/analyze-profile.test.ts
new file mode 100644
index 000000000..3faa13abb
--- /dev/null
+++ b/packages/browser/tests/react-profiler/analyze-profile.test.ts
@@ -0,0 +1,134 @@
+import { describe, expect, it } from "vite-plus/test";
+import { analyzeReactProfile } from "../../src/react-profiler/analyze-profile.js";
+import type {
+  ReactProfilerCommitDataExport,
+  ReactProfilerDataExport,
+} from "../../src/react-profiler/types/profiling-export.js";
+
+const commit = (
+  overrides: Partial<ReactProfilerCommitDataExport> = {},
+): ReactProfilerCommitDataExport => ({
+  changeDescriptions: null,
+  duration: 1,
+  effectDuration: null,
+  fiberActualDurations: [],
+  fiberSelfDurations: [],
+  passiveEffectDuration: null,
+  priorityLevel: "Normal",
+  timestamp: 0,
+  updaters: null,
+  ...overrides,
+});
+
+const exportWith = (commits: ReactProfilerCommitDataExport[]): ReactProfilerDataExport => ({
+  version: 5,
+  dataForRoots: [
+    {
+      rootID: 1,
+      displayName: "App",
+      commitData: commits,
+      initialTreeBaseDurations: [],
+      elementNames: [
+        [2, "List"],
+        [3, "Row"],
+      ],
+    },
+  ],
+});
+
+describe("analyzeReactProfile", () => {
+  it("aggregates self-time, render counts, and slowest commits per component", () => {
+    const analysis = analyzeReactProfile(
+      exportWith([
+        commit({
+          duration: 5,
+          fiberSelfDurations: [
+            [2, 3],
+            [3, 2],
+          ],
+          fiberActualDurations: [
+            [2, 5],
+            [3, 2],
+          ],
+        }),
+        commit({ duration: 1, fiberSelfDurations: [[3, 1]], fiberActualDurations: [[3, 1]] }),
+      ]),
+    );
+
+    expect(analysis.rootCount).toBe(1);
+    expect(analysis.commitCount).toBe(2);
+    expect(analysis.totalCommitDurationMs).toBe(6);
+
+    const list = analysis.topComponents.find((stat) => stat.name === "List");
+    const row = analysis.topComponents.find((stat) => stat.name === "Row");
+    expect(list).toMatchObject({ renderCount: 1, totalSelfMs: 3, maxSelfMs: 3 });
+    expect(row).toMatchObject({ renderCount: 2, totalSelfMs: 3 });
+    // List and Row tie on total self time (3ms each); the stable sort keeps List first.
+    expect(analysis.topComponents[0]?.name).toBe("List");
+    // Slowest commit first.
+    expect(analysis.slowestCommits[0]?.durationMs).toBe(5);
+    expect(analysis.slowestCommits[0]?.components).toEqual(["List", "Row"]);
+  });
+
+  it("counts a render with no owned change as unnecessary", () => {
+    const analysis = analyzeReactProfile(
+      exportWith([
+        commit({
+          duration: 2,
+          fiberSelfDurations: [[3, 2]],
+          changeDescriptions: [
+            [
+              3,
+              { context: null, didHooksChange: false, isFirstMount: false, props: [], state: [] },
+            ],
+          ],
+        }),
+      ]),
+    );
+    expect(analysis.unnecessaryRenderCount).toBe(1);
+    expect(analysis.topComponents.find((stat) => stat.name === "Row")?.unnecessaryRenderCount).toBe(
+      1,
+    );
+  });
+
+  it("does not flag a first mount or a real prop/state/hook/context change", () => {
+    const analysis = analyzeReactProfile(
+      exportWith([
+        commit({
+          fiberSelfDurations: [
+            [2, 1],
+            [3, 1],
+          ],
+          changeDescriptions: [
+            [
+              2,
+              {
+                context: null,
+                didHooksChange: false,
+                isFirstMount: true,
+                props: null,
+                state: null,
+              },
+            ],
+            [
+              3,
+              {
+                context: null,
+                didHooksChange: false,
+                isFirstMount: false,
+                props: ["value"],
+                state: null,
+              },
+            ],
+          ],
+        }),
+      ]),
+    );
+    expect(analysis.unnecessaryRenderCount).toBe(0);
+  });
+
+  it("falls back to a fiber id when a name is unresolved", () => {
+    const analysis = analyzeReactProfile(exportWith([commit({ fiberSelfDurations: [[99, 1]] })]));
+    expect(analysis.topComponents[0]?.name).toBe("#99");
+  });
+});
diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
index 198afeca1..3dd4c8820 100644
--- a/packages/mcp/src/tools/browser.ts
+++ b/packages/mcp/src/tools/browser.ts
@@ -78,7 +78,7 @@ export const registerBrowserTools = (server: McpServer): void => {
     {
       title: "Open a URL with the React profiler",
       description:
-        "Open a URL in the attached Chrome and keep the page, injecting the React DevTools profiler so browser_eval can drive window.__REACT_PERF__ (start()/stop()) for render profiling. Attaches to your running Chrome over CDP, launching a dedicated one only as a fallback.",
+        "Open a URL in the attached Chrome and keep the page, injecting the React DevTools profiler. Use browser_profile for a one-shot record + analysis; for manual control, browser_eval can drive window.__REACT_PERF__ (start()/stop()). Attaches to your running Chrome over CDP, launching a dedicated one only as a fallback.",
       inputSchema: { url: z.string().describe("URL to open"), ...connectionShape },
       annotations: { openWorldHint: true },
     },
@@ -91,6 +91,40 @@ export const registerBrowserTools = (server: McpServer): void => {
       }),
   );
 
+  server.registerTool(
+    "browser_profile",
+    {
+      title: "Profile React renders and CPU in one recording",
+      description:
+        "Record one profile with both lenses and return { react, cpu }. `react`: the React render profile — slowest commits, components that render most/cost the most self time, and unnecessary re-renders (re-rendered with nothing they own changed: memo/useCallback/context targets); null on a production React build. `cpu`: a Chrome DevTools CPU profile (V8 sampler over CDP) with functions ranked by self time (DevTools' bottom-up view). Pass `url` to load and profile a page, and/or `interaction` (a Playwright expression, `page` in scope) to record what it triggers. Omit `url` to profile a page already opened with browser_open.",
+      inputSchema: {
+        url: z
+          .string()
+          .optional()
+          .describe(
+            "URL to load with the profiler before recording; omit to profile the open page",
+          ),
+        interaction: z
+          .string()
+          .optional()
+          .describe(
+            'Playwright expression to drive while recording, e.g. page.getByRole("button").click()',
+          ),
+        ...connectionShape,
+        ...viewportShape,
+      },
+      annotations: { openWorldHint: true },
+    },
+    (args) =>
+      runTool(async () =>
+        jsonResult(
+          await withSession(toConnection(args), (session) =>
+            session.profile({ url: args.url, interaction: args.interaction }),
+          ),
+        ),
+      ),
+  );
+
   server.registerTool(
     "browser_eval",
     {
diff --git a/packages/mcp/tests/server.test.ts b/packages/mcp/tests/server.test.ts
index e35a38dc2..0e9e8f767 100644
--- a/packages/mcp/tests/server.test.ts
+++ b/packages/mcp/tests/server.test.ts
@@ -25,6 +25,7 @@ test("registers the doctor, browser, and debug tools", async () => {
     "browser_network",
     "browser_open",
     "browser_perf",
+    "browser_profile",
     "browser_report",
     "browser_screenshot",
     "browser_snapshot",
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index e55f86315..2ba16406b 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -2,8 +2,10 @@ import {
   BrowserSession,
   type AccessibilityViolation,
   type ConsoleMessageEntry,
+  type CpuProfileAnalysis,
   type NetworkRequestEntry,
   type PerformanceReport,
+  type ReactProfileAnalysis,
   type Viewport,
 } from "@react-doctor/browser";
 import { DEFAULT_SCREENSHOT_FILENAME, METRIC } from "../utils/constants.js";
@@ -15,6 +17,7 @@ export interface BrowserCommandOptions {
   launch?: boolean;
   out?: string;
   viewport?: Viewport;
+  interaction?: string;
 }
 
 // playwright-core loads lazily inside @react-doctor/browser (only when a command
@@ -47,7 +50,7 @@ export const browserOpenAction = async (
     await session.openWithReactProfiler(url);
     logger.success(`Opened ${url}`);
     logger.log(
-      "React profiler ready: `browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'`, run a scenario, then `stop()` for the DevTools profiling export.",
+      "React profiler ready: `browser profile --interaction '...'` for a one-shot record + analysis, or drive it manually with `browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'` then `stop()`.",
     );
     if (session.launched) {
       logger.log(
@@ -138,6 +141,63 @@ const printPerformanceReport = (report: PerformanceReport): void => {
   }
 };
 
+const printReactProfile = (analysis: ReactProfileAnalysis): void => {
+  logger.log(
+    `${analysis.commitCount} commit(s) across ${analysis.rootCount} root(s), ${analysis.totalCommitDurationMs}ms total render time, ${analysis.unnecessaryRenderCount} unnecessary render(s)`,
+  );
+  if (analysis.topComponents.length > 0) {
+    logger.log("Hottest components (self time):");
+    for (const component of analysis.topComponents) {
+      const wasted =
+        component.unnecessaryRenderCount > 0
+          ? `, ${component.unnecessaryRenderCount} unnecessary`
+          : "";
+      logger.log(
+        `  ${component.totalSelfMs}ms  ${component.name} — ${component.renderCount} render(s)${wasted}`,
+      );
+    }
+  }
+  if (analysis.slowestCommits.length > 0) {
+    logger.log("Slowest commits:");
+    for (const commit of analysis.slowestCommits) {
+      logger.log(`  ${commit.durationMs}ms — ${commit.components.join(", ") || "(no components)"}`);
+    }
+  }
+};
+
+const printCpuProfile = (analysis: CpuProfileAnalysis): void => {
+  logger.log(`${analysis.durationMs}ms profiled, ${analysis.sampleCount} sample(s)`);
+  if (analysis.topFunctions.length > 0) {
+    logger.log("Hottest functions (self time):");
+    for (const fn of analysis.topFunctions) {
+      const location = fn.url ? ` — ${fn.url}` : "";
+      logger.log(`  ${fn.selfMs}ms (${fn.selfPercent}%)  ${fn.functionName}${location}`);
+    }
+  }
+};
+
+export const browserProfileAction = async (
+  url: string | undefined,
+  options: BrowserCommandOptions,
+): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.profile" });
+  await withSession(options, async (session) => {
+    const analysis = await session.profile({ url, interaction: options.interaction });
+
+    logger.log("# React renders");
+    if (analysis.react) {
+      printReactProfile(analysis.react);
+    } else {
+      logger.log(
+        "(no React data — needs a development build of React and renders during the recording)",
+      );
+    }
+
+    logger.log("\n# CPU");
+    printCpuProfile(analysis.cpu);
+  });
+};
+
 export const browserAuditAction = async (
   url: string | undefined,
   options: BrowserCommandOptions,
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
index 4e536a248..b61be04ef 100644
--- a/packages/react-doctor/src/cli/index.ts
+++ b/packages/react-doctor/src/cli/index.ts
@@ -8,6 +8,7 @@ import {
   browserNetworkAction,
   browserOpenAction,
   browserPerfAction,
+  browserProfileAction,
   browserReportAction,
   browserScreenshotAction,
   browserSnapshotAction,
@@ -313,6 +314,18 @@ withRenderOptions(
     ),
 ).action(browserPerfAction);
 
+withRenderOptions(
+  browser
+    .command("profile [url]")
+    .description(
+      "Profile React renders and CPU in one recording: slowest commits, hottest components, unnecessary re-renders, and the hottest JS functions",
+    )
+    .option(
+      "--interaction <expression>",
+      "Playwright expression to drive while recording, e.g. 'page.getByText(\"Next\").click()'",
+    ),
+).action(browserProfileAction);
+
 withRenderOptions(
   browser
     .command("report [url]")
diff --git a/skills/react-doctor/SKILL.md b/skills/react-doctor/SKILL.md
index e85b926fa..5778fc388 100644
--- a/skills/react-doctor/SKILL.md
+++ b/skills/react-doctor/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: react-doctor
 description: Use when writing, finishing, or committing React or React Native code, when the user types `/react-doctor`, or when they ask to scan, triage, lint, profile performance, debug a UI in the browser, or review design and accessibility. Covers lint, accessibility, performance, bundle size, and architecture.
-version: "1.5.0"
+version: "1.6.0"
 ---
 
 # React Doctor
@@ -30,7 +30,7 @@ Apply these on every React edit, before any tool runs. They shape how you write
 | Signal                                                  | Job        | What it does                    |
 | ------------------------------------------------------- | ---------- | ------------------------------- |
 | "review", "before commit", "clean up", or changed files | **doctor** | static scan plus 0 to 100 score |
-| "slow", "laggy", "janky", "re-rendering"                | **perf**   | React DevTools profiler harness |
+| "slow", "laggy", "janky", "re-rendering"                | **perf**   | React render + CPU profilers    |
 | "broken", "crashes", "doesn't work" in the UI           | **debug**  | reproduce in a real browser     |
 | "looks off", "polish", a screenshot or pasted element   | **design** | measured UI review              |
 
@@ -41,10 +41,20 @@ doctor runs from code alone, so it is the one that fires in the background. The
 debug, design, and perf need a real Chrome. Two ways to get one:
 
 1. **A browser MCP already in your tools.** Prefer [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) or similar for console, network, and snapshots. It adds full performance traces and Lighthouse on top.
-2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. It covers `open`, `eval`, `snapshot`, `screenshot`, `console`, `network`, an axe-core `audit`, and `perf` (long animation frames with per-script attribution).
+2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. It covers `open`, `eval`, `snapshot`, `screenshot`, `console`, `network`, an axe-core `audit`, `perf` (long animation frames with per-script attribution), `profile` (one recording with both lenses — a React render profile of slowest commits, hottest components, and unnecessary re-renders, plus a V8/DevTools CPU profile over CDP with the hottest JS functions ranked by self time), and `report` (every signal in one load).
 
 It is the same Chrome either way, so the playbooks apply to both: `browser open`, `eval`, `snapshot`, and `screenshot` map onto the MCP's `navigate_page`, `evaluate_script`, `take_snapshot`, and `take_screenshot`.
 
+## Run as an MCP server
+
+React Doctor ships its own Model Context Protocol server over stdio so any MCP-capable agent can call the jobs directly:
+
+```bash
+npx react-doctor@latest mcp
+```
+
+It exposes `doctor_scan` (the static scan), the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`, `browser_audit`, `browser_console`, `browser_network`, `browser_perf`, `browser_profile`, `browser_report`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_profile` records both a React render profile and a literal Chrome DevTools CPU profile in one pass.
+
 ## doctor: scan and triage
 
 After making React changes, run a regression check and confirm the score did not drop:
diff --git a/skills/react-doctor/references/performance.md b/skills/react-doctor/references/performance.md
index e0dc85c79..01ace8d41 100644
--- a/skills/react-doctor/references/performance.md
+++ b/skills/react-doctor/references/performance.md
@@ -36,7 +36,14 @@ npx react-doctor browser open http://localhost:3000
 
 For trustworthy timings, run against React's profiling build (alias `react-dom` to `react-dom/profiling` in your bundler) in a dev or non-prod build. Dev timings work but are inflated.
 
-Drive it through `browser eval` (the Playwright `page` is in scope). `stop()` returns a JSON profiling export and resolves to `null` when nothing was recorded (a production React build records no profiling data):
+The fastest path is `browser profile`: one recording, both lenses. It returns `react` (slowest commits, components that render most/cost the most self time, and the count of unnecessary re-renders — components that re-rendered with nothing they own changed, the memoization candidates) and `cpu` (a Chrome DevTools CPU profile via V8's sampling profiler over CDP, the hottest JS functions ranked by self time):
+
+```bash
+npx react-doctor browser profile http://localhost:3000 --interaction 'page.getByText("Next").click()'
+# omit the url to profile a page already opened with `browser open`
+```
+
+The `react` lens is null on a production React build (it records no profiling data); the `cpu` lens works on any build. For manual control of the React profiler, drive it through `browser eval` (the Playwright `page` is in scope):
 
 ```bash
 npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'
@@ -44,7 +51,7 @@ npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start()
 npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.stop())'
 ```
 
-Aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates).
+Reading the raw React export: aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates) — which is exactly what `browser profile` computes for you.
 
 ## 5. Fix, only with proof
 

From a7c4d4a923fa8bb564e5786590c3ea2ddea89f15 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Mon, 22 Jun 2026 17:21:55 -0700
Subject: [PATCH 09/38] fix(cli): allowlist browser profile's --interaction
 flag (bugbot)

The pre-parse strip's allowlist omitted --interaction, so the flag was
dropped and its Playwright expression leaked in as a positional, making
`browser profile --interaction` fail with too-many-arguments. Add it to
BROWSER_FLAG_SPEC with a regression test.
---
 .../src/cli/utils/strip-unknown-cli-flags.ts    | 12 ++++++------
 .../tests/strip-unknown-cli-flags.test.ts       | 17 +++++++++++++++++
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
index bece4fa1a..b4e9f7b66 100644
--- a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
+++ b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
@@ -100,14 +100,14 @@ const WHY_FLAG_SPEC: CliFlagSpec = {
 };
 
 // Union of every flag across the `browser` subcommands (open / eval / snapshot /
-// screenshot / audit / console / network / perf / report). The sub-subcommand
-// name and any URL / expression positional pass through untouched; only these
-// options need to survive the pre-parse strip so Commander can route them —
-// without this, `--cdp <endpoint>` is dropped and its value leaks in as a stray
-// positional.
+// screenshot / audit / console / network / perf / profile / report). The
+// sub-subcommand name and any URL / expression positional pass through
+// untouched; only these options need to survive the pre-parse strip so Commander
+// can route them — without this, e.g. `--cdp <endpoint>` is dropped and its
+// value leaks in as a stray positional.
 const BROWSER_FLAG_SPEC: CliFlagSpec = {
   longOptionsWithoutValues: new Set(["--help", "--no-launch"]),
-  longOptionsWithRequiredValues: new Set(["--cdp", "--out", "--viewport"]),
+  longOptionsWithRequiredValues: new Set(["--cdp", "--interaction", "--out", "--viewport"]),
   longOptionsWithOptionalValues: new Set(),
   shortOptionsWithoutValues: new Set(["-h"]),
   shortOptionsWithRequiredValues: new Set(),
diff --git a/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts b/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
index ae73df1f2..ffdd89162 100644
--- a/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
+++ b/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
@@ -158,6 +158,23 @@ describe("stripUnknownCliFlags", () => {
     expect(
       stripUserArguments(["browser", "eval", 'page.locator("a").click()', "--cdp", "http://x"]),
     ).toEqual(["browser", "eval", 'page.locator("a").click()', "--cdp", "http://x"]);
+    // Regression: `--interaction`'s Playwright expression must not leak as a
+    // positional, or `browser profile` rejects it as too many arguments.
+    expect(
+      stripUserArguments([
+        "browser",
+        "profile",
+        "https://example.com",
+        "--interaction",
+        'page.getByText("Next").click()',
+      ]),
+    ).toEqual([
+      "browser",
+      "profile",
+      "https://example.com",
+      "--interaction",
+      'page.getByText("Next").click()',
+    ]);
   });
 
   it("keeps debug serve flags and consumes their values (no value leaks as a positional)", () => {

From c4e565851dddd381045f7f6e9cb9f73b442ced39 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Mon, 22 Jun 2026 20:01:19 -0700
Subject: [PATCH 10/38] refactor: drop comments that restate names or duplicate
 doc

Remove two constant comments that just restated NAVIGATION_TIMEOUT_MS /
LAUNCH_READY_TIMEOUT_MS, and a CLI comment that duplicated the
openWithReactProfiler explanation already on the method it calls.
---
 packages/browser/src/constants.ts                 | 3 ---
 packages/react-doctor/src/cli/commands/browser.ts | 5 -----
 2 files changed, 8 deletions(-)

diff --git a/packages/browser/src/constants.ts b/packages/browser/src/constants.ts
index 3bd86d5de..24473cd8f 100644
--- a/packages/browser/src/constants.ts
+++ b/packages/browser/src/constants.ts
@@ -9,7 +9,6 @@ export const DEFAULT_CDP_ENDPOINT = `http://127.0.0.1:${DEFAULT_CDP_PORT}`;
 // How long to wait for a CDP attach before falling back to launching Chrome.
 export const CONNECT_TIMEOUT_MS = 5_000;
 
-// How long a single page navigation may take before we give up.
 export const NAVIGATION_TIMEOUT_MS = 30_000;
 
 // Upper bound on waiting for the page to settle (network quiet + fonts) before
@@ -29,8 +28,6 @@ export const LAUNCHED_CHROME_PROFILE_DIRECTORY = join(
   "chrome-profile",
 );
 
-// How long to wait for a freshly launched Chrome to expose its CDP endpoint,
-// and how often to poll for it.
 export const LAUNCH_READY_TIMEOUT_MS = 20_000;
 export const LAUNCH_POLL_INTERVAL_MS = 100;
 
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index 2ba16406b..808e72ffc 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -42,11 +42,6 @@ export const browserOpenAction = async (
 ): Promise<void> => {
   recordCount(METRIC.cliInvoked, 1, { command: "browser.open" });
   await withSession(options, async (session) => {
-    // Wires the DevTools profiler before the page's React loads, drives this one
-    // load, then removes the init-script registration — so it doesn't stack on
-    // repeated opens or re-run on a later `perf`/`report` navigation in the same
-    // persistent Chrome. The page persists, so later `eval`s reach
-    // `window.__REACT_PERF__`.
     await session.openWithReactProfiler(url);
     logger.success(`Opened ${url}`);
     logger.log(

From 35e366118efada875849dba1cc371ee65d139654 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 14:57:41 -0700
Subject: [PATCH 11/38] fix(browser): stop the React profiler when a profiled
 interaction throws

profile() only stopped the React render profiler on the success path, so a
throwing `interaction` left the renderer mid-recording. Because React's
startProfiling no-ops while already profiling, the next `browser profile`
run on the persistent page was skewed until reload. Stop the profiler in a
`finally` around the interaction window (bugbot).
---
 packages/browser/src/session.ts | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 59118705a..22df72897 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -14,6 +14,7 @@ import {
 } from "./constants.js";
 import { collectPerformanceReport } from "./perf-observer.js";
 import { analyzeReactProfile } from "./react-profiler/analyze-profile.js";
+import type { ReactProfilerDataExport } from "./react-profiler/types/profiling-export.js";
 import type {
   AccessibilityViolation,
   BrowserConnectOptions,
@@ -346,13 +347,24 @@ export class BrowserSession {
         globalThis.__REACT_PERF__.start();
         return true;
       });
-      if (options.interaction) await this.evaluate(options.interaction);
-      // Let React's commits flush (concurrent renders land async) and any
-      // interaction-triggered work run; skip the idle wait when neither applies.
-      if (reactStarted || options.interaction) await delay(REACT_PROFILE_FLUSH_MS);
-      const reactExport = reactStarted
-        ? await this.page.evaluate(() => globalThis.__REACT_PERF__?.stop() ?? null)
-        : null;
+
+      let reactExport: ReactProfilerDataExport | null = null;
+      try {
+        if (options.interaction) await this.evaluate(options.interaction);
+        // Let React's commits flush (concurrent renders land async) and any
+        // interaction-triggered work run; skip the idle wait when neither applies.
+        if (reactStarted || options.interaction) await delay(REACT_PROFILE_FLUSH_MS);
+      } finally {
+        // Always stop the React profiler, even if `interaction` threw: the
+        // renderer profiles the persistent page, and `start()` no-ops while
+        // already profiling, so a left-running recording would skew later runs
+        // until the page reloads.
+        if (reactStarted) {
+          reactExport = await this.page
+            .evaluate(() => globalThis.__REACT_PERF__?.stop() ?? null)
+            .catch(() => null);
+        }
+      }
 
       const { profile } = await cdpSession.send("Profiler.stop");
 

From b02f1020446ad376105934acf28084e91386f49d Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 18:03:51 -0700
Subject: [PATCH 12/38] refactor(browser): collapse the capture commands into
 eval --profile
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ten browser commands (and the confusing --interaction flag) became four:
open, eval, snapshot, screenshot. Measurement is now one verb — `eval
[expression] --profile` runs the expression while recording, then returns
the whole runtime picture (console, network, performance, accessibility,
React + CPU profiles) in a single pass. A bare `eval` still just drives or
reads the page. This replaces the audit/console/network/perf/profile/report
commands and the six matching browser_* MCP tools (now a `profile` arg on
browser_eval), so agents have one powerful API instead of many narrow ones.

Internally, six BrowserSession capture methods collapse into one inspect(),
and a shared formatEvalValue util removes the triplicated result formatting.
Also: open the CDP session before attaching listeners (so no listener leaks
if opening the session throws), detach listeners before the axe audit (so
the audit's own activity stays out of the capture), and guard a bare `eval`
before launching Chrome.
---
 .agents/skills/react-doctor/SKILL.md          |   6 +-
 .../skills/react-doctor/references/debug.md   |   7 +-
 .../skills/react-doctor/references/design.md  |   8 +-
 .../react-doctor/references/performance.md    |  23 +--
 .changeset/react-browser-debug-skill.md       |   2 +-
 packages/browser/src/constants.ts             |   4 -
 packages/browser/src/index.ts                 |   1 +
 packages/browser/src/session.ts               | 161 +++++-------------
 packages/browser/src/types.ts                 |  16 +-
 .../browser/src/utils/format-eval-value.ts    |   5 +
 packages/mcp/src/tools/browser.ts             | 141 +++------------
 packages/mcp/tests/server.test.ts             |   6 -
 .../react-doctor/src/cli/commands/browser.ts  | 154 ++++++-----------
 packages/react-doctor/src/cli/index.ts        |  60 +------
 .../src/cli/utils/strip-unknown-cli-flags.ts  |  13 +-
 .../tests/strip-unknown-cli-flags.test.ts     |  30 +---
 skills/react-doctor/SKILL.md                  |   6 +-
 skills/react-doctor/references/debug.md       |   7 +-
 skills/react-doctor/references/design.md      |   8 +-
 skills/react-doctor/references/performance.md |  23 +--
 20 files changed, 182 insertions(+), 499 deletions(-)
 create mode 100644 packages/browser/src/utils/format-eval-value.ts

diff --git a/.agents/skills/react-doctor/SKILL.md b/.agents/skills/react-doctor/SKILL.md
index 5778fc388..4d3298dab 100644
--- a/.agents/skills/react-doctor/SKILL.md
+++ b/.agents/skills/react-doctor/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: react-doctor
 description: Use when writing, finishing, or committing React or React Native code, when the user types `/react-doctor`, or when they ask to scan, triage, lint, profile performance, debug a UI in the browser, or review design and accessibility. Covers lint, accessibility, performance, bundle size, and architecture.
-version: "1.6.0"
+version: "1.7.0"
 ---
 
 # React Doctor
@@ -41,7 +41,7 @@ doctor runs from code alone, so it is the one that fires in the background. The
 debug, design, and perf need a real Chrome. Two ways to get one:
 
 1. **A browser MCP already in your tools.** Prefer [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) or similar for console, network, and snapshots. It adds full performance traces and Lighthouse on top.
-2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. It covers `open`, `eval`, `snapshot`, `screenshot`, `console`, `network`, an axe-core `audit`, `perf` (long animation frames with per-script attribution), `profile` (one recording with both lenses — a React render profile of slowest commits, hottest components, and unnecessary re-renders, plus a V8/DevTools CPU profile over CDP with the hottest JS functions ranked by self time), and `report` (every signal in one load).
+2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. Four commands: `open` (load a URL, leave the page, React profiler injected), `eval` (run a Playwright expression with `page` in scope — returns its value), `snapshot` (accessibility tree), and `screenshot`. Add `--profile` to `eval` to record the whole runtime picture while the expression runs — console, network, performance (long animation frames with per-script attribution, LCP, CLS), an axe-core accessibility audit, a React render profile (slowest commits, hottest components, unnecessary re-renders), and a V8/DevTools CPU profile over CDP (hottest JS functions by self time). Run `eval --profile` with no expression to measure the live page as it is.
 
 It is the same Chrome either way, so the playbooks apply to both: `browser open`, `eval`, `snapshot`, and `screenshot` map onto the MCP's `navigate_page`, `evaluate_script`, `take_snapshot`, and `take_screenshot`.
 
@@ -53,7 +53,7 @@ React Doctor ships its own Model Context Protocol server over stdio so any MCP-c
 npx react-doctor@latest mcp
 ```
 
-It exposes `doctor_scan` (the static scan), the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`, `browser_audit`, `browser_console`, `browser_network`, `browser_perf`, `browser_profile`, `browser_report`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_profile` records both a React render profile and a literal Chrome DevTools CPU profile in one pass.
+It exposes `doctor_scan` (the static scan), the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_eval` takes a `profile: true` argument that records the whole runtime picture — console, network, performance, accessibility, a React render profile, and a literal Chrome DevTools CPU profile — in one pass while the expression runs.
 
 ## doctor: scan and triage
 
diff --git a/.agents/skills/react-doctor/references/debug.md b/.agents/skills/react-doctor/references/debug.md
index afd5431e2..26cefe4ef 100644
--- a/.agents/skills/react-doctor/references/debug.md
+++ b/.agents/skills/react-doctor/references/debug.md
@@ -51,15 +51,14 @@ Wrap every debug log in `// #region debug log` and `// #endregion` so cleanup la
 
 Clear the log file (`DELETE` the file at `logPath`) before each run, then trigger the exact behavior the user described:
 
-- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser console` and `browser network` hand you the runtime console (with uncaught errors) and the request waterfall with failures flagged, often the evidence you need before instrumenting at all. To get the whole picture in one pass, `browser report` captures console, network, performance, and accessibility in a single page load instead of reloading once per command; prefer it over running the four separately. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
+- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser eval --profile` hands you the whole runtime picture in one pass — the console (with uncaught errors), the network waterfall with failures flagged, performance, accessibility, and the React + CPU profiles — so you rarely need to instrument at all. Run it with no expression to read the page as it is, or pass the repro to record what it triggers. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
 
 ```bash
 npx react-doctor browser open http://localhost:3000           # attach + open the page
-npx react-doctor browser report http://localhost:3000         # console + network + perf + a11y in one load
-npx react-doctor browser console http://localhost:3000        # console output + uncaught errors
-npx react-doctor browser network http://localhost:3000        # request waterfall, failures flagged
+npx react-doctor browser eval --profile                       # console + network + perf + a11y + React/CPU in one pass
 npx react-doctor browser snapshot                             # what rendered, by role + name
 npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()'
+npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()' --profile  # drive + measure it
 npx react-doctor browser eval 'page.evaluate(() => document.title)'   # raw DOM when you need it
 ```
 
diff --git a/.agents/skills/react-doctor/references/design.md b/.agents/skills/react-doctor/references/design.md
index 74f50928f..8a6e6a405 100644
--- a/.agents/skills/react-doctor/references/design.md
+++ b/.agents/skills/react-doctor/references/design.md
@@ -9,10 +9,10 @@ The value here is what a screenshot and the live DOM let you measure that readin
 ```bash
 npx react-doctor browser open http://localhost:3000
 npx react-doctor browser screenshot --out review.png   # what the user actually sees
-npx react-doctor browser audit                          # axe-core: contrast, names, landmarks
+npx react-doctor browser eval --profile                # full picture incl. axe-core a11y: contrast, names, landmarks
 ```
 
-Review responsive breakpoints with `--viewport WIDTHxHEIGHT` (for example `--viewport 390x844` for a phone) on `screenshot`, `snapshot`, `audit`, or `perf`. It emulates the size for that one command via a CDP override, so it never resizes your real browser window:
+Review responsive breakpoints with `--viewport WIDTHxHEIGHT` (for example `--viewport 390x844` for a phone) on `screenshot`, `snapshot`, or `eval`. It emulates the size for that one command via a CDP override, so it never resizes your real browser window:
 
 ```bash
 npx react-doctor browser screenshot --viewport 390x844 --out mobile.png
@@ -24,7 +24,7 @@ Look at the screenshot, then measure specifics with `eval` (computed styles, bou
 npx react-doctor browser eval 'page.evaluate(() => getComputedStyle(document.querySelector("button")).fontSize)'
 ```
 
-`browser audit` runs axe-core against the live page and reports accessibility violations (color contrast, missing button or SVG names, heading order, landmarks) with the failing selectors. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, its `lighthouse_audit` adds performance and best-practice findings on top. Lead with the measured issues; a smarter model cannot dismiss them as opinion.
+`browser eval --profile` runs axe-core against the live page and reports accessibility violations (color contrast, missing button or SVG names, heading order, landmarks) with the failing selectors in its Accessibility section. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, its `lighthouse_audit` adds performance and best-practice findings on top. Lead with the measured issues; a smarter model cannot dismiss them as opinion.
 
 ## What to check
 
@@ -44,7 +44,7 @@ Then craft, drawing on the bundled design rules:
 
 ## The loop
 
-Build or fix, screenshot, re-audit, compare. Confirm the measured issue you targeted actually moved (the ratio crossed the threshold, the target grew) and that the screenshot looks right before and after.
+Build or fix, screenshot, re-check, compare. Confirm the measured issue you targeted actually moved (the ratio crossed the threshold, the target grew) and that the screenshot looks right before and after.
 
 ## Working rules
 
diff --git a/.agents/skills/react-doctor/references/performance.md b/.agents/skills/react-doctor/references/performance.md
index 01ace8d41..63363d7c0 100644
--- a/.agents/skills/react-doctor/references/performance.md
+++ b/.agents/skills/react-doctor/references/performance.md
@@ -10,15 +10,16 @@ Why is it slow, and where? Common React causes: unstable callback or object prop
 
 ## 2. Capture (no app changes)
 
-`browser perf` arms the LoAF, LCP, and CLS observers, loads the page, watches briefly past load, then reports the worst frames first with per-script attribution:
+`browser eval --profile` arms every observer (LoAF/LCP/CLS, the React render profiler, and a V8 CPU profiler), runs the expression you pass while it records, then reports the worst frames first with per-script attribution. Drive a fresh load by passing the navigation, or omit the expression to read the page as it is now without reloading:
 
 ```bash
-npx react-doctor browser perf http://localhost:3000   # measures the current page if URL omitted
+npx react-doctor browser eval 'page.goto("http://localhost:3000")' --profile
+npx react-doctor browser eval --profile   # measures the current page, no reload
 ```
 
-It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The output leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
+It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The performance section leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
 
-To attribute interaction jank (a slow click, scroll, or keypress), drive the repro between load and the read: `browser open`, then `browser eval` the interaction, then `browser perf` with no URL. Without a URL it does not reload; it reads the long frames already buffered in the timeline, so the jank from your interaction is included.
+To attribute interaction jank (a slow click, scroll, or keypress), pass the repro as the expression so it runs while recording: `browser open` the page, then `browser eval 'page.getByText("Next").click()' --profile`. The recording covers the action, so its frames, renders, and CPU samples are all included.
 
 ## 3. Analyze the worst frame first
 
@@ -28,22 +29,16 @@ The output is already sorted worst-first. The script with the largest duration i
 
 ## 4. Zoom into React renders (optional)
 
-When the worst frame's script is your own React bundle and you need per-component render counts and why each rendered, profile React directly. `browser open` injects the real DevTools profiler before the page loads, so there are no app changes, no Chrome extension, and no manual record or stop:
+When the worst frame's script is your own React bundle and you need per-component render counts and why each rendered, profile React directly. `browser open` injects the real DevTools profiler before the page loads, so there are no app changes, no Chrome extension, and no manual record or stop. Open the page, then drive the repro with `browser eval --profile`:
 
 ```bash
 npx react-doctor browser open http://localhost:3000
+npx react-doctor browser eval 'page.getByText("Next").click()' --profile
 ```
 
 For trustworthy timings, run against React's profiling build (alias `react-dom` to `react-dom/profiling` in your bundler) in a dev or non-prod build. Dev timings work but are inflated.
 
-The fastest path is `browser profile`: one recording, both lenses. It returns `react` (slowest commits, components that render most/cost the most self time, and the count of unnecessary re-renders — components that re-rendered with nothing they own changed, the memoization candidates) and `cpu` (a Chrome DevTools CPU profile via V8's sampling profiler over CDP, the hottest JS functions ranked by self time):
-
-```bash
-npx react-doctor browser profile http://localhost:3000 --interaction 'page.getByText("Next").click()'
-# omit the url to profile a page already opened with `browser open`
-```
-
-The `react` lens is null on a production React build (it records no profiling data); the `cpu` lens works on any build. For manual control of the React profiler, drive it through `browser eval` (the Playwright `page` is in scope):
+`browser eval --profile` records one pass with both lenses. The `react` lens reports the slowest commits, the components that render most often or cost the most self time, and the count of unnecessary re-renders (components that re-rendered although nothing they own changed — the memoization candidates). The `cpu` lens is a Chrome DevTools CPU profile taken with V8's sampling profiler over CDP: the hottest JS functions ranked by self time. The `react` lens is null on a production React build (which records no profiling data); the `cpu` lens works on any build. For manual control of the React profiler, drive it through `browser eval` without `--profile` (the Playwright `page` is in scope):
 
 ```bash
 npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'
@@ -51,7 +46,7 @@ npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start()
 npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.stop())'
 ```
 
-Reading the raw React export: aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates) — which is exactly what `browser profile` computes for you.
+Reading the raw React export: aggregate `dataForRoots[].commitData[]` per fiber into a render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs), and read `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything is keyed by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates) — which is exactly what `browser eval --profile` computes for you.
 
 ## 5. Fix, only with proof
 
diff --git a/.changeset/react-browser-debug-skill.md b/.changeset/react-browser-debug-skill.md
index c38fa5aa7..7b754c07b 100644
--- a/.changeset/react-browser-debug-skill.md
+++ b/.changeset/react-browser-debug-skill.md
@@ -2,4 +2,4 @@
 "react-doctor": minor
 ---
 
-Add the `browser`, `debug`, and `mcp` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback) for accessibility audits, console/network capture, performance traces, profiling (`browser profile` records both a React render profile — slowest commits, hottest components by self time, unnecessary re-render counts — and a Chrome DevTools CPU profile via V8's sampling profiler over CDP, the hottest JS functions ranked by self time, in one pass), snapshots, and screenshots. `debug` runs an NDJSON logging server the debug job posts runtime evidence to. `mcp` runs a Model Context Protocol server over stdio that exposes the doctor scan and the browser/debug jobs as MCP tools, so any MCP-capable agent can run `react-doctor mcp` and call `doctor_scan`, the `browser_*` tools (including `browser_profile`, which captures the React render profile and the CPU profile together), and the `debug_*` log server directly.
+Add the `browser`, `debug`, and `mcp` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback): `open` a page, `eval` a Playwright expression, `snapshot` the accessibility tree, and `screenshot`. Adding `--profile` to `eval` records the whole runtime picture in one pass while the expression runs — console, network, performance traces (long animation frames with per-script attribution, LCP, CLS), an axe-core accessibility audit, a React render profile (slowest commits, hottest components by self time, unnecessary re-render counts), and a Chrome DevTools CPU profile via V8's sampling profiler over CDP (the hottest JS functions ranked by self time). `debug` runs an NDJSON logging server the debug job posts runtime evidence to. `mcp` runs a Model Context Protocol server over stdio that exposes the doctor scan and the browser/debug jobs as MCP tools, so any MCP-capable agent can run `react-doctor mcp` and call `doctor_scan`, the `browser_*` tools (`browser_eval` takes a `profile: true` argument that captures every signal together), and the `debug_*` log server directly.
diff --git a/packages/browser/src/constants.ts b/packages/browser/src/constants.ts
index 24473cd8f..0fc8a2a5f 100644
--- a/packages/browser/src/constants.ts
+++ b/packages/browser/src/constants.ts
@@ -43,10 +43,6 @@ export const MAX_VIOLATION_TARGETS = 5;
 // device-metrics override into CDP.
 export const MAX_VIEWPORT_PX = 10_000;
 
-// After driving a profiled interaction, wait this long for React's commits to
-// flush (concurrent renders land asynchronously) before stopping the recording.
-export const REACT_PROFILE_FLUSH_MS = 500;
-
 // Caps on what a profile analysis returns inline, so a long recording stays a
 // readable result rather than a dump keyed by thousands of fibers. The summary
 // counts still reflect everything recorded.
diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts
index e1628f2b3..ae35d96d9 100644
--- a/packages/browser/src/index.ts
+++ b/packages/browser/src/index.ts
@@ -2,4 +2,5 @@ export { BrowserSession } from "./session.js";
 export { connectToBrowser } from "./connect.js";
 export type { BrowserConnection } from "./connect.js";
 export { parseViewport } from "./parse-viewport.js";
+export { formatEvalValue } from "./utils/format-eval-value.js";
 export type * from "./types.js";
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 22df72897..2821dbd3f 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -8,7 +8,6 @@ import {
   MAX_VIOLATION_TARGETS,
   NAVIGATION_TIMEOUT_MS,
   PERFORMANCE_OBSERVE_WINDOW_MS,
-  REACT_PROFILE_FLUSH_MS,
   REACT_PROFILER_INJECT_FILE,
   SETTLE_TIMEOUT_MS,
 } from "./constants.js";
@@ -22,25 +21,8 @@ import type {
   NetworkRequestEntry,
   PageInspection,
   PerformanceReport,
-  ProfileAnalysis,
-  ProfileOptions,
   Viewport,
 } from "./types.js";
-import { delay } from "./utils/delay.js";
-
-// Which signals to collect during a single capture load. Listeners and the perf
-// observers all attach before one navigation, so any combination costs one load.
-interface CaptureSignals {
-  console: boolean;
-  network: boolean;
-  performance: boolean;
-}
-
-interface CaptureResult {
-  console: ConsoleMessageEntry[];
-  network: NetworkRequestEntry[];
-  performance: PerformanceReport;
-}
 
 const emptyPerformanceReport = (): PerformanceReport => ({
   longAnimationFrames: [],
@@ -179,15 +161,6 @@ export class BrowserSession {
     return this.page.screenshot({ path });
   }
 
-  async audit(url?: string): Promise<AccessibilityViolation[]> {
-    if (url) {
-      await this.navigate(url);
-    } else {
-      await this.settle();
-    }
-    return this.runAxe();
-  }
-
   // axe is injected with `evaluate`, not a <script> tag, so a strict CSP can't
   // block it. Loaded on demand so it stays out of bundles that don't audit.
   private async runAxe(): Promise<AccessibilityViolation[]> {
@@ -256,31 +229,6 @@ export class BrowserSession {
     };
   }
 
-  // Arm every requested observer before a single navigation, drive that one
-  // load, then read everything back — so capturing N signals costs ONE load,
-  // not N. Listeners detach in `finally` so a navigation error can't leak them.
-  private async runCapture(
-    url: string | undefined,
-    signals: CaptureSignals,
-  ): Promise<CaptureResult> {
-    const consoleEntries: ConsoleMessageEntry[] = [];
-    const networkByRequest = new Map<Request, NetworkRequestEntry>();
-    const detachers: Array<() => void> = [];
-    if (signals.console) detachers.push(this.collectConsole(consoleEntries));
-    if (signals.network) detachers.push(this.collectNetwork(networkByRequest));
-    let performance = emptyPerformanceReport();
-    try {
-      await this.navigate(url);
-      // Measure perf inside the try so console/network listeners stay attached
-      // through its observation window (the collector waits internally), catching
-      // post-load errors and requests. `buffered: true` replays the load's frames.
-      if (signals.performance) performance = await this.measureCurrentPerformance();
-    } finally {
-      for (const detach of detachers) detach();
-    }
-    return { console: consoleEntries, network: [...networkByRequest.values()], performance };
-  }
-
   // A per-page watermark inside collectPerformanceReport keeps a repeated
   // no-reload measurement from re-counting frames an earlier command already
   // reported on the same persistent page.
@@ -288,57 +236,27 @@ export class BrowserSession {
     return this.page.evaluate(collectPerformanceReport, PERFORMANCE_OBSERVE_WINDOW_MS);
   }
 
-  async captureConsole(url?: string): Promise<ConsoleMessageEntry[]> {
-    const { console } = await this.runCapture(url, {
-      console: true,
-      network: false,
-      performance: false,
-    });
-    return console;
-  }
-
-  async captureNetwork(url?: string): Promise<NetworkRequestEntry[]> {
-    const { network } = await this.runCapture(url, {
-      console: false,
-      network: true,
-      performance: false,
-    });
-    return network;
-  }
-
-  // Without a `url`, measure the page as it is now with no reload — a reload
-  // would wipe a just-performed `eval` interaction and its jank.
-  async measurePerformance(url?: string): Promise<PerformanceReport> {
-    if (url) {
-      const { performance } = await this.runCapture(url, {
-        console: false,
-        network: false,
-        performance: true,
-      });
-      return performance;
-    }
-    return this.measureCurrentPerformance();
-  }
-
-  // Profile a page with both lenses in one recording: the V8 CPU profiler (the
-  // literal Chrome DevTools profiler, over CDP) and the React DevTools render
-  // profiler. Navigation happens first: a top-level navigation can swap renderer
-  // processes, so a profiler must attach to the *final* document — and the React
-  // profiler can only start once renderers are attached. Both lenses therefore
-  // cover the same window: post-load plus whatever `interaction` drives. React
-  // data is null on a production build or a page not opened with the profiler.
-  async profile(options: ProfileOptions = {}): Promise<ProfileAnalysis> {
-    if (options.url) {
-      await this.openWithReactProfiler(options.url);
-    } else {
-      await this.settle();
-    }
-
+  // Drive the current page (optionally running `expression` — the same Playwright
+  // code `evaluate` takes) while recording the whole runtime picture in one pass:
+  // console + network listeners, a V8 CPU profile (the literal Chrome DevTools
+  // profiler over CDP), the React DevTools render profile, page performance, and
+  // an accessibility audit. This never navigates on its own — drive a fresh load
+  // with `inspect("page.goto('...')")`, or `open` a URL first then inspect an
+  // action on it. React data is null on a production build or a page not opened
+  // with the profiler; it covers the driven action, not the initial mount.
+  async inspect(expression?: string): Promise<PageInspection> {
+    const consoleEntries: ConsoleMessageEntry[] = [];
+    const networkByRequest = new Map<Request, NetworkRequestEntry>();
+    // Open the CDP session before attaching listeners: if `newCDPSession` throws,
+    // the listeners are never bound, so they can't leak onto the persistent page.
     const cdpSession = await this.page.context().newCDPSession(this.page);
+    const detachers: Array<() => void> = [];
     try {
+      detachers.push(this.collectConsole(consoleEntries), this.collectNetwork(networkByRequest));
+      await this.settle();
       await cdpSession.send("Profiler.enable");
       await cdpSession.send("Profiler.setSamplingInterval", {
-        interval: options.samplingIntervalUs ?? DEFAULT_CPU_SAMPLING_INTERVAL_US,
+        interval: DEFAULT_CPU_SAMPLING_INTERVAL_US,
       });
       await cdpSession.send("Profiler.start");
 
@@ -348,14 +266,17 @@ export class BrowserSession {
         return true;
       });
 
+      let result: unknown = null;
+      let performance = emptyPerformanceReport();
       let reactExport: ReactProfilerDataExport | null = null;
       try {
-        if (options.interaction) await this.evaluate(options.interaction);
-        // Let React's commits flush (concurrent renders land async) and any
-        // interaction-triggered work run; skip the idle wait when neither applies.
-        if (reactStarted || options.interaction) await delay(REACT_PROFILE_FLUSH_MS);
+        if (expression) result = (await this.evaluate(expression)) ?? null;
+        // The perf observe window doubles as the recording window: it runs after
+        // the driven action so post-action jank, React commits (concurrent
+        // renders land async), and CPU samples all land before we stop.
+        performance = await this.measureCurrentPerformance();
       } finally {
-        // Always stop the React profiler, even if `interaction` threw: the
+        // Always stop the React profiler, even if the expression threw: the
         // renderer profiles the persistent page, and `start()` no-ops while
         // already profiling, so a left-running recording would skew later runs
         // until the page reloads.
@@ -368,30 +289,30 @@ export class BrowserSession {
 
       const { profile } = await cdpSession.send("Profiler.stop");
 
+      // Detach the page listeners before the accessibility audit so axe's injected
+      // evaluate (and anything it logs) can't land in the captured signals.
+      for (const detach of detachers) detach();
+      detachers.length = 0;
+      const accessibility = await this.runAxe();
+
       return {
-        react: reactExport ? analyzeReactProfile(reactExport) : null,
-        cpu: analyzeCpuProfile(profile),
+        result,
+        console: consoleEntries,
+        network: [...networkByRequest.values()],
+        performance,
+        accessibility,
+        profile: {
+          react: reactExport ? analyzeReactProfile(reactExport) : null,
+          cpu: analyzeCpuProfile(profile),
+        },
       };
     } finally {
       await cdpSession.send("Profiler.disable").catch(() => {});
       await cdpSession.detach().catch(() => {});
+      for (const detach of detachers) detach();
     }
   }
 
-  async inspectPage(url?: string): Promise<PageInspection> {
-    const capture = await this.runCapture(url, {
-      console: true,
-      network: true,
-      performance: true,
-    });
-    return {
-      console: capture.console,
-      network: capture.network,
-      performance: capture.performance,
-      accessibility: await this.runAxe(),
-    };
-  }
-
   // Drop our CDP connection. This only disconnects — it never kills the browser,
   // whether the user had it open or we launched it — so the page stays alive and
   // the next `browser` command reattaches to the same live session.
diff --git a/packages/browser/src/types.ts b/packages/browser/src/types.ts
index 70272dd76..d7f5377fe 100644
--- a/packages/browser/src/types.ts
+++ b/packages/browser/src/types.ts
@@ -54,22 +54,16 @@ export interface PerformanceReport {
   cumulativeLayoutShift: number;
 }
 
+// The full runtime picture from one `inspect` pass: the driven expression's
+// return value plus every signal recorded while it ran.
 export interface PageInspection {
+  // The `expression`'s return value, or null when none was driven or it had none.
+  result: unknown;
   console: ConsoleMessageEntry[];
   network: NetworkRequestEntry[];
   performance: PerformanceReport;
   accessibility: AccessibilityViolation[];
-}
-
-export interface ProfileOptions {
-  // Load this URL with the React profiler injected before recording. Omit to
-  // profile a page already opened with the profiler (`browser open`).
-  url?: string;
-  // Playwright expression (the `page` is in scope) driven while recording, so the
-  // renders and CPU work it triggers are captured. Omit to profile post-load.
-  interaction?: string;
-  // V8 CPU sampling interval in microseconds (default like DevTools, 100us).
-  samplingIntervalUs?: number;
+  profile: ProfileAnalysis;
 }
 
 export interface ReactComponentRenderStat {
diff --git a/packages/browser/src/utils/format-eval-value.ts b/packages/browser/src/utils/format-eval-value.ts
new file mode 100644
index 000000000..160d5ffc6
--- /dev/null
+++ b/packages/browser/src/utils/format-eval-value.ts
@@ -0,0 +1,5 @@
+// Format an `evaluate`/`inspect` result for display: strings pass through as-is,
+// anything else is pretty-printed as JSON with a 2-space indent. Callers handle
+// the empty (undefined / null) case themselves, since each surface signals "no value" differently.
+export const formatEvalValue = (value: unknown): string =>
+  typeof value === "string" ? value : JSON.stringify(value, null, 2);
diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
index 3dd4c8820..71e172769 100644
--- a/packages/mcp/src/tools/browser.ts
+++ b/packages/mcp/src/tools/browser.ts
@@ -1,8 +1,6 @@
-import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
-import type { BrowserSession } from "@react-doctor/browser";
 import { z } from "zod";
-import { parseViewport } from "@react-doctor/browser";
+import { formatEvalValue, parseViewport } from "@react-doctor/browser";
 import { DEFAULT_CDP_ENDPOINT_HINT } from "../constants.js";
 import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
 import { withSession, type BrowserToolConnection } from "../utils/with-session.js";
@@ -25,13 +23,6 @@ const viewportShape = {
     .describe("Emulate a viewport for this call, WIDTHxHEIGHT in pixels (e.g. 390x844)"),
 };
 
-const urlShape = {
-  url: z
-    .string()
-    .optional()
-    .describe("URL to load; omit to read the current page without reloading"),
-};
-
 interface ConnectionArgs {
   cdp?: string;
   noLaunch?: boolean;
@@ -44,41 +35,13 @@ const toConnection = (args: ConnectionArgs): BrowserToolConnection => ({
   viewport: args.viewport ? parseViewport(args.viewport) : undefined,
 });
 
-interface PageToolDefinition {
-  name: string;
-  title: string;
-  description: string;
-  // Build the result inside the session scope (the session is disposed once
-  // this resolves), against the optional `url` to load.
-  run: (session: BrowserSession, url: string | undefined) => Promise<CallToolResult>;
-}
-
-// The read-only "load a page (or read the current one) and report" tools all
-// share the same url + connection + viewport inputs and session lifecycle, so
-// they register through this table rather than repeating the scaffolding.
-const registerPageTool = (server: McpServer, definition: PageToolDefinition): void => {
-  server.registerTool(
-    definition.name,
-    {
-      title: definition.title,
-      description: definition.description,
-      inputSchema: { ...urlShape, ...connectionShape, ...viewportShape },
-      annotations: { readOnlyHint: true, openWorldHint: true },
-    },
-    (args) =>
-      runTool(() =>
-        withSession(toConnection(args), (session) => definition.run(session, args.url)),
-      ),
-  );
-};
-
 export const registerBrowserTools = (server: McpServer): void => {
   server.registerTool(
     "browser_open",
     {
       title: "Open a URL with the React profiler",
       description:
-        "Open a URL in the attached Chrome and keep the page, injecting the React DevTools profiler. Use browser_profile for a one-shot record + analysis; for manual control, browser_eval can drive window.__REACT_PERF__ (start()/stop()). Attaches to your running Chrome over CDP, launching a dedicated one only as a fallback.",
+        "Open a URL in the attached Chrome and keep the page, injecting the React DevTools profiler so a later browser_eval with profile:true can capture React renders. Attaches to your running Chrome over CDP, launching a dedicated one only as a fallback.",
       inputSchema: { url: z.string().describe("URL to open"), ...connectionShape },
       annotations: { openWorldHint: true },
     },
@@ -86,64 +49,49 @@ export const registerBrowserTools = (server: McpServer): void => {
       runTool(async () => {
         await withSession(toConnection(args), (session) => session.openWithReactProfiler(args.url));
         return textResult(
-          `Opened ${args.url}. React profiler ready: call browser_eval with "page.evaluate(() => window.__REACT_PERF__.start())", drive a scenario, then stop() for the DevTools profiling export.`,
+          `Opened ${args.url}. To measure an action, call browser_eval with profile:true and an expression, e.g. page.getByText("Load more").click().`,
         );
       }),
   );
 
   server.registerTool(
-    "browser_profile",
+    "browser_eval",
     {
-      title: "Profile React renders and CPU in one recording",
+      title: "Run Playwright code, optionally profiling it",
       description:
-        "Record one profile with both lenses and return { react, cpu }. `react`: the React render profile — slowest commits, components that render most/cost the most self time, and unnecessary re-renders (re-rendered with nothing they own changed: memo/useCallback/context targets); null on a production React build. `cpu`: a Chrome DevTools CPU profile (V8 sampler over CDP) with functions ranked by self time (DevTools' bottom-up view). Pass `url` to load and profile a page, and/or `interaction` (a Playwright expression, `page` in scope) to record what it triggers. Omit `url` to profile a page already opened with browser_open.",
+        'Run an async expression with the Playwright `page` in scope (e.g. page.getByText("Login").click()) against the attached page. Two modes: by default it returns the expression\'s value — use it to locate, read, or drive the page. Set profile:true to instead record and return the full runtime picture while the expression runs. Open the page first with browser_open for React render data.',
       inputSchema: {
-        url: z
+        expression: z
           .string()
           .optional()
           .describe(
-            "URL to load with the profiler before recording; omit to profile the open page",
+            "Async expression with the Playwright `page` in scope; omit together with profile:true to measure the live page idle",
           ),
-        interaction: z
-          .string()
+        profile: z
+          .boolean()
           .optional()
           .describe(
-            'Playwright expression to drive while recording, e.g. page.getByRole("button").click()',
+            "Set true to record and return the full runtime picture while the expression runs — console, network, performance (jank/LCP/CLS), accessibility, the React render profile (slow commits, hot components, unnecessary re-renders), and a V8 CPU profile. Omit for just the expression's return value.",
           ),
         ...connectionShape,
         ...viewportShape,
       },
       annotations: { openWorldHint: true },
     },
-    (args) =>
-      runTool(async () =>
-        jsonResult(
-          await withSession(toConnection(args), (session) =>
-            session.profile({ url: args.url, interaction: args.interaction }),
-          ),
-        ),
-      ),
-  );
-
-  server.registerTool(
-    "browser_eval",
-    {
-      title: "Evaluate Playwright code on the page",
-      description:
-        'Run an async expression with the Playwright `page` in scope (e.g. page.locator("text=Login").click()) against the attached page. Use to drive the exact repro between opening a page and measuring it.',
-      inputSchema: {
-        expression: z.string().describe("Async expression with the Playwright `page` in scope"),
-        ...connectionShape,
-      },
-      annotations: { openWorldHint: true },
-    },
     (args) =>
       runTool(async () => {
+        if (args.profile) {
+          return jsonResult(
+            await withSession(toConnection(args), (session) => session.inspect(args.expression)),
+          );
+        }
+        if (args.expression === undefined) return textResult("(no value)");
+        const expression = args.expression;
         const result = await withSession(toConnection(args), (session) =>
-          session.evaluate(args.expression),
+          session.evaluate(expression),
         );
         if (result === undefined) return textResult("(no value)");
-        return textResult(typeof result === "string" ? result : JSON.stringify(result, null, 2));
+        return textResult(formatEvalValue(result));
       }),
   );
 
@@ -180,53 +128,4 @@ export const registerBrowserTools = (server: McpServer): void => {
         };
       }),
   );
-
-  registerPageTool(server, {
-    name: "browser_audit",
-    title: "Run an accessibility audit",
-    description:
-      "Run an axe-core accessibility audit on the attached page (or a URL; reloads the current page when no URL is given) and return the violations with impact, help text, and affected element targets.",
-    run: async (session, url) => {
-      const violations = await session.audit(url);
-      return jsonResult({ violationCount: violations.length, violations });
-    },
-  });
-
-  registerPageTool(server, {
-    name: "browser_console",
-    title: "Capture console output",
-    description:
-      "Capture console messages and page errors during a load of the attached page (or a URL; reloads when no URL is given).",
-    run: async (session, url) => {
-      const messages = await session.captureConsole(url);
-      return jsonResult({ messageCount: messages.length, messages });
-    },
-  });
-
-  registerPageTool(server, {
-    name: "browser_network",
-    title: "Capture network requests",
-    description:
-      "Capture network requests during a load of the attached page (or a URL; reloads when no URL is given), flagging failures and non-2xx/3xx responses.",
-    run: async (session, url) => {
-      const requests = await session.captureNetwork(url);
-      return jsonResult({ requestCount: requests.length, requests });
-    },
-  });
-
-  registerPageTool(server, {
-    name: "browser_perf",
-    title: "Measure runtime performance (jank)",
-    description:
-      "Capture long animation frames (>50ms main-thread jank) with per-script attribution, plus LCP and CLS. Loads a URL when given; omit the URL to measure the current page without reloading (so a browser_eval interaction's jank is included).",
-    run: async (session, url) => jsonResult(await session.measurePerformance(url)),
-  });
-
-  registerPageTool(server, {
-    name: "browser_report",
-    title: "Capture a full page report",
-    description:
-      "Capture console, network, performance, and accessibility in a single load — the efficient path when you want the whole runtime picture at once. Always loads (a URL when given, otherwise reloads the current page); to measure after a browser_eval interaction without reloading, use browser_perf.",
-    run: async (session, url) => jsonResult(await session.inspectPage(url)),
-  });
 };
diff --git a/packages/mcp/tests/server.test.ts b/packages/mcp/tests/server.test.ts
index 0e9e8f767..c36d568ce 100644
--- a/packages/mcp/tests/server.test.ts
+++ b/packages/mcp/tests/server.test.ts
@@ -19,14 +19,8 @@ const listToolNames = async (): Promise<string[]> => {
 
 test("registers the doctor, browser, and debug tools", async () => {
   expect(await listToolNames()).toEqual([
-    "browser_audit",
-    "browser_console",
     "browser_eval",
-    "browser_network",
     "browser_open",
-    "browser_perf",
-    "browser_profile",
-    "browser_report",
     "browser_screenshot",
     "browser_snapshot",
     "debug_clear_logs",
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index 808e72ffc..360e5dceb 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -1,9 +1,11 @@
 import {
   BrowserSession,
+  formatEvalValue,
   type AccessibilityViolation,
   type ConsoleMessageEntry,
   type CpuProfileAnalysis,
   type NetworkRequestEntry,
+  type PageInspection,
   type PerformanceReport,
   type ReactProfileAnalysis,
   type Viewport,
@@ -17,7 +19,7 @@ export interface BrowserCommandOptions {
   launch?: boolean;
   out?: string;
   viewport?: Viewport;
-  interaction?: string;
+  profile?: boolean;
 }
 
 // playwright-core loads lazily inside @react-doctor/browser (only when a command
@@ -45,7 +47,7 @@ export const browserOpenAction = async (
     await session.openWithReactProfiler(url);
     logger.success(`Opened ${url}`);
     logger.log(
-      "React profiler ready: `browser profile --interaction '...'` for a one-shot record + analysis, or drive it manually with `browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'` then `stop()`.",
+      "React profiler ready: `browser eval '<action>' --profile` records + analyzes that action, or drive it manually with `browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'` then `stop()`.",
     );
     if (session.launched) {
       logger.log(
@@ -56,14 +58,26 @@ export const browserOpenAction = async (
 };
 
 export const browserEvalAction = async (
-  expression: string,
+  expression: string | undefined,
   options: BrowserCommandOptions,
 ): Promise<void> => {
   recordCount(METRIC.cliInvoked, 1, { command: "browser.eval" });
+  if (options.profile) {
+    await withSession(options, async (session) => {
+      printInspection(await session.inspect(expression));
+    });
+    return;
+  }
+  // Without --profile, an expression is required: guard before attaching (or
+  // launching) Chrome so a bare `browser eval` doesn't spin one up to do nothing.
+  if (expression === undefined) {
+    logger.log("Pass an expression to run, or --profile to measure the page.");
+    return;
+  }
   await withSession(options, async (session) => {
     const result = await session.evaluate(expression);
     if (result === undefined) return;
-    logger.log(typeof result === "string" ? result : JSON.stringify(result, null, 2));
+    logger.log(formatEvalValue(result));
   });
 };
 
@@ -83,9 +97,9 @@ export const browserScreenshotAction = async (options: BrowserCommandOptions): P
   });
 };
 
-// Section printers, shared by the focused commands and the combined `report` so
-// the line format lives in one place. Each prints the section body only; the
-// callers decide on headers and empty-state messaging.
+// Section printers for `eval --profile`, one per section of the inspection, so
+// the line format lives in one place. `printInspection` owns the headers and the
+// "(none)" line for the list sections that can be empty.
 const printAuditViolations = (violations: AccessibilityViolation[]): void => {
   for (const violation of violations) {
     const impact = violation.impact ? `[${violation.impact}] ` : "";
@@ -171,107 +185,39 @@ const printCpuProfile = (analysis: CpuProfileAnalysis): void => {
   }
 };
 
-export const browserProfileAction = async (
-  url: string | undefined,
-  options: BrowserCommandOptions,
-): Promise<void> => {
-  recordCount(METRIC.cliInvoked, 1, { command: "browser.profile" });
-  await withSession(options, async (session) => {
-    const analysis = await session.profile({ url, interaction: options.interaction });
-
-    logger.log("# React renders");
-    if (analysis.react) {
-      printReactProfile(analysis.react);
-    } else {
-      logger.log(
-        "(no React data — needs a development build of React and renders during the recording)",
-      );
-    }
-
-    logger.log("\n# CPU");
-    printCpuProfile(analysis.cpu);
-  });
-};
-
-export const browserAuditAction = async (
-  url: string | undefined,
-  options: BrowserCommandOptions,
-): Promise<void> => {
-  recordCount(METRIC.cliInvoked, 1, { command: "browser.audit" });
-  await withSession(options, async (session) => {
-    const violations = await session.audit(url);
-    if (violations.length === 0) {
-      logger.success("No accessibility violations found");
-      return;
-    }
-    logger.log(`${violations.length} accessibility violation(s):\n`);
-    printAuditViolations(violations);
-  });
-};
-
-export const browserConsoleAction = async (
-  url: string | undefined,
-  options: BrowserCommandOptions,
-): Promise<void> => {
-  recordCount(METRIC.cliInvoked, 1, { command: "browser.console" });
-  await withSession(options, async (session) => {
-    const messages = await session.captureConsole(url);
-    if (messages.length === 0) {
-      logger.success("No console output captured");
-      return;
-    }
-    printConsoleMessages(messages);
-  });
-};
-
-export const browserNetworkAction = async (
-  url: string | undefined,
-  options: BrowserCommandOptions,
-): Promise<void> => {
-  recordCount(METRIC.cliInvoked, 1, { command: "browser.network" });
-  await withSession(options, async (session) => {
-    const requests = await session.captureNetwork(url);
-    if (requests.length === 0) {
-      logger.success("No network requests captured");
-      return;
-    }
-    printNetworkRequests(requests);
-  });
-};
+// The whole runtime picture from one `eval --profile` recording, printed
+// section by section. Each section reuses the shared printers above.
+const printInspection = (inspection: PageInspection): void => {
+  if (inspection.result !== null) {
+    logger.log("# Result");
+    logger.log(formatEvalValue(inspection.result));
+    logger.log("");
+  }
 
-export const browserPerfAction = async (
-  url: string | undefined,
-  options: BrowserCommandOptions,
-): Promise<void> => {
-  recordCount(METRIC.cliInvoked, 1, { command: "browser.perf" });
-  await withSession(options, async (session) => {
-    printPerformanceReport(await session.measurePerformance(url));
-  });
-};
+  logger.log("# Console");
+  if (inspection.console.length === 0) logger.log("(none)");
+  else printConsoleMessages(inspection.console);
 
-// One navigation, every signal — the efficient path when an agent wants the
-// whole runtime picture instead of reloading the page once per command.
-export const browserReportAction = async (
-  url: string | undefined,
-  options: BrowserCommandOptions,
-): Promise<void> => {
-  recordCount(METRIC.cliInvoked, 1, { command: "browser.report" });
-  await withSession(options, async (session) => {
-    const inspection = await session.inspectPage(url);
+  logger.log("\n# Network");
+  if (inspection.network.length === 0) logger.log("(none)");
+  else printNetworkRequests(inspection.network);
 
-    logger.log("# Console");
-    if (inspection.console.length === 0) logger.log("(none)");
-    else printConsoleMessages(inspection.console);
+  logger.log("\n# Performance");
+  printPerformanceReport(inspection.performance);
 
-    logger.log("\n# Network");
-    if (inspection.network.length === 0) logger.log("(none)");
-    else printNetworkRequests(inspection.network);
+  logger.log("\n# Accessibility");
+  if (inspection.accessibility.length === 0) logger.log("(none)");
+  else printAuditViolations(inspection.accessibility);
 
-    logger.log("\n# Performance");
-    printPerformanceReport(inspection.performance);
+  logger.log("\n# React renders");
+  if (inspection.profile.react) {
+    printReactProfile(inspection.profile.react);
+  } else {
+    logger.log(
+      "(no React data — needs a development build of React and renders during the recording)",
+    );
+  }
 
-    logger.log("\n# Accessibility");
-    if (inspection.accessibility.length === 0) logger.log("(none)");
-    else printAuditViolations(inspection.accessibility);
-  });
+  logger.log("\n# CPU");
+  printCpuProfile(inspection.profile.cpu);
 };
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
index b61be04ef..7071f5027 100644
--- a/packages/react-doctor/src/cli/index.ts
+++ b/packages/react-doctor/src/cli/index.ts
@@ -2,14 +2,8 @@ import { Command, Option } from "commander";
 import { CANONICAL_GITHUB_URL, highlighter } from "@react-doctor/core";
 import { flushSentry, initializeSentry } from "../instrument.js";
 import {
-  browserAuditAction,
-  browserConsoleAction,
   browserEvalAction,
-  browserNetworkAction,
   browserOpenAction,
-  browserPerfAction,
-  browserProfileAction,
-  browserReportAction,
   browserScreenshotAction,
   browserSnapshotAction,
 } from "./commands/browser.js";
@@ -269,9 +263,13 @@ withConnectionOptions(
 
 withRenderOptions(
   browser
-    .command("eval <expression>")
+    .command("eval [expression]")
     .description(
-      "Run an expression with the Playwright `page` in scope, e.g. 'page.locator(\"text=Login\").click()'",
+      "Run an expression with the Playwright `page` in scope, e.g. 'page.getByText(\"Login\").click()'. Add --profile to also record the full runtime picture.",
+    )
+    .option(
+      "--profile",
+      "record console, network, performance, accessibility, and the React + CPU profiles while the expression runs (omit the expression to measure the live page idle)",
     ),
 ).action(browserEvalAction);
 
@@ -288,52 +286,6 @@ withRenderOptions(
     .option("--out <path>", "output file path (default react-doctor-screenshot.png)"),
 ).action(browserScreenshotAction);
 
-withRenderOptions(
-  browser
-    .command("audit [url]")
-    .description("Run an accessibility audit (axe-core) on the page or a URL"),
-).action(browserAuditAction);
-
-withRenderOptions(
-  browser
-    .command("console [url]")
-    .description("Capture console output and page errors during a load (reloads if no URL)"),
-).action(browserConsoleAction);
-
-withRenderOptions(
-  browser
-    .command("network [url]")
-    .description("Capture network requests during a load, flagging failures (reloads if no URL)"),
-).action(browserNetworkAction);
-
-withRenderOptions(
-  browser
-    .command("perf [url]")
-    .description(
-      "Capture long animation frames (jank) with per-script attribution, plus LCP/CLS (reloads if no URL)",
-    ),
-).action(browserPerfAction);
-
-withRenderOptions(
-  browser
-    .command("profile [url]")
-    .description(
-      "Profile React renders and CPU in one recording: slowest commits, hottest components, unnecessary re-renders, and the hottest JS functions",
-    )
-    .option(
-      "--interaction <expression>",
-      "Playwright expression to drive while recording, e.g. 'page.getByText(\"Next\").click()'",
-    ),
-).action(browserProfileAction);
-
-withRenderOptions(
-  browser
-    .command("report [url]")
-    .description(
-      "Capture console, network, performance, and accessibility in a single load (reloads if no URL)",
-    ),
-).action(browserReportAction);
-
 const debug = program
   .command("debug")
   .description("Runtime debugging tools for the debug job (NDJSON logging server)");
diff --git a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
index b4e9f7b66..5f2bee229 100644
--- a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
+++ b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
@@ -100,14 +100,13 @@ const WHY_FLAG_SPEC: CliFlagSpec = {
 };
 
 // Union of every flag across the `browser` subcommands (open / eval / snapshot /
-// screenshot / audit / console / network / perf / profile / report). The
-// sub-subcommand name and any URL / expression positional pass through
-// untouched; only these options need to survive the pre-parse strip so Commander
-// can route them — without this, e.g. `--cdp <endpoint>` is dropped and its
-// value leaks in as a stray positional.
+// screenshot). The sub-subcommand name and any URL / expression positional pass
+// through untouched; only these options need to survive the pre-parse strip so
+// Commander can route them — without this, e.g. `--cdp <endpoint>` is dropped and
+// its value leaks in as a stray positional.
 const BROWSER_FLAG_SPEC: CliFlagSpec = {
-  longOptionsWithoutValues: new Set(["--help", "--no-launch"]),
-  longOptionsWithRequiredValues: new Set(["--cdp", "--interaction", "--out", "--viewport"]),
+  longOptionsWithoutValues: new Set(["--help", "--no-launch", "--profile"]),
+  longOptionsWithRequiredValues: new Set(["--cdp", "--out", "--viewport"]),
   longOptionsWithOptionalValues: new Set(),
   shortOptionsWithoutValues: new Set(["-h"]),
   shortOptionsWithRequiredValues: new Set(),
diff --git a/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts b/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
index ffdd89162..9c82269d9 100644
--- a/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
+++ b/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
@@ -132,17 +132,11 @@ describe("stripUnknownCliFlags", () => {
 
   it("keeps browser subcommand flags and consumes --cdp's value (no value leaks as a positional)", () => {
     // Regression: without a browser flag spec, --cdp is dropped and its endpoint
-    // value leaks in as a second positional, so `browser audit <url> --cdp <endpoint>`
+    // value leaks in as a second positional, so `browser eval <expr> --cdp <endpoint>`
     // makes Commander throw "too many arguments".
     expect(
-      stripUserArguments([
-        "browser",
-        "audit",
-        "https://example.com",
-        "--cdp",
-        "http://127.0.0.1:9456",
-      ]),
-    ).toEqual(["browser", "audit", "https://example.com", "--cdp", "http://127.0.0.1:9456"]);
+      stripUserArguments(["browser", "eval", "page.title()", "--cdp", "http://127.0.0.1:9456"]),
+    ).toEqual(["browser", "eval", "page.title()", "--cdp", "http://127.0.0.1:9456"]);
     expect(stripUserArguments(["browser", "open", "https://example.com", "--no-launch"])).toEqual([
       "browser",
       "open",
@@ -158,23 +152,17 @@ describe("stripUnknownCliFlags", () => {
     expect(
       stripUserArguments(["browser", "eval", 'page.locator("a").click()', "--cdp", "http://x"]),
     ).toEqual(["browser", "eval", 'page.locator("a").click()', "--cdp", "http://x"]);
-    // Regression: `--interaction`'s Playwright expression must not leak as a
-    // positional, or `browser profile` rejects it as too many arguments.
+    // `--profile` is a boolean (no value): it must survive the strip without
+    // swallowing an argument, while the unknown `--offline` after it is dropped.
     expect(
       stripUserArguments([
         "browser",
-        "profile",
-        "https://example.com",
-        "--interaction",
+        "eval",
         'page.getByText("Next").click()',
+        "--profile",
+        "--offline",
       ]),
-    ).toEqual([
-      "browser",
-      "profile",
-      "https://example.com",
-      "--interaction",
-      'page.getByText("Next").click()',
-    ]);
+    ).toEqual(["browser", "eval", 'page.getByText("Next").click()', "--profile"]);
   });
 
   it("keeps debug serve flags and consumes their values (no value leaks as a positional)", () => {
diff --git a/skills/react-doctor/SKILL.md b/skills/react-doctor/SKILL.md
index 5778fc388..4d3298dab 100644
--- a/skills/react-doctor/SKILL.md
+++ b/skills/react-doctor/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: react-doctor
 description: Use when writing, finishing, or committing React or React Native code, when the user types `/react-doctor`, or when they ask to scan, triage, lint, profile performance, debug a UI in the browser, or review design and accessibility. Covers lint, accessibility, performance, bundle size, and architecture.
-version: "1.6.0"
+version: "1.7.0"
 ---
 
 # React Doctor
@@ -41,7 +41,7 @@ doctor runs from code alone, so it is the one that fires in the background. The
 debug, design, and perf need a real Chrome. Two ways to get one:
 
 1. **A browser MCP already in your tools.** Prefer [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) or similar for console, network, and snapshots. It adds full performance traces and Lighthouse on top.
-2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. It covers `open`, `eval`, `snapshot`, `screenshot`, `console`, `network`, an axe-core `audit`, `perf` (long animation frames with per-script attribution), `profile` (one recording with both lenses — a React render profile of slowest commits, hottest components, and unnecessary re-renders, plus a V8/DevTools CPU profile over CDP with the hottest JS functions ranked by self time), and `report` (every signal in one load).
+2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. Four commands: `open` (load a URL, leave the page, React profiler injected), `eval` (run a Playwright expression with `page` in scope — returns its value), `snapshot` (accessibility tree), and `screenshot`. Add `--profile` to `eval` to record the whole runtime picture while the expression runs — console, network, performance (long animation frames with per-script attribution, LCP, CLS), an axe-core accessibility audit, a React render profile (slowest commits, hottest components, unnecessary re-renders), and a V8/DevTools CPU profile over CDP (hottest JS functions by self time). Run `eval --profile` with no expression to measure the live page as it is.
 
 It is the same Chrome either way, so the playbooks apply to both: `browser open`, `eval`, `snapshot`, and `screenshot` map onto the MCP's `navigate_page`, `evaluate_script`, `take_snapshot`, and `take_screenshot`.
 
@@ -53,7 +53,7 @@ React Doctor ships its own Model Context Protocol server over stdio so any MCP-c
 npx react-doctor@latest mcp
 ```
 
-It exposes `doctor_scan` (the static scan), the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`, `browser_audit`, `browser_console`, `browser_network`, `browser_perf`, `browser_profile`, `browser_report`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_profile` records both a React render profile and a literal Chrome DevTools CPU profile in one pass.
+It exposes `doctor_scan` (the static scan), the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_eval` takes a `profile: true` argument that records the whole runtime picture — console, network, performance, accessibility, a React render profile, and a literal Chrome DevTools CPU profile — in one pass while the expression runs.
 
 ## doctor: scan and triage
 
diff --git a/skills/react-doctor/references/debug.md b/skills/react-doctor/references/debug.md
index afd5431e2..26cefe4ef 100644
--- a/skills/react-doctor/references/debug.md
+++ b/skills/react-doctor/references/debug.md
@@ -51,15 +51,14 @@ Wrap every debug log in `// #region debug log` and `// #endregion` so cleanup la
 
 Clear the log file (`DELETE` the file at `logPath`) before each run, then trigger the exact behavior the user described:
 
-- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser console` and `browser network` hand you the runtime console (with uncaught errors) and the request waterfall with failures flagged, often the evidence you need before instrumenting at all. To get the whole picture in one pass, `browser report` captures console, network, performance, and accessibility in a single page load instead of reloading once per command; prefer it over running the four separately. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
+- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser eval --profile` hands you the whole runtime picture in one pass — the console (with uncaught errors), the network waterfall with failures flagged, performance, accessibility, and the React + CPU profiles — so you rarely need to instrument at all. Run it with no expression to read the page as it is, or pass the repro to record what it triggers. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
 
 ```bash
 npx react-doctor browser open http://localhost:3000           # attach + open the page
-npx react-doctor browser report http://localhost:3000         # console + network + perf + a11y in one load
-npx react-doctor browser console http://localhost:3000        # console output + uncaught errors
-npx react-doctor browser network http://localhost:3000        # request waterfall, failures flagged
+npx react-doctor browser eval --profile                       # console + network + perf + a11y + React/CPU in one pass
 npx react-doctor browser snapshot                             # what rendered, by role + name
 npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()'
+npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()' --profile  # drive + measure it
 npx react-doctor browser eval 'page.evaluate(() => document.title)'   # raw DOM when you need it
 ```
 
diff --git a/skills/react-doctor/references/design.md b/skills/react-doctor/references/design.md
index 74f50928f..8a6e6a405 100644
--- a/skills/react-doctor/references/design.md
+++ b/skills/react-doctor/references/design.md
@@ -9,10 +9,10 @@ The value here is what a screenshot and the live DOM let you measure that readin
 ```bash
 npx react-doctor browser open http://localhost:3000
 npx react-doctor browser screenshot --out review.png   # what the user actually sees
-npx react-doctor browser audit                          # axe-core: contrast, names, landmarks
+npx react-doctor browser eval --profile                # full picture incl. axe-core a11y: contrast, names, landmarks
 ```
 
-Review responsive breakpoints with `--viewport WIDTHxHEIGHT` (for example `--viewport 390x844` for a phone) on `screenshot`, `snapshot`, `audit`, or `perf`. It emulates the size for that one command via a CDP override, so it never resizes your real browser window:
+Review responsive breakpoints with `--viewport WIDTHxHEIGHT` (for example `--viewport 390x844` for a phone) on `screenshot`, `snapshot`, or `eval`. It emulates the size for that one command via a CDP override, so it never resizes your real browser window:
 
 ```bash
 npx react-doctor browser screenshot --viewport 390x844 --out mobile.png
@@ -24,7 +24,7 @@ Look at the screenshot, then measure specifics with `eval` (computed styles, bou
 npx react-doctor browser eval 'page.evaluate(() => getComputedStyle(document.querySelector("button")).fontSize)'
 ```
 
-`browser audit` runs axe-core against the live page and reports accessibility violations (color contrast, missing button or SVG names, heading order, landmarks) with the failing selectors. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, its `lighthouse_audit` adds performance and best-practice findings on top. Lead with the measured issues; a smarter model cannot dismiss them as opinion.
+`browser eval --profile` runs axe-core against the live page and reports accessibility violations (color contrast, missing button or SVG names, heading order, landmarks) with the failing selectors in its Accessibility section. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, its `lighthouse_audit` adds performance and best-practice findings on top. Lead with the measured issues; a smarter model cannot dismiss them as opinion.
 
 ## What to check
 
@@ -44,7 +44,7 @@ Then craft, drawing on the bundled design rules:
 
 ## The loop
 
-Build or fix, screenshot, re-audit, compare. Confirm the measured issue you targeted actually moved (the ratio crossed the threshold, the target grew) and that the screenshot looks right before and after.
+Build or fix, screenshot, re-check, compare. Confirm the measured issue you targeted actually moved (the ratio crossed the threshold, the target grew) and that the screenshot looks right before and after.
 
 ## Working rules
 
diff --git a/skills/react-doctor/references/performance.md b/skills/react-doctor/references/performance.md
index 01ace8d41..63363d7c0 100644
--- a/skills/react-doctor/references/performance.md
+++ b/skills/react-doctor/references/performance.md
@@ -10,15 +10,16 @@ Why is it slow, and where? Common React causes: unstable callback or object prop
 
 ## 2. Capture (no app changes)
 
-`browser perf` arms the LoAF, LCP, and CLS observers, loads the page, watches briefly past load, then reports the worst frames first with per-script attribution:
+`browser eval --profile` arms every observer (LoAF/LCP/CLS, the React render profiler, and a V8 CPU profiler), runs the expression you pass while it records, then reports the worst frames first with per-script attribution. Drive a fresh load by passing the navigation, or omit the expression to read the page as it is now without reloading:
 
 ```bash
-npx react-doctor browser perf http://localhost:3000   # measures the current page if URL omitted
+npx react-doctor browser eval 'page.goto("http://localhost:3000")' --profile
+npx react-doctor browser eval --profile   # measures the current page, no reload
 ```
 
-It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The output leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
+It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The performance section leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
 
-To attribute interaction jank (a slow click, scroll, or keypress), drive the repro between load and the read: `browser open`, then `browser eval` the interaction, then `browser perf` with no URL. Without a URL it does not reload; it reads the long frames already buffered in the timeline, so the jank from your interaction is included.
+To attribute interaction jank (a slow click, scroll, or keypress), pass the repro as the expression so it runs while recording: `browser open` the page, then `browser eval 'page.getByText("Next").click()' --profile`. The recording covers the action, so its frames, renders, and CPU samples are all included.
 
 ## 3. Analyze the worst frame first
 
@@ -28,22 +29,16 @@ The output is already sorted worst-first. The script with the largest duration i
 
 ## 4. Zoom into React renders (optional)
 
-When the worst frame's script is your own React bundle and you need per-component render counts and why each rendered, profile React directly. `browser open` injects the real DevTools profiler before the page loads, so there are no app changes, no Chrome extension, and no manual record or stop:
+When the worst frame's script is your own React bundle and you need per-component render counts and why each rendered, profile React directly. `browser open` injects the real DevTools profiler before the page loads, so there are no app changes, no Chrome extension, and no manual record or stop — then drive the repro with `browser eval --profile`:
 
 ```bash
 npx react-doctor browser open http://localhost:3000
+npx react-doctor browser eval 'page.getByText("Next").click()' --profile
 ```
 
 For trustworthy timings, run against React's profiling build (alias `react-dom` to `react-dom/profiling` in your bundler) in a dev or non-prod build. Dev timings work but are inflated.
 
-The fastest path is `browser profile`: one recording, both lenses. It returns `react` (slowest commits, components that render most/cost the most self time, and the count of unnecessary re-renders — components that re-rendered with nothing they own changed, the memoization candidates) and `cpu` (a Chrome DevTools CPU profile via V8's sampling profiler over CDP, the hottest JS functions ranked by self time):
-
-```bash
-npx react-doctor browser profile http://localhost:3000 --interaction 'page.getByText("Next").click()'
-# omit the url to profile a page already opened with `browser open`
-```
-
-The `react` lens is null on a production React build (it records no profiling data); the `cpu` lens works on any build. For manual control of the React profiler, drive it through `browser eval` (the Playwright `page` is in scope):
+`browser eval --profile` records one pass with both lenses. The `react` lens reports the slowest commits, the components that render most/cost the most self time, and the count of unnecessary re-renders (components that re-rendered with nothing they own changed — the memoization candidates). The `cpu` lens is a Chrome DevTools CPU profile via V8's sampling profiler over CDP, the hottest JS functions ranked by self time. The `react` lens is null on a production React build (it records no profiling data); the `cpu` lens works on any build. For manual control of the React profiler, drive it through `browser eval` without `--profile` (the Playwright `page` is in scope):
 
 ```bash
 npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'
@@ -51,7 +46,7 @@ npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start()
 npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.stop())'
 ```
 
-Reading the raw React export: aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates) — which is exactly what `browser profile` computes for you.
+Reading the raw React export: aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates) — which is exactly what `browser eval --profile` computes for you.
 
 ## 5. Fix, only with proof
 

From d47f33e4571177ea09d49bbd26cbba0153f469cb Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 18:06:57 -0700
Subject: [PATCH 13/38] fix(browser): stop V8 sampling on the error path in
 inspect

When the driven expression or perf measurement throws, the happy-path
Profiler.stop is skipped, and Profiler.disable alone could leave the
persistent page sampling and skew later eval --profile runs. Stop in the
finally too (symmetric with the React profiler), a no-op after a clean run.
---
 packages/browser/src/session.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 2821dbd3f..1830c8a71 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -307,6 +307,11 @@ export class BrowserSession {
         },
       };
     } finally {
+      // Stop V8 sampling before disabling, so a throw before the happy-path
+      // `Profiler.stop` above can't leave the persistent page recording and skew
+      // later runs. A second stop after a clean run just returns an ignored
+      // profile, so this is safe on every path.
+      await cdpSession.send("Profiler.stop").catch(() => {});
       await cdpSession.send("Profiler.disable").catch(() => {});
       await cdpSession.detach().catch(() => {});
       for (const detach of detachers) detach();

From 7f6f088d33ca7c3560fc550bc49fdce2edf3fbb8 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 18:25:14 -0700
Subject: [PATCH 14/38] feat(browser): capture a DevTools timeline trace in
 eval --profile
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Alongside the CPU/React profiles, eval --profile now records a Chrome
DevTools timeline trace over the same window. It rolls the trace up into the
perf report as forced-reflow cost — total/count/longest for style-recalc,
layout, hit-test, and paint — which the script-level LoAF rows can't isolate
(where getComputedStyle/getBoundingClientRect/elementsFromPoint land). The
raw trace is written to a file (--out, default react-doctor-trace.json) that
loads in the DevTools Performance panel.

Tracing runs on the same CDP session as the Profiler domain (categories
exclude the V8 cpu_profiler to avoid collision), is best-effort (a failed
start just yields an empty timeline and no file), and is stopped on the
error path like the other profilers.
---
 .../react-doctor/references/performance.md    |  2 +
 .changeset/react-browser-debug-skill.md       |  2 +-
 .../browser/src/analyze-timeline-trace.ts     | 50 ++++++++++
 packages/browser/src/constants.ts             | 21 ++++
 packages/browser/src/index.ts                 |  1 +
 packages/browser/src/perf-observer.ts         |  8 +-
 packages/browser/src/session.ts               | 98 +++++++++++++++----
 packages/browser/src/types.ts                 | 33 +++++++
 .../browser/src/utils/write-trace-file.ts     | 10 ++
 .../tests/analyze-timeline-trace.test.ts      | 40 ++++++++
 packages/mcp/src/tools/browser.ts             | 17 +++-
 .../react-doctor/src/cli/commands/browser.ts  | 32 +++++-
 packages/react-doctor/src/cli/index.ts        |  6 +-
 skills/react-doctor/references/performance.md |  2 +
 14 files changed, 293 insertions(+), 29 deletions(-)
 create mode 100644 packages/browser/src/analyze-timeline-trace.ts
 create mode 100644 packages/browser/src/utils/write-trace-file.ts
 create mode 100644 packages/browser/tests/analyze-timeline-trace.test.ts

diff --git a/.agents/skills/react-doctor/references/performance.md b/.agents/skills/react-doctor/references/performance.md
index 63363d7c0..da3830a3c 100644
--- a/.agents/skills/react-doctor/references/performance.md
+++ b/.agents/skills/react-doctor/references/performance.md
@@ -19,6 +19,8 @@ npx react-doctor browser eval --profile   # measures the current page, no reload
 
 It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The performance section leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
 
+It also captures a Chrome DevTools timeline trace over the same window. The perf section rolls it up into the native cost a forced reflow incurs — total/longest **style-recalc**, **layout**, **hit-test**, and **paint** time — which the script-level LoAF rows can't isolate (this is where `getComputedStyle` / `getBoundingClientRect` / `elementsFromPoint` land). The raw trace is written to `react-doctor-trace.json` (override with `--out`); drop it into the DevTools **Performance** panel for the full flame chart.
+
 To attribute interaction jank (a slow click, scroll, or keypress), pass the repro as the expression so it runs while recording: `browser open` the page, then `browser eval 'page.getByText("Next").click()' --profile`. The recording covers the action, so its frames, renders, and CPU samples are all included.
 
 ## 3. Analyze the worst frame first
diff --git a/.changeset/react-browser-debug-skill.md b/.changeset/react-browser-debug-skill.md
index 7b754c07b..9fb3d3858 100644
--- a/.changeset/react-browser-debug-skill.md
+++ b/.changeset/react-browser-debug-skill.md
@@ -2,4 +2,4 @@
 "react-doctor": minor
 ---
 
-Add the `browser`, `debug`, and `mcp` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback): `open` a page, `eval` a Playwright expression, `snapshot` the accessibility tree, and `screenshot`. Adding `--profile` to `eval` records the whole runtime picture in one pass while the expression runs — console, network, performance traces (long animation frames with per-script attribution, LCP, CLS), an axe-core accessibility audit, a React render profile (slowest commits, hottest components by self time, unnecessary re-render counts), and a Chrome DevTools CPU profile via V8's sampling profiler over CDP (the hottest JS functions ranked by self time). `debug` runs an NDJSON logging server the debug job posts runtime evidence to. `mcp` runs a Model Context Protocol server over stdio that exposes the doctor scan and the browser/debug jobs as MCP tools, so any MCP-capable agent can run `react-doctor mcp` and call `doctor_scan`, the `browser_*` tools (`browser_eval` takes a `profile: true` argument that captures every signal together), and the `debug_*` log server directly.
+Add the `browser`, `debug`, and `mcp` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback): `open` a page, `eval` a Playwright expression, `snapshot` the accessibility tree, and `screenshot`. Adding `--profile` to `eval` records the whole runtime picture in one pass while the expression runs — console, network, performance (long animation frames with per-script attribution, LCP, CLS, plus a DevTools timeline roll-up of forced style-recalc/layout/hit-test/paint cost), an axe-core accessibility audit, a React render profile (slowest commits, hottest components by self time, unnecessary re-render counts), and a Chrome DevTools CPU profile via V8's sampling profiler over CDP (the hottest JS functions ranked by self time). It also writes the raw DevTools timeline trace to a file (`--out`, default `react-doctor-trace.json`) that loads in the DevTools Performance panel. `debug` runs an NDJSON logging server the debug job posts runtime evidence to. `mcp` runs a Model Context Protocol server over stdio that exposes the doctor scan and the browser/debug jobs as MCP tools, so any MCP-capable agent can run `react-doctor mcp` and call `doctor_scan`, the `browser_*` tools (`browser_eval` takes a `profile: true` argument that captures every signal together), and the `debug_*` log server directly.
diff --git a/packages/browser/src/analyze-timeline-trace.ts b/packages/browser/src/analyze-timeline-trace.ts
new file mode 100644
index 000000000..e43aa3c3c
--- /dev/null
+++ b/packages/browser/src/analyze-timeline-trace.ts
@@ -0,0 +1,50 @@
+import type { TimelineAnalysis, TimelinePhaseStat } from "./types.js";
+import { roundToHundredths } from "./utils/round.js";
+
+// CDP types trace events loosely (string maps), so we narrow `name`/`dur`
+// ourselves. Complete events (`ph: "X"`) carry a microsecond `dur`; the rest of
+// the event shape is written to the trace file verbatim but ignored here.
+interface TraceEvent {
+  name?: unknown;
+  dur?: unknown;
+}
+
+// Trace event names that represent a forced/scheduled reflow phase (style recalc and
+// layout: reading layout on a dirty page; `HitTest`: `elementsFromPoint`; `Paint` follows).
+// A Map, not an object: a name like "constructor" must not resolve via Object.prototype.
+const PHASE_BY_EVENT_NAME = new Map<string, keyof TimelineAnalysis>([
+  ["UpdateLayoutTree", "styleRecalc"],
+  ["RecalculateStyles", "styleRecalc"],
+  ["Layout", "layout"],
+  ["HitTest", "hitTest"],
+  ["Paint", "paint"],
+]);
+
+const emptyPhase = (): TimelinePhaseStat => ({ totalMs: 0, count: 0, longestMs: 0 });
+
+// Roll a Chrome DevTools timeline trace up into per-phase wall time, so the native
+// style/layout/hit-test cost a forced reflow incurs is a number in the perf report, not
+// just the trace file. Events with an unmapped `name` or non-numeric `dur` are skipped.
+export const analyzeTimelineTrace = (events: TraceEvent[]): TimelineAnalysis => {
+  const phases: TimelineAnalysis = {
+    styleRecalc: emptyPhase(),
+    layout: emptyPhase(),
+    hitTest: emptyPhase(),
+    paint: emptyPhase(),
+  };
+  for (const event of events) {
+    if (typeof event.name !== "string" || typeof event.dur !== "number") continue;
+    const phaseKey = PHASE_BY_EVENT_NAME.get(event.name);
+    if (!phaseKey) continue;
+    const durationMs = event.dur / 1000;
+    const phase = phases[phaseKey];
+    phase.totalMs += durationMs;
+    phase.count += 1;
+    if (durationMs > phase.longestMs) phase.longestMs = durationMs;
+  }
+  for (const phase of Object.values(phases)) {
+    phase.totalMs = roundToHundredths(phase.totalMs);
+    phase.longestMs = roundToHundredths(phase.longestMs);
+  }
+  return phases;
+};
diff --git a/packages/browser/src/constants.ts b/packages/browser/src/constants.ts
index 0fc8a2a5f..346fb8356 100644
--- a/packages/browser/src/constants.ts
+++ b/packages/browser/src/constants.ts
@@ -53,6 +53,27 @@ export const MAX_COMMIT_COMPONENTS = 8;
 // V8 CPU profiler sampling interval, matching Chrome DevTools' default (100us).
 export const DEFAULT_CPU_SAMPLING_INTERVAL_US = 100;
 
+// Trace categories for the timeline recording captured alongside the CPU profile.
+// `-*` drops everything, then we opt into the DevTools timeline events
+// (style/layout/hit-test/paint, with their triggering JS stacks) — but NOT
+// `disabled-by-default-v8.cpu_profiler`, which would collide with the Profiler
+// domain we already run for the CPU analysis. The result loads in the DevTools
+// Performance panel and carries the forced-reflow events we roll up.
+export const TIMELINE_TRACE_CATEGORIES = [
+  "-*",
+  "devtools.timeline",
+  "disabled-by-default-devtools.timeline",
+  "disabled-by-default-devtools.timeline.frame",
+  "disabled-by-default-devtools.timeline.stack",
+  "blink.user_timing",
+  "latencyInfo",
+  "loading",
+  "toplevel",
+].join(",");
+
+// Default file the raw timeline trace is written to (in the working directory).
+export const DEFAULT_TRACE_FILENAME = "react-doctor-trace.json";
+
 // Functions returned inline by a CPU profile analysis, ranked by self time.
 export const MAX_PROFILE_FUNCTIONS = 20;
 
diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts
index ae35d96d9..a19143c06 100644
--- a/packages/browser/src/index.ts
+++ b/packages/browser/src/index.ts
@@ -3,4 +3,5 @@ export { connectToBrowser } from "./connect.js";
 export type { BrowserConnection } from "./connect.js";
 export { parseViewport } from "./parse-viewport.js";
 export { formatEvalValue } from "./utils/format-eval-value.js";
+export { DEFAULT_TRACE_FILENAME } from "./constants.js";
 export type * from "./types.js";
diff --git a/packages/browser/src/perf-observer.ts b/packages/browser/src/perf-observer.ts
index 1fd83fac4..fb4c98faa 100644
--- a/packages/browser/src/perf-observer.ts
+++ b/packages/browser/src/perf-observer.ts
@@ -1,4 +1,4 @@
-import type { PerformanceReport } from "./types.js";
+import type { PageVitals } from "./types.js";
 
 // Runs in the page (via evaluate) and resolves after `windowMs`. Installs fresh
 // LoAF / LCP / CLS observers with `buffered: true`, so frames already in the
@@ -12,7 +12,7 @@ import type { PerformanceReport } from "./types.js";
 // anything at or below it: the first run after an interaction still captures its
 // frames, a second run sees only what fired since. LoAF fields are not in
 // lib.dom, so the casts here are unavoidable.
-export const collectPerformanceReport = (windowMs: number): Promise<PerformanceReport> => {
+export const collectPerformanceReport = (windowMs: number): Promise<PageVitals> => {
   interface ScriptTiming {
     sourceURL?: string;
     sourceFunctionName?: string;
@@ -31,7 +31,7 @@ export const collectPerformanceReport = (windowMs: number): Promise<PerformanceR
     hadRecentInput: boolean;
   }
   interface MutableReport {
-    longAnimationFrames: PerformanceReport["longAnimationFrames"];
+    longAnimationFrames: PageVitals["longAnimationFrames"];
     largestContentfulPaintMs: number | null;
     cumulativeLayoutShift: number;
   }
@@ -42,7 +42,7 @@ export const collectPerformanceReport = (windowMs: number): Promise<PerformanceR
   }
   const WATERMARK_KEY = "__REACT_DOCTOR_PERF_WATERMARK__";
 
-  return new Promise<PerformanceReport>((resolve) => {
+  return new Promise<PageVitals>((resolve) => {
     const report: MutableReport = {
       longAnimationFrames: [],
       largestContentfulPaintMs: null,
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 1830c8a71..27f1319e3 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -3,6 +3,7 @@ import { fileURLToPath } from "node:url";
 import type { Browser, CDPSession, ConsoleMessage, Page, Request, Response } from "playwright-core";
 import { connectToBrowser, type BrowserConnection } from "./connect.js";
 import { analyzeCpuProfile } from "./analyze-cpu-profile.js";
+import { analyzeTimelineTrace } from "./analyze-timeline-trace.js";
 import {
   DEFAULT_CPU_SAMPLING_INTERVAL_US,
   MAX_VIOLATION_TARGETS,
@@ -10,26 +11,36 @@ import {
   PERFORMANCE_OBSERVE_WINDOW_MS,
   REACT_PROFILER_INJECT_FILE,
   SETTLE_TIMEOUT_MS,
+  TIMELINE_TRACE_CATEGORIES,
 } from "./constants.js";
 import { collectPerformanceReport } from "./perf-observer.js";
+import { writeTraceFile } from "./utils/write-trace-file.js";
 import { analyzeReactProfile } from "./react-profiler/analyze-profile.js";
 import type { ReactProfilerDataExport } from "./react-profiler/types/profiling-export.js";
 import type {
   AccessibilityViolation,
   BrowserConnectOptions,
   ConsoleMessageEntry,
+  InspectOptions,
   NetworkRequestEntry,
   PageInspection,
-  PerformanceReport,
+  PageVitals,
   Viewport,
 } from "./types.js";
 
-const emptyPerformanceReport = (): PerformanceReport => ({
+const emptyVitals = (): PageVitals => ({
   longAnimationFrames: [],
   largestContentfulPaintMs: null,
   cumulativeLayoutShift: 0,
 });
 
+// A Chrome DevTools trace event as it streams over CDP (loosely typed there as a
+// string map). The full record — every field — is written to the trace file; the
+// roll-up only reads `name`/`dur`, which it narrows itself.
+interface TraceEventRecord {
+  [key: string]: unknown;
+}
+
 const resolveActivePage = async (browser: Browser): Promise<Page> => {
   for (const context of browser.contexts()) {
     const [firstPage] = context.pages();
@@ -232,25 +243,66 @@ export class BrowserSession {
   // A per-page watermark inside collectPerformanceReport keeps a repeated
   // no-reload measurement from re-counting frames an earlier command already
   // reported on the same persistent page.
-  private measureCurrentPerformance(): Promise<PerformanceReport> {
+  private measureCurrentPerformance(): Promise<PageVitals> {
     return this.page.evaluate(collectPerformanceReport, PERFORMANCE_OBSERVE_WINDOW_MS);
   }
 
-  // Drive the current page (optionally running `expression` — the same Playwright
-  // code `evaluate` takes) while recording the whole runtime picture in one pass:
-  // console + network listeners, a V8 CPU profile (the literal Chrome DevTools
-  // profiler over CDP), the React DevTools render profile, page performance, and
-  // an accessibility audit. This never navigates on its own — drive a fresh load
-  // with `inspect("page.goto('...')")`, or `open` a URL first then inspect an
-  // action on it. React data is null on a production build or a page not opened
-  // with the profiler; it covers the driven action, not the initial mount.
-  async inspect(expression?: string): Promise<PageInspection> {
+  // Begin a best-effort DevTools timeline trace on the CDP session, returning an
+  // idempotent `stop()` that resolves the collected events (empty if tracing never
+  // started, whatever had arrived if `Tracing.end` fails), so the caller can bracket
+  // exactly the recording window. Runs alongside the Profiler domain — the categories
+  // deliberately exclude the V8 CPU profiler so the two don't collide.
+  private async startTimelineTrace(
+    cdpSession: CDPSession,
+  ): Promise<() => Promise<TraceEventRecord[]>> {
+    const events: TraceEventRecord[] = [];
+    const onData = (payload: { value?: TraceEventRecord[] }): void => {
+      if (payload.value) events.push(...payload.value);
+    };
+    cdpSession.on("Tracing.dataCollected", onData);
+    const started = await cdpSession
+      .send("Tracing.start", {
+        categories: TIMELINE_TRACE_CATEGORIES,
+        transferMode: "ReportEvents",
+      })
+      .then(() => true)
+      .catch(() => false);
+    if (!started) {
+      cdpSession.off("Tracing.dataCollected", onData);
+      return async () => [];
+    }
+    let stopped = false;
+    return async () => {
+      if (stopped) return events;
+      stopped = true;
+      await new Promise<void>((resolve) => {
+        cdpSession.once("Tracing.tracingComplete", () => resolve());
+        cdpSession.send("Tracing.end").catch(() => resolve());
+      });
+      cdpSession.off("Tracing.dataCollected", onData);
+      return events;
+    };
+  }
+
+  // Drive the current page (optionally running `options.expression` — the same
+  // Playwright code `evaluate` takes) while recording the whole runtime picture
+  // in one pass: console + network listeners, a V8 CPU profile (the literal
+  // Chrome DevTools profiler over CDP), a DevTools timeline trace (style/layout/
+  // hit-test cost, written to `options.tracePath` and rolled up into the perf
+  // report), the React DevTools render profile, page performance, and an
+  // accessibility audit. This never navigates on its own — drive a fresh load
+  // with `inspect({ expression: "page.goto('...')" })`, or `open` a URL first
+  // then inspect an action on it. React data is null on a production build or a
+  // page not opened with the profiler; it covers the driven action, not mount.
+  async inspect(options: InspectOptions = {}): Promise<PageInspection> {
+    const { expression, tracePath } = options;
     const consoleEntries: ConsoleMessageEntry[] = [];
     const networkByRequest = new Map<Request, NetworkRequestEntry>();
     // Open the CDP session before attaching listeners: if `newCDPSession` throws,
     // the listeners are never bound, so they can't leak onto the persistent page.
     const cdpSession = await this.page.context().newCDPSession(this.page);
     const detachers: Array<() => void> = [];
+    let stopTimelineTrace: (() => Promise<TraceEventRecord[]>) | null = null;
     try {
       detachers.push(this.collectConsole(consoleEntries), this.collectNetwork(networkByRequest));
       await this.settle();
@@ -259,6 +311,7 @@ export class BrowserSession {
         interval: DEFAULT_CPU_SAMPLING_INTERVAL_US,
       });
       await cdpSession.send("Profiler.start");
+      stopTimelineTrace = await this.startTimelineTrace(cdpSession);
 
       const reactStarted = await this.page.evaluate(() => {
         if (!globalThis.__REACT_PERF__) return false;
@@ -267,14 +320,14 @@ export class BrowserSession {
       });
 
       let result: unknown = null;
-      let performance = emptyPerformanceReport();
+      let vitals = emptyVitals();
       let reactExport: ReactProfilerDataExport | null = null;
       try {
         if (expression) result = (await this.evaluate(expression)) ?? null;
         // The perf observe window doubles as the recording window: it runs after
         // the driven action so post-action jank, React commits (concurrent
         // renders land async), and CPU samples all land before we stop.
-        performance = await this.measureCurrentPerformance();
+        vitals = await this.measureCurrentPerformance();
       } finally {
         // Always stop the React profiler, even if the expression threw: the
         // renderer profiles the persistent page, and `start()` no-ops while
@@ -287,8 +340,13 @@ export class BrowserSession {
         }
       }
 
+      const traceEvents = await stopTimelineTrace();
+      stopTimelineTrace = null;
       const { profile } = await cdpSession.send("Profiler.stop");
 
+      const writtenTracePath =
+        tracePath && traceEvents.length > 0 ? await writeTraceFile(tracePath, traceEvents) : null;
+
       // Detach the page listeners before the accessibility audit so axe's injected
       // evaluate (and anything it logs) can't land in the captured signals.
       for (const detach of detachers) detach();
@@ -299,18 +357,20 @@ export class BrowserSession {
         result,
         console: consoleEntries,
         network: [...networkByRequest.values()],
-        performance,
+        performance: { ...vitals, timeline: analyzeTimelineTrace(traceEvents) },
         accessibility,
+        tracePath: writtenTracePath,
         profile: {
           react: reactExport ? analyzeReactProfile(reactExport) : null,
           cpu: analyzeCpuProfile(profile),
         },
       };
     } finally {
-      // Stop V8 sampling before disabling, so a throw before the happy-path
-      // `Profiler.stop` above can't leave the persistent page recording and skew
-      // later runs. A second stop after a clean run just returns an ignored
-      // profile, so this is safe on every path.
+      // Stop the timeline trace and V8 sampling before disabling, so a throw
+      // before the happy-path stops above can't leave the persistent page
+      // recording and skew later runs. A second stop after a clean run is a
+      // no-op / ignored, so this is safe on every path.
+      if (stopTimelineTrace) await stopTimelineTrace().catch(() => []);
       await cdpSession.send("Profiler.stop").catch(() => {});
       await cdpSession.send("Profiler.disable").catch(() => {});
       await cdpSession.detach().catch(() => {});
diff --git a/packages/browser/src/types.ts b/packages/browser/src/types.ts
index d7f5377fe..9961cc7b5 100644
--- a/packages/browser/src/types.ts
+++ b/packages/browser/src/types.ts
@@ -48,10 +48,41 @@ export interface LongAnimationFrame {
   scripts: PerformanceScriptAttribution[];
 }
 
+// One timeline phase rolled up from the Chrome DevTools trace: the summed duration of
+// its events, how many events there were, and the single longest one (durations in ms).
+export interface TimelinePhaseStat {
+  totalMs: number;
+  count: number;
+  longestMs: number;
+}
+
+// Forced reflows show up in the timeline trace as style-recalc / layout / hit-test events,
+// with `paint` following them; this is where `getComputedStyle` / `getBoundingClientRect` /
+// `elementsFromPoint` cost lands, which the script-level LoAF rows can't isolate.
+export interface TimelineAnalysis {
+  styleRecalc: TimelinePhaseStat;
+  layout: TimelinePhaseStat;
+  hitTest: TimelinePhaseStat;
+  paint: TimelinePhaseStat;
+}
+
 export interface PerformanceReport {
   longAnimationFrames: LongAnimationFrame[];
   largestContentfulPaintMs: number | null;
   cumulativeLayoutShift: number;
+  timeline: TimelineAnalysis;
+}
+
+// Everything `inspect` observes that the page itself reports (LoAF / LCP / CLS),
+// before the trace-derived `timeline` is folded in to form the PerformanceReport.
+export type PageVitals = Omit<PerformanceReport, "timeline">;
+
+export interface InspectOptions {
+  // Async expression with the Playwright `page` in scope, driven while recording.
+  expression?: string;
+  // Where to write the raw Chrome DevTools timeline trace (loadable in the
+  // DevTools Performance panel). Omit to skip writing the file.
+  tracePath?: string;
 }
 
 // The full runtime picture from one `inspect` pass: the driven expression's
@@ -63,6 +94,8 @@ export interface PageInspection {
   network: NetworkRequestEntry[];
   performance: PerformanceReport;
   accessibility: AccessibilityViolation[];
+  // Absolute path the raw timeline trace was written to, or null when none was.
+  tracePath: string | null;
   profile: ProfileAnalysis;
 }
 
diff --git a/packages/browser/src/utils/write-trace-file.ts b/packages/browser/src/utils/write-trace-file.ts
new file mode 100644
index 000000000..c3ca44b1f
--- /dev/null
+++ b/packages/browser/src/utils/write-trace-file.ts
@@ -0,0 +1,10 @@
+import { writeFile } from "node:fs/promises";
+import { resolve } from "node:path";
+
+// Write raw Chrome DevTools trace events to `path` (resolved to absolute, overwriting
+// any existing file) as the `{ traceEvents }` JSON the Performance panel imports; returns that path.
+export const writeTraceFile = async (path: string, traceEvents: unknown[]): Promise<string> => {
+  const absolutePath = resolve(path);
+  await writeFile(absolutePath, JSON.stringify({ traceEvents }));
+  return absolutePath;
+};
diff --git a/packages/browser/tests/analyze-timeline-trace.test.ts b/packages/browser/tests/analyze-timeline-trace.test.ts
new file mode 100644
index 000000000..45f8a8d22
--- /dev/null
+++ b/packages/browser/tests/analyze-timeline-trace.test.ts
@@ -0,0 +1,40 @@
+import { describe, expect, it } from "vite-plus/test";
+import { analyzeTimelineTrace } from "../src/analyze-timeline-trace.js";
+
+describe("analyzeTimelineTrace", () => {
+  it("rolls up duration, count, and longest per phase (microseconds to ms)", () => {
+    const timeline = analyzeTimelineTrace([
+      { name: "UpdateLayoutTree", dur: 60_000 },
+      { name: "UpdateLayoutTree", dur: 25_500 },
+      { name: "Layout", dur: 40_000 },
+      { name: "HitTest", dur: 68_000 },
+      { name: "Paint", dur: 5_000 },
+    ]);
+
+    expect(timeline.styleRecalc).toEqual({ totalMs: 85.5, count: 2, longestMs: 60 });
+    expect(timeline.layout).toEqual({ totalMs: 40, count: 1, longestMs: 40 });
+    expect(timeline.hitTest).toEqual({ totalMs: 68, count: 1, longestMs: 68 });
+    expect(timeline.paint).toEqual({ totalMs: 5, count: 1, longestMs: 5 });
+  });
+
+  it("ignores unrelated events and entries without a numeric duration", () => {
+    const timeline = analyzeTimelineTrace([
+      { name: "RunTask", dur: 99_000 },
+      { name: "Layout" },
+      { name: "Layout", dur: "nope" },
+      { name: 42, dur: 10_000 },
+    ]);
+
+    expect(timeline.layout).toEqual({ totalMs: 0, count: 0, longestMs: 0 });
+  });
+
+  it("returns a fully-zeroed analysis for an empty trace", () => {
+    const timeline = analyzeTimelineTrace([]);
+    expect(timeline).toEqual({
+      styleRecalc: { totalMs: 0, count: 0, longestMs: 0 },
+      layout: { totalMs: 0, count: 0, longestMs: 0 },
+      hitTest: { totalMs: 0, count: 0, longestMs: 0 },
+      paint: { totalMs: 0, count: 0, longestMs: 0 },
+    });
+  });
+});
diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
index 71e172769..92bc3c20f 100644
--- a/packages/mcp/src/tools/browser.ts
+++ b/packages/mcp/src/tools/browser.ts
@@ -1,6 +1,6 @@
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
-import { formatEvalValue, parseViewport } from "@react-doctor/browser";
+import { DEFAULT_TRACE_FILENAME, formatEvalValue, parseViewport } from "@react-doctor/browser";
 import { DEFAULT_CDP_ENDPOINT_HINT } from "../constants.js";
 import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
 import { withSession, type BrowserToolConnection } from "../utils/with-session.js";
@@ -71,7 +71,13 @@ export const registerBrowserTools = (server: McpServer): void => {
           .boolean()
           .optional()
           .describe(
-            "Set true to record and return the full runtime picture while the expression runs — console, network, performance (jank/LCP/CLS), accessibility, the React render profile (slow commits, hot components, unnecessary re-renders), and a V8 CPU profile. Omit for just the expression's return value.",
+            "Set true to record and return the full runtime picture while the expression runs — console, network, performance (LoAF jank/LCP/CLS plus a `timeline` roll-up of forced style-recalc/layout/hit-test/paint cost from a DevTools trace), accessibility, the React render profile (slow commits, hot components, unnecessary re-renders), and a V8 CPU profile. Also writes the raw timeline trace to `out` (loadable in DevTools) and returns its path as `tracePath`. Omit for just the expression's return value.",
+          ),
+        out: z
+          .string()
+          .optional()
+          .describe(
+            `With profile:true, write the raw DevTools timeline trace here (default ${DEFAULT_TRACE_FILENAME} in the working directory)`,
           ),
         ...connectionShape,
         ...viewportShape,
@@ -82,7 +88,12 @@ export const registerBrowserTools = (server: McpServer): void => {
       runTool(async () => {
         if (args.profile) {
           return jsonResult(
-            await withSession(toConnection(args), (session) => session.inspect(args.expression)),
+            await withSession(toConnection(args), (session) =>
+              session.inspect({
+                expression: args.expression,
+                tracePath: args.out ?? DEFAULT_TRACE_FILENAME,
+              }),
+            ),
           );
         }
         if (args.expression === undefined) return textResult("(no value)");
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index 360e5dceb..086b06200 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -1,5 +1,6 @@
 import {
   BrowserSession,
+  DEFAULT_TRACE_FILENAME,
   formatEvalValue,
   type AccessibilityViolation,
   type ConsoleMessageEntry,
@@ -8,6 +9,8 @@ import {
   type PageInspection,
   type PerformanceReport,
   type ReactProfileAnalysis,
+  type TimelineAnalysis,
+  type TimelinePhaseStat,
   type Viewport,
 } from "@react-doctor/browser";
 import { DEFAULT_SCREENSHOT_FILENAME, METRIC } from "../utils/constants.js";
@@ -63,8 +66,9 @@ export const browserEvalAction = async (
 ): Promise<void> => {
   recordCount(METRIC.cliInvoked, 1, { command: "browser.eval" });
   if (options.profile) {
+    const tracePath = options.out ?? DEFAULT_TRACE_FILENAME;
     await withSession(options, async (session) => {
-      printInspection(await session.inspect(expression));
+      printInspection(await session.inspect({ expression, tracePath }));
     });
     return;
   }
@@ -130,6 +134,7 @@ const printNetworkRequests = (requests: NetworkRequestEntry[]): void => {
 const printPerformanceReport = (report: PerformanceReport): void => {
   const lcp = report.largestContentfulPaintMs;
   logger.log(`LCP: ${lcp === null ? "n/a" : `${lcp}ms`}   CLS: ${report.cumulativeLayoutShift}`);
+  printTimelineAnalysis(report.timeline);
   if (report.longAnimationFrames.length === 0) {
     logger.log("No long animation frames (>50ms) — no main-thread jank captured");
     return;
@@ -150,6 +155,25 @@ const printPerformanceReport = (report: PerformanceReport): void => {
   }
 };
 
+// Trace-derived forced-reflow cost: each phase is the native style/layout/
+// hit-test/paint wall time the recording spent, naming where reads on a dirty
+// page land (getComputedStyle/getBoundingClientRect → style-recalc/layout;
+// elementsFromPoint → hit-test). Phases with no events are dropped.
+const printTimelineAnalysis = (timeline: TimelineAnalysis): void => {
+  const phases: Array<[string, TimelinePhaseStat]> = [
+    ["style-recalc", timeline.styleRecalc],
+    ["layout", timeline.layout],
+    ["hit-test", timeline.hitTest],
+    ["paint", timeline.paint],
+  ];
+  const recorded = phases.filter(([, stat]) => stat.count > 0);
+  if (recorded.length === 0) return;
+  logger.log("Timeline (trace), forced-reflow cost:");
+  for (const [label, stat] of recorded) {
+    logger.log(`  ${label}: ${stat.totalMs}ms across ${stat.count} (longest ${stat.longestMs}ms)`);
+  }
+};
+
 const printReactProfile = (analysis: ReactProfileAnalysis): void => {
   logger.log(
     `${analysis.commitCount} commit(s) across ${analysis.rootCount} root(s), ${analysis.totalCommitDurationMs}ms total render time, ${analysis.unnecessaryRenderCount} unnecessary render(s)`,
@@ -220,4 +244,10 @@ const printInspection = (inspection: PageInspection): void => {
 
   logger.log("\n# CPU");
   printCpuProfile(inspection.profile.cpu);
+
+  if (inspection.tracePath) {
+    logger.log(
+      `\nTimeline trace written to ${inspection.tracePath} (load in DevTools → Performance).`,
+    );
+  }
 };
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
index 7071f5027..3a5e2a01f 100644
--- a/packages/react-doctor/src/cli/index.ts
+++ b/packages/react-doctor/src/cli/index.ts
@@ -269,7 +269,11 @@ withRenderOptions(
     )
     .option(
       "--profile",
-      "record console, network, performance, accessibility, and the React + CPU profiles while the expression runs (omit the expression to measure the live page idle)",
+      "record console, network, performance (incl. a DevTools timeline trace), accessibility, and the React + CPU profiles while the expression runs (omit the expression to measure the live page idle)",
+    )
+    .option(
+      "--out <path>",
+      "with --profile, write the raw timeline trace here for DevTools (default react-doctor-trace.json)",
     ),
 ).action(browserEvalAction);
 
diff --git a/skills/react-doctor/references/performance.md b/skills/react-doctor/references/performance.md
index 63363d7c0..da3830a3c 100644
--- a/skills/react-doctor/references/performance.md
+++ b/skills/react-doctor/references/performance.md
@@ -19,6 +19,8 @@ npx react-doctor browser eval --profile   # measures the current page, no reload
 
 It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The performance section leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
 
+It also captures a Chrome DevTools timeline trace over the same window. The perf section rolls it up into the native cost a forced reflow incurs — total/longest **style-recalc**, **layout**, **hit-test**, and **paint** time — which the script-level LoAF rows can't isolate (this is where `getComputedStyle` / `getBoundingClientRect` / `elementsFromPoint` land). The raw trace is written to `react-doctor-trace.json` (override with `--out`); drop it into the DevTools **Performance** panel for the full flame chart.
+
 To attribute interaction jank (a slow click, scroll, or keypress), pass the repro as the expression so it runs while recording: `browser open` the page, then `browser eval 'page.getByText("Next").click()' --profile`. The recording covers the action, so its frames, renders, and CPU samples are all included.
 
 ## 3. Analyze the worst frame first

From 08c8da4aa513c7bf190bcdee12d733bb6ab5ba5f Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 19:05:44 -0700
Subject: [PATCH 15/38] fix(browser): harden inspect from dogfooding eval
 --profile

Surfaced while testing eval --profile against a 40k-element page:

- The launch fallback reused the default debug port even when another
  app held it, dooming the spawn. Launch on a free port when the
  default is taken and persist the endpoint so later commands reattach.
- The long-animation-frame report ranked by total duration, surfacing
  multi-second non-blocking frames over real jank. Rank by blocking
  time (the standardized jank signal) and drop non-blocking frames.
- The React profiler export serialized in-page while the V8 CPU
  profiler was still sampling, polluting the CPU profile with our own
  overhead. Stop the CPU profiler and timeline trace before the export.
---
 .../react-doctor/references/performance.md    |  2 +-
 packages/browser/src/connect.ts               | 80 ++++++++++++++-----
 packages/browser/src/constants.ts             | 15 +++-
 packages/browser/src/perf-observer.ts         | 11 ++-
 packages/browser/src/session.ts               | 35 +++++---
 .../browser/src/utils/find-available-port.ts  | 15 ++++
 .../browser/src/utils/is-port-available.ts    | 14 ++++
 .../src/utils/read-launched-endpoint.ts       | 14 ++++
 .../src/utils/write-launched-endpoint.ts      | 13 +++
 .../react-doctor/src/cli/commands/browser.ts  |  6 +-
 skills/react-doctor/references/performance.md |  2 +-
 11 files changed, 166 insertions(+), 41 deletions(-)
 create mode 100644 packages/browser/src/utils/find-available-port.ts
 create mode 100644 packages/browser/src/utils/is-port-available.ts
 create mode 100644 packages/browser/src/utils/read-launched-endpoint.ts
 create mode 100644 packages/browser/src/utils/write-launched-endpoint.ts

diff --git a/.agents/skills/react-doctor/references/performance.md b/.agents/skills/react-doctor/references/performance.md
index da3830a3c..49829b303 100644
--- a/.agents/skills/react-doctor/references/performance.md
+++ b/.agents/skills/react-doctor/references/performance.md
@@ -17,7 +17,7 @@ npx react-doctor browser eval 'page.goto("http://localhost:3000")' --profile
 npx react-doctor browser eval --profile   # measures the current page, no reload
 ```
 
-It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The performance section leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
+It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one (which lands on a free port automatically if 9222 is taken, and later commands reattach to it). The performance section ranks frames by input-blocking time — the jank signal — and drops non-blocking ones (a long but non-blocking frame, like the first frame after navigation, is not jank), leading with the most-blocking frame, then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no blocking frames, which is a result, not a failure.
 
 It also captures a Chrome DevTools timeline trace over the same window. The perf section rolls it up into the native cost a forced reflow incurs — total/longest **style-recalc**, **layout**, **hit-test**, and **paint** time — which the script-level LoAF rows can't isolate (this is where `getComputedStyle` / `getBoundingClientRect` / `elementsFromPoint` land). The raw trace is written to `react-doctor-trace.json` (override with `--out`); drop it into the DevTools **Performance** panel for the full flame chart.
 
diff --git a/packages/browser/src/connect.ts b/packages/browser/src/connect.ts
index b6fb6b24c..d85a2f366 100644
--- a/packages/browser/src/connect.ts
+++ b/packages/browser/src/connect.ts
@@ -3,14 +3,32 @@ import { CONNECT_TIMEOUT_MS, DEFAULT_CDP_ENDPOINT } from "./constants.js";
 import { launchPersistentChrome } from "./launch.js";
 import type { BrowserConnectOptions } from "./types.js";
 import { cdpPortFromEndpoint } from "./utils/cdp-port.js";
+import { findAvailablePort } from "./utils/find-available-port.js";
 import { isLoopbackEndpoint } from "./utils/is-loopback-endpoint.js";
+import { isPortAvailable } from "./utils/is-port-available.js";
 import { loadPlaywright } from "./utils/load-playwright.js";
+import { readLaunchedEndpoint } from "./utils/read-launched-endpoint.js";
+import { writeLaunchedEndpoint } from "./utils/write-launched-endpoint.js";
 
 export interface BrowserConnection {
   browser: Browser;
   launched: boolean;
 }
 
+// The endpoint to launch our own Chrome on. The default port is often held by
+// another app (some Chromium-based browsers squat on 9222), and reusing a port
+// we just failed to attach to is what doomed the launch — so when it isn't free,
+// pick one that is. An explicit --cdp is honored exactly: the user asked for that
+// port, so a busy one should surface as a clear failure, not move silently.
+const resolveLaunchEndpoint = async (endpoint: string): Promise<string> => {
+  const port = Number(cdpPortFromEndpoint(endpoint));
+  if (await isPortAvailable(port)) return endpoint;
+  const freePort = await findAvailablePort();
+  const url = new URL(endpoint);
+  url.port = String(freePort);
+  return url.origin;
+};
+
 // Attach to a debuggable Chrome over CDP. If none is reachable on a local
 // endpoint, launch our own persistent, reattachable instance and attach to
 // that. We always end up attached over CDP — never holding a launched process
@@ -19,29 +37,49 @@ export interface BrowserConnection {
 export const connectToBrowser = async (
   options: BrowserConnectOptions = {},
 ): Promise<BrowserConnection> => {
-  const endpoint = options.cdpEndpoint ?? DEFAULT_CDP_ENDPOINT;
   const { chromium } = await loadPlaywright();
-  try {
-    const browser = await chromium.connectOverCDP(endpoint, { timeout: CONNECT_TIMEOUT_MS });
-    return { browser, launched: false };
-  } catch (attachError) {
-    // Only launch for a loopback endpoint — we can't spawn Chrome on a remote host.
-    if (options.launch === false || !isLoopbackEndpoint(endpoint)) {
-      throw new Error(
-        `Could not attach to Chrome at ${endpoint}. Start Chrome with --remote-debugging-port=${cdpPortFromEndpoint(endpoint)}, or allow launching a local browser.`,
-        { cause: attachError },
-      );
-    }
-    const reachableEndpoint = await launchPersistentChrome(endpoint);
+
+  // Without an explicit --cdp, prefer the instance we previously launched (which
+  // may be on a non-default port) before the well-known default.
+  const launchedEndpoint = readLaunchedEndpoint();
+  const attachCandidates = options.cdpEndpoint
+    ? [options.cdpEndpoint]
+    : launchedEndpoint && launchedEndpoint !== DEFAULT_CDP_ENDPOINT
+      ? [launchedEndpoint, DEFAULT_CDP_ENDPOINT]
+      : [DEFAULT_CDP_ENDPOINT];
+
+  let lastAttachError: unknown;
+  for (const candidate of attachCandidates) {
     try {
-      return {
-        browser: await chromium.connectOverCDP(reachableEndpoint, { timeout: CONNECT_TIMEOUT_MS }),
-        launched: true,
-      };
-    } catch (launchedAttachError) {
-      throw new Error(`Launched Chrome at ${reachableEndpoint} but could not attach to it.`, {
-        cause: launchedAttachError,
-      });
+      const browser = await chromium.connectOverCDP(candidate, { timeout: CONNECT_TIMEOUT_MS });
+      return { browser, launched: false };
+    } catch (attachError) {
+      lastAttachError = attachError;
     }
   }
+
+  const fallbackEndpoint = options.cdpEndpoint ?? DEFAULT_CDP_ENDPOINT;
+  // Only launch for a loopback endpoint — we can't spawn Chrome on a remote host.
+  if (options.launch === false || !isLoopbackEndpoint(fallbackEndpoint)) {
+    throw new Error(
+      `Could not attach to Chrome at ${fallbackEndpoint}. Start Chrome with --remote-debugging-port=${cdpPortFromEndpoint(fallbackEndpoint)}, or allow launching a local browser.`,
+      { cause: lastAttachError },
+    );
+  }
+
+  const launchEndpoint = options.cdpEndpoint
+    ? options.cdpEndpoint
+    : await resolveLaunchEndpoint(fallbackEndpoint);
+  const reachableEndpoint = await launchPersistentChrome(launchEndpoint);
+  writeLaunchedEndpoint(reachableEndpoint);
+  try {
+    return {
+      browser: await chromium.connectOverCDP(reachableEndpoint, { timeout: CONNECT_TIMEOUT_MS }),
+      launched: true,
+    };
+  } catch (launchedAttachError) {
+    throw new Error(`Launched Chrome at ${reachableEndpoint} but could not attach to it.`, {
+      cause: launchedAttachError,
+    });
+  }
 };
diff --git a/packages/browser/src/constants.ts b/packages/browser/src/constants.ts
index 346fb8356..7f9059036 100644
--- a/packages/browser/src/constants.ts
+++ b/packages/browser/src/constants.ts
@@ -16,18 +16,27 @@ export const NAVIGATION_TIMEOUT_MS = 30_000;
 // polling, analytics) hits this cap and we proceed anyway.
 export const SETTLE_TIMEOUT_MS = 10_000;
 
+export const REACT_DOCTOR_CACHE_DIRECTORY = join(homedir(), ".cache", "react-doctor");
+
 // Dedicated Chrome profile for the browser we launch ourselves. Mirrors how
 // Chrome DevTools MCP keeps a persistent profile out of the user's real one, so
 // our launched instance is reattachable across commands and never touches their
 // main browsing data. (Chrome also refuses --remote-debugging-port on the
 // default profile, so a dedicated dir is required regardless.)
 export const LAUNCHED_CHROME_PROFILE_DIRECTORY = join(
-  homedir(),
-  ".cache",
-  "react-doctor",
+  REACT_DOCTOR_CACHE_DIRECTORY,
   "chrome-profile",
 );
 
+// Where we remember the endpoint of the Chrome we launched. The default port may
+// be taken by another app, so the launch can land on a free port instead; the
+// next command reads this to reattach to that same persistent instance before
+// falling back to the well-known default.
+export const LAUNCHED_CHROME_ENDPOINT_FILE = join(
+  REACT_DOCTOR_CACHE_DIRECTORY,
+  "launched-endpoint",
+);
+
 export const LAUNCH_READY_TIMEOUT_MS = 20_000;
 export const LAUNCH_POLL_INTERVAL_MS = 100;
 
diff --git a/packages/browser/src/perf-observer.ts b/packages/browser/src/perf-observer.ts
index fb4c98faa..e25849516 100644
--- a/packages/browser/src/perf-observer.ts
+++ b/packages/browser/src/perf-observer.ts
@@ -105,9 +105,14 @@ export const collectPerformanceReport = (windowMs: number): Promise<PageVitals>
       for (const observer of observers) observer.disconnect();
       windowScope[WATERMARK_KEY] = nextWatermark;
       resolve({
-        longAnimationFrames: report.longAnimationFrames.sort(
-          (left, right) => right.durationMs - left.durationMs,
-        ),
+        // Blocking duration — not total duration — is the jank signal: a long
+        // frame that blocks nothing (an idle/backgrounded render, the first
+        // frame after navigation) isn't main-thread jank, and ranking by total
+        // duration buries the frames that actually stalled input behind those
+        // artifacts. Drop the non-blocking frames and rank by what blocked.
+        longAnimationFrames: report.longAnimationFrames
+          .filter((frame) => frame.blockingDurationMs > 0)
+          .sort((left, right) => right.blockingDurationMs - left.blockingDurationMs),
         largestContentfulPaintMs: report.largestContentfulPaintMs,
         cumulativeLayoutShift: Math.round(report.cumulativeLayoutShift * 1000) / 1000,
       });
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 27f1319e3..1b539ec51 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -2,7 +2,7 @@ import { readFile } from "node:fs/promises";
 import { fileURLToPath } from "node:url";
 import type { Browser, CDPSession, ConsoleMessage, Page, Request, Response } from "playwright-core";
 import { connectToBrowser, type BrowserConnection } from "./connect.js";
-import { analyzeCpuProfile } from "./analyze-cpu-profile.js";
+import { analyzeCpuProfile, type CdpCpuProfile } from "./analyze-cpu-profile.js";
 import { analyzeTimelineTrace } from "./analyze-timeline-trace.js";
 import {
   DEFAULT_CPU_SAMPLING_INTERVAL_US,
@@ -21,6 +21,7 @@ import type {
   AccessibilityViolation,
   BrowserConnectOptions,
   ConsoleMessageEntry,
+  CpuProfileAnalysis,
   InspectOptions,
   NetworkRequestEntry,
   PageInspection,
@@ -34,6 +35,15 @@ const emptyVitals = (): PageVitals => ({
   cumulativeLayoutShift: 0,
 });
 
+// Used only when `Profiler.stop` fails (the recording is otherwise still useful):
+// the rest of the inspection — console, network, React, axe — shouldn't be lost
+// over a CPU-profile hiccup, so the CPU lens degrades to empty instead of throwing.
+const emptyCpuAnalysis = (): CpuProfileAnalysis => ({
+  durationMs: 0,
+  sampleCount: 0,
+  topFunctions: [],
+});
+
 // A Chrome DevTools trace event as it streams over CDP (loosely typed there as a
 // string map). The full record — every field — is written to the trace file; the
 // roll-up only reads `name`/`dur`, which it narrows itself.
@@ -322,6 +332,8 @@ export class BrowserSession {
       let result: unknown = null;
       let vitals = emptyVitals();
       let reactExport: ReactProfilerDataExport | null = null;
+      let traceEvents: TraceEventRecord[] = [];
+      let cpuProfile: CdpCpuProfile | null = null;
       try {
         if (expression) result = (await this.evaluate(expression)) ?? null;
         // The perf observe window doubles as the recording window: it runs after
@@ -329,10 +341,17 @@ export class BrowserSession {
         // renders land async), and CPU samples all land before we stop.
         vitals = await this.measureCurrentPerformance();
       } finally {
-        // Always stop the React profiler, even if the expression threw: the
-        // renderer profiles the persistent page, and `start()` no-ops while
-        // already profiling, so a left-running recording would skew later runs
-        // until the page reloads.
+        // Stop the recorders BEFORE reading the React profile, and always (even
+        // if the expression threw — a left-running recording on the persistent
+        // page would skew later runs). The React export serializes its data
+        // in-page (a large structured clone), so stopping the V8 CPU profiler
+        // and the timeline trace first keeps that serialization out of the
+        // user-facing profiles instead of dominating them as our own overhead.
+        if (stopTimelineTrace) {
+          traceEvents = await stopTimelineTrace().catch(() => []);
+          stopTimelineTrace = null;
+        }
+        cpuProfile = (await cdpSession.send("Profiler.stop").catch(() => null))?.profile ?? null;
         if (reactStarted) {
           reactExport = await this.page
             .evaluate(() => globalThis.__REACT_PERF__?.stop() ?? null)
@@ -340,10 +359,6 @@ export class BrowserSession {
         }
       }
 
-      const traceEvents = await stopTimelineTrace();
-      stopTimelineTrace = null;
-      const { profile } = await cdpSession.send("Profiler.stop");
-
       const writtenTracePath =
         tracePath && traceEvents.length > 0 ? await writeTraceFile(tracePath, traceEvents) : null;
 
@@ -362,7 +377,7 @@ export class BrowserSession {
         tracePath: writtenTracePath,
         profile: {
           react: reactExport ? analyzeReactProfile(reactExport) : null,
-          cpu: analyzeCpuProfile(profile),
+          cpu: cpuProfile ? analyzeCpuProfile(cpuProfile) : emptyCpuAnalysis(),
         },
       };
     } finally {
diff --git a/packages/browser/src/utils/find-available-port.ts b/packages/browser/src/utils/find-available-port.ts
new file mode 100644
index 000000000..3fafc6720
--- /dev/null
+++ b/packages/browser/src/utils/find-available-port.ts
@@ -0,0 +1,15 @@
+import { createServer, type AddressInfo } from "node:net";
+
+// Ask the OS for an unused loopback port (bind to port 0, read what it assigned).
+// There is an inherent race between releasing it here and Chrome claiming it, but
+// it is far less likely to collide than reusing a port we already know is taken.
+export const findAvailablePort = (): Promise<number> =>
+  new Promise<number>((resolve, reject) => {
+    const server = createServer();
+    server.once("error", reject);
+    server.once("listening", () => {
+      const { port } = server.address() as AddressInfo;
+      server.close(() => resolve(port));
+    });
+    server.listen(0, "127.0.0.1");
+  });
diff --git a/packages/browser/src/utils/is-port-available.ts b/packages/browser/src/utils/is-port-available.ts
new file mode 100644
index 000000000..46232a2f7
--- /dev/null
+++ b/packages/browser/src/utils/is-port-available.ts
@@ -0,0 +1,14 @@
+import { createServer } from "node:net";
+
+// True when a TCP server can bind the port on loopback. Used to detect that the
+// well-known debug port is already held (by another app or a stale Chrome)
+// before we launch our own instance onto it.
+export const isPortAvailable = (port: number): Promise<boolean> =>
+  new Promise<boolean>((resolve) => {
+    const server = createServer();
+    server.once("error", () => resolve(false));
+    server.once("listening", () => {
+      server.close(() => resolve(true));
+    });
+    server.listen(port, "127.0.0.1");
+  });
diff --git a/packages/browser/src/utils/read-launched-endpoint.ts b/packages/browser/src/utils/read-launched-endpoint.ts
new file mode 100644
index 000000000..32b3f567e
--- /dev/null
+++ b/packages/browser/src/utils/read-launched-endpoint.ts
@@ -0,0 +1,14 @@
+import { readFileSync } from "node:fs";
+import { LAUNCHED_CHROME_ENDPOINT_FILE } from "../constants.js";
+
+// The endpoint of the Chrome we last launched, so a later command reattaches to
+// that same instance (which may be on a non-default port) before trying the
+// well-known default. Best-effort: a missing file just means none was launched.
+export const readLaunchedEndpoint = (): string | null => {
+  try {
+    const endpoint = readFileSync(LAUNCHED_CHROME_ENDPOINT_FILE, "utf8").trim();
+    return endpoint.length > 0 ? endpoint : null;
+  } catch {
+    return null;
+  }
+};
diff --git a/packages/browser/src/utils/write-launched-endpoint.ts b/packages/browser/src/utils/write-launched-endpoint.ts
new file mode 100644
index 000000000..c50f2aeb9
--- /dev/null
+++ b/packages/browser/src/utils/write-launched-endpoint.ts
@@ -0,0 +1,13 @@
+import { mkdirSync, writeFileSync } from "node:fs";
+import { dirname } from "node:path";
+import { LAUNCHED_CHROME_ENDPOINT_FILE } from "../constants.js";
+
+// Remember where the Chrome we just launched is reachable so the next command
+// reattaches to it. Best-effort: an unwritable cache dir just means we fall back
+// to the default endpoint (and relaunch if needed) next time.
+export const writeLaunchedEndpoint = (endpoint: string): void => {
+  try {
+    mkdirSync(dirname(LAUNCHED_CHROME_ENDPOINT_FILE), { recursive: true });
+    writeFileSync(LAUNCHED_CHROME_ENDPOINT_FILE, endpoint);
+  } catch {}
+};
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index 086b06200..22122194d 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -136,10 +136,12 @@ const printPerformanceReport = (report: PerformanceReport): void => {
   logger.log(`LCP: ${lcp === null ? "n/a" : `${lcp}ms`}   CLS: ${report.cumulativeLayoutShift}`);
   printTimelineAnalysis(report.timeline);
   if (report.longAnimationFrames.length === 0) {
-    logger.log("No long animation frames (>50ms) — no main-thread jank captured");
+    logger.log("No blocking long animation frames — no main-thread jank captured");
     return;
   }
-  logger.log(`${report.longAnimationFrames.length} long animation frame(s), worst first:`);
+  logger.log(
+    `${report.longAnimationFrames.length} blocking long animation frame(s), most blocking first:`,
+  );
   for (const frame of report.longAnimationFrames) {
     logger.log(
       `${frame.durationMs}ms frame (blocking ${frame.blockingDurationMs}ms) @ ${frame.startTimeMs}ms`,
diff --git a/skills/react-doctor/references/performance.md b/skills/react-doctor/references/performance.md
index da3830a3c..49829b303 100644
--- a/skills/react-doctor/references/performance.md
+++ b/skills/react-doctor/references/performance.md
@@ -17,7 +17,7 @@ npx react-doctor browser eval 'page.goto("http://localhost:3000")' --profile
 npx react-doctor browser eval --profile   # measures the current page, no reload
 ```
 
-It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one. The performance section leads with the worst frame (duration plus input-blocking time), then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no long frames, which is a result, not a failure.
+It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one (which lands on a free port automatically if 9222 is taken, and later commands reattach to it). The performance section ranks frames by input-blocking time — the jank signal — and drops non-blocking ones (a long but non-blocking frame, like the first frame after navigation, is not jank), leading with the most-blocking frame, then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no blocking frames, which is a result, not a failure.
 
 It also captures a Chrome DevTools timeline trace over the same window. The perf section rolls it up into the native cost a forced reflow incurs — total/longest **style-recalc**, **layout**, **hit-test**, and **paint** time — which the script-level LoAF rows can't isolate (this is where `getComputedStyle` / `getBoundingClientRect` / `elementsFromPoint` land). The raw trace is written to `react-doctor-trace.json` (override with `--out`); drop it into the DevTools **Performance** panel for the full flame chart.
 

From cdcf38c57b6f6b631871b8e38b3dd972e2665ab9 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 19:23:56 -0700
Subject: [PATCH 16/38] fix(browser): render environment failures as actionable
 user errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A newcomer running a browser command on a fresh machine without Chrome
(or without the optional playwright-core) hit the crash path: "Something
went wrong — open this prefilled GitHub issue" plus a Sentry report, when
the real fix is "install Chrome" / "npm i -D playwright-core". The
environmental failures (no Chrome to launch, playwright-core missing, no
debuggable Chrome to attach to) were thrown as plain Errors, so the CLI
classified them as bugs.

Throw a typed BrowserEnvironmentError at those sites and teach
isExpectedUserError about it (mirroring CliInputError), so they render as
a plain, actionable message and stay out of crash reporting.
---
 .../browser/src/browser-environment-error.ts     | 16 ++++++++++++++++
 packages/browser/src/connect.ts                  | 12 +++++++-----
 packages/browser/src/index.ts                    |  2 ++
 packages/browser/src/launch.ts                   | 16 ++++++++++++----
 packages/browser/src/utils/load-playwright.ts    |  3 ++-
 .../src/cli/utils/is-expected-user-error.ts      |  9 ++++++++-
 .../tests/is-expected-user-error.test.ts         | 14 ++++++++++++++
 7 files changed, 61 insertions(+), 11 deletions(-)
 create mode 100644 packages/browser/src/browser-environment-error.ts

diff --git a/packages/browser/src/browser-environment-error.ts b/packages/browser/src/browser-environment-error.ts
new file mode 100644
index 000000000..e0cb3ae5b
--- /dev/null
+++ b/packages/browser/src/browser-environment-error.ts
@@ -0,0 +1,16 @@
+// A browser failure caused by the machine's environment, not a react-doctor bug:
+// no Google Chrome to launch, the optional `playwright-core` dependency not
+// installed, or no debuggable Chrome to attach to. The CLI renders these as a
+// plain, actionable message and keeps them out of crash reporting (Sentry + the
+// error-rate metric) — see the CLI's `isExpectedUserError`. The message is the
+// fix instruction; throw sites phrase it for the user.
+export class BrowserEnvironmentError extends Error {
+  override readonly name = "BrowserEnvironmentError";
+
+  constructor(message: string, options?: ErrorOptions) {
+    super(message, options);
+  }
+}
+
+export const isBrowserEnvironmentError = (error: unknown): error is BrowserEnvironmentError =>
+  error instanceof BrowserEnvironmentError;
diff --git a/packages/browser/src/connect.ts b/packages/browser/src/connect.ts
index d85a2f366..28179133f 100644
--- a/packages/browser/src/connect.ts
+++ b/packages/browser/src/connect.ts
@@ -1,4 +1,5 @@
 import type { Browser } from "playwright-core";
+import { BrowserEnvironmentError } from "./browser-environment-error.js";
 import { CONNECT_TIMEOUT_MS, DEFAULT_CDP_ENDPOINT } from "./constants.js";
 import { launchPersistentChrome } from "./launch.js";
 import type { BrowserConnectOptions } from "./types.js";
@@ -61,7 +62,7 @@ export const connectToBrowser = async (
   const fallbackEndpoint = options.cdpEndpoint ?? DEFAULT_CDP_ENDPOINT;
   // Only launch for a loopback endpoint — we can't spawn Chrome on a remote host.
   if (options.launch === false || !isLoopbackEndpoint(fallbackEndpoint)) {
-    throw new Error(
+    throw new BrowserEnvironmentError(
       `Could not attach to Chrome at ${fallbackEndpoint}. Start Chrome with --remote-debugging-port=${cdpPortFromEndpoint(fallbackEndpoint)}, or allow launching a local browser.`,
       { cause: lastAttachError },
     );
@@ -70,7 +71,7 @@ export const connectToBrowser = async (
   const launchEndpoint = options.cdpEndpoint
     ? options.cdpEndpoint
     : await resolveLaunchEndpoint(fallbackEndpoint);
-  const reachableEndpoint = await launchPersistentChrome(launchEndpoint);
+  const reachableEndpoint = await launchPersistentChrome(launchEndpoint, options.headless ?? true);
   writeLaunchedEndpoint(reachableEndpoint);
   try {
     return {
@@ -78,8 +79,9 @@ export const connectToBrowser = async (
       launched: true,
     };
   } catch (launchedAttachError) {
-    throw new Error(`Launched Chrome at ${reachableEndpoint} but could not attach to it.`, {
-      cause: launchedAttachError,
-    });
+    throw new BrowserEnvironmentError(
+      `Launched Chrome at ${reachableEndpoint} but could not attach to it. Update Chrome (or playwright-core), or start Chrome yourself with --remote-debugging-port and pass --cdp.`,
+      { cause: launchedAttachError },
+    );
   }
 };
diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts
index a19143c06..6a97fe3b5 100644
--- a/packages/browser/src/index.ts
+++ b/packages/browser/src/index.ts
@@ -1,6 +1,8 @@
 export { BrowserSession } from "./session.js";
+export { BrowserEnvironmentError, isBrowserEnvironmentError } from "./browser-environment-error.js";
 export { connectToBrowser } from "./connect.js";
 export type { BrowserConnection } from "./connect.js";
+export { closeLaunchedBrowser } from "./close-launched-browser.js";
 export { parseViewport } from "./parse-viewport.js";
 export { formatEvalValue } from "./utils/format-eval-value.js";
 export { DEFAULT_TRACE_FILENAME } from "./constants.js";
diff --git a/packages/browser/src/launch.ts b/packages/browser/src/launch.ts
index 693e91a70..39ac1ad17 100644
--- a/packages/browser/src/launch.ts
+++ b/packages/browser/src/launch.ts
@@ -1,5 +1,6 @@
 import { spawn } from "node:child_process";
 import { existsSync } from "node:fs";
+import { BrowserEnvironmentError } from "./browser-environment-error.js";
 import {
   LAUNCH_POLL_INTERVAL_MS,
   LAUNCH_READY_TIMEOUT_MS,
@@ -38,7 +39,7 @@ const resolveChromeExecutable = (): string => {
     (candidate): candidate is string => typeof candidate === "string" && existsSync(candidate),
   );
   if (!executable) {
-    throw new Error(
+    throw new BrowserEnvironmentError(
       "Could not find Google Chrome to launch. Install Chrome, set CHROME_PATH, or start Chrome with --remote-debugging-port and pass --cdp to attach to it.",
     );
   }
@@ -72,19 +73,26 @@ const waitForCdpEndpoint = async (endpoint: string): Promise<string> => {
     }
     await delay(LAUNCH_POLL_INTERVAL_MS);
   }
-  throw new Error(`Launched Chrome but it never exposed its debugger at ${endpoint}.`);
+  throw new BrowserEnvironmentError(
+    `Launched Chrome but it never exposed its debugger at ${endpoint}. Start Chrome yourself with --remote-debugging-port and pass --cdp, or set CHROME_PATH to a working Chrome.`,
+  );
 };
 
 // Detached and unref'd on success so the browser outlives this process and the
 // next `browser` command reattaches over CDP — the persistent model Chrome
-// DevTools MCP uses to keep state across calls.
-export const launchPersistentChrome = async (endpoint: string): Promise<string> => {
+// DevTools MCP uses to keep state across calls. Headless by default (an agent
+// rarely needs the window); `headless: false` shows it for a human to watch.
+export const launchPersistentChrome = async (
+  endpoint: string,
+  headless: boolean,
+): Promise<string> => {
   const executable = resolveChromeExecutable();
   const args = [
     `--remote-debugging-port=${cdpPortFromEndpoint(endpoint)}`,
     `--user-data-dir=${LAUNCHED_CHROME_PROFILE_DIRECTORY}`,
     "--no-first-run",
     "--no-default-browser-check",
+    ...(headless ? ["--headless=new"] : []),
   ];
 
   const child = spawn(executable, args, { detached: true, stdio: "ignore" });
diff --git a/packages/browser/src/utils/load-playwright.ts b/packages/browser/src/utils/load-playwright.ts
index 7a67dfce2..ef52f96e3 100644
--- a/packages/browser/src/utils/load-playwright.ts
+++ b/packages/browser/src/utils/load-playwright.ts
@@ -1,4 +1,5 @@
 import type * as PlaywrightCore from "playwright-core";
+import { BrowserEnvironmentError } from "../browser-environment-error.js";
 
 const isModuleNotFoundError = (error: unknown): boolean =>
   error instanceof Error &&
@@ -15,7 +16,7 @@ export const loadPlaywright = async (): Promise<typeof PlaywrightCore> => {
     return await import("playwright-core");
   } catch (error: unknown) {
     if (!isModuleNotFoundError(error)) throw error;
-    throw new Error(
+    throw new BrowserEnvironmentError(
       "The browser tools need playwright-core, which isn't installed. Install it with `npm i -D playwright-core`, then retry.",
     );
   }
diff --git a/packages/react-doctor/src/cli/utils/is-expected-user-error.ts b/packages/react-doctor/src/cli/utils/is-expected-user-error.ts
index b5671610e..e6202eed8 100644
--- a/packages/react-doctor/src/cli/utils/is-expected-user-error.ts
+++ b/packages/react-doctor/src/cli/utils/is-expected-user-error.ts
@@ -1,3 +1,4 @@
+import { isBrowserEnvironmentError } from "@react-doctor/browser";
 import { isProjectDiscoveryError, isReactDoctorError } from "@react-doctor/core";
 import { CliInputError } from "./cli-input-error.js";
 
@@ -21,13 +22,19 @@ import { CliInputError } from "./cli-input-error.js";
  *   `--project` name.
  * - **Bad `--diff` input** (`GitBaseBranchInvalid` / `GitBaseBranchMissing`)
  *   stays the tagged `ReactDoctorError`, so dispatch on the reason `_tag`.
+ * - **Browser environment failures** (`BrowserEnvironmentError`): no Chrome to
+ *   launch, `playwright-core` not installed, or no debuggable Chrome to attach
+ *   to. The message is the fix ("install Chrome", "npm i -D playwright-core"),
+ *   so a newcomer running a `browser` command on a fresh machine gets that —
+ *   not a "this is a bug, file an issue" crash report.
  *
- * This composes the existing core narrowers rather than introducing a new
+ * This composes the existing narrowers rather than introducing a new
  * error-shape helper (AGENTS.md): it encodes CLI-layer reporting policy, not
  * knowledge of the `ReactDoctorError` shape.
  */
 export const isExpectedUserError = (error: unknown): boolean =>
   error instanceof CliInputError ||
+  isBrowserEnvironmentError(error) ||
   isProjectDiscoveryError(error) ||
   (isReactDoctorError(error) &&
     (error.reason._tag === "GitBaseBranchInvalid" || error.reason._tag === "GitBaseBranchMissing"));
diff --git a/packages/react-doctor/tests/is-expected-user-error.test.ts b/packages/react-doctor/tests/is-expected-user-error.test.ts
index 3bb6f0f92..b73d9410e 100644
--- a/packages/react-doctor/tests/is-expected-user-error.test.ts
+++ b/packages/react-doctor/tests/is-expected-user-error.test.ts
@@ -10,6 +10,7 @@ import {
   ProjectNotFoundError,
   ReactDoctorError,
 } from "@react-doctor/core";
+import { BrowserEnvironmentError } from "@react-doctor/browser";
 import { CliInputError } from "../src/cli/utils/cli-input-error.js";
 import { isExpectedUserError } from "../src/cli/utils/is-expected-user-error.js";
 
@@ -58,6 +59,19 @@ describe("isExpectedUserError", () => {
     ).toBe(true);
   });
 
+  it("classifies browser environment failures as expected user errors (a fresh machine missing Chrome / playwright-core gets the fix, not a crash report)", () => {
+    expect(
+      isExpectedUserError(
+        new BrowserEnvironmentError(
+          "The browser tools need playwright-core, which isn't installed.",
+        ),
+      ),
+    ).toBe(true);
+    expect(
+      isExpectedUserError(new BrowserEnvironmentError("Could not find Google Chrome to launch.")),
+    ).toBe(true);
+  });
+
   it("does not mask genuine bugs (those stay reportable)", () => {
     expect(isExpectedUserError(new Error("boom"))).toBe(false);
     expect(isExpectedUserError(undefined)).toBe(false);

From 4d2de1782ec45b43cabc9ad39e2d0fa9af7eb288 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 19:31:26 -0700
Subject: [PATCH 17/38] fix(browser): add the missing close-launched-browser
 source files

cdcf38c5 exported closeLaunchedBrowser from index.ts but left its
implementation (close-launched-browser.ts + its clear-launched-endpoint
helper) untracked, so the build couldn't resolve the import. Commit the
two files to restore a buildable tree.
---
 .../browser/src/close-launched-browser.ts     | 24 +++++++++++++++++++
 .../src/utils/clear-launched-endpoint.ts      | 10 ++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 packages/browser/src/close-launched-browser.ts
 create mode 100644 packages/browser/src/utils/clear-launched-endpoint.ts

diff --git a/packages/browser/src/close-launched-browser.ts b/packages/browser/src/close-launched-browser.ts
new file mode 100644
index 000000000..9f08aa63f
--- /dev/null
+++ b/packages/browser/src/close-launched-browser.ts
@@ -0,0 +1,24 @@
+import { CONNECT_TIMEOUT_MS } from "./constants.js";
+import { clearLaunchedEndpoint } from "./utils/clear-launched-endpoint.js";
+import { loadPlaywright } from "./utils/load-playwright.js";
+import { readLaunchedEndpoint } from "./utils/read-launched-endpoint.js";
+
+// Terminate the persistent Chrome we launched. `dispose()` only disconnects (the
+// persistent model keeps the page alive across commands), so this is the one path
+// that actually stops it — the cleanup a headless instance needs since there's no
+// window to quit. It targets ONLY our recorded endpoint, never a browser the user
+// started. Resolves false when no endpoint is recorded or the attach fails, true
+// once `Browser.close` was sent; after an attach attempt the record is cleared.
+export const closeLaunchedBrowser = async (): Promise<boolean> => {
+  const endpoint = readLaunchedEndpoint();
+  if (!endpoint) return false;
+  const { chromium } = await loadPlaywright();
+  const browser = await chromium
+    .connectOverCDP(endpoint, { timeout: CONNECT_TIMEOUT_MS })
+    .catch(() => null);
+  clearLaunchedEndpoint();
+  if (!browser) return false;
+  const cdpSession = await browser.newBrowserCDPSession();
+  await cdpSession.send("Browser.close").catch(() => {});
+  return true;
+};
diff --git a/packages/browser/src/utils/clear-launched-endpoint.ts b/packages/browser/src/utils/clear-launched-endpoint.ts
new file mode 100644
index 000000000..80eea3c10
--- /dev/null
+++ b/packages/browser/src/utils/clear-launched-endpoint.ts
@@ -0,0 +1,10 @@
+import { rmSync } from "node:fs";
+import { LAUNCHED_CHROME_ENDPOINT_FILE } from "../constants.js";
+
+// Forget the persisted launched-Chrome endpoint so the next command stops trying
+// to reattach to it (called after we close that instance, or when it's stale).
+export const clearLaunchedEndpoint = (): void => {
+  try {
+    rmSync(LAUNCHED_CHROME_ENDPOINT_FILE, { force: true });
+  } catch {}
+};

From 7cc684f108909009799433e4e4222ac9775a6605 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 19:38:42 -0700
Subject: [PATCH 18/38] feat(browser): memory snapshot + headless launch/close
 + richer network
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wires up the eval --profile capture to answer "can it analyze network and
memory too", and makes the launched Chrome work on a fresh/headless machine:

- Memory: capture the CDP Performance counters after the action (JS heap
  used/total, DOM nodes, listeners, documents/frames) as a # Memory
  section. Re-run on the same page and watch them climb for a leak signal.
- Network: fold each request's resource timing and encoded transfer size
  onto the entries, and flag slow (>500ms) / heavy (>1MB) requests in the
  summary — not just failures.
- Launch: the dedicated Chrome is headless by default (an agent rarely
  needs the window) so it works on a box with no display; --headed (CLI) /
  headed (MCP) shows it, and `browser close` / browser_close stops the
  headless instance the persistent model would otherwise leave running.

Also ignores the local react-doctor-trace.json / -screenshot.png artifacts.
---
 .../skills/react-doctor/references/debug.md   |  2 +-
 .../react-doctor/references/performance.md    |  6 +-
 .gitignore                                    |  5 ++
 packages/browser/src/session.ts               | 60 ++++++++++++++++++-
 packages/browser/src/types.ts                 | 24 ++++++++
 packages/mcp/src/tools/browser.ts             | 35 ++++++++++-
 packages/mcp/src/utils/with-session.ts        |  2 +
 .../react-doctor/src/cli/commands/browser.ts  | 59 ++++++++++++++++--
 packages/react-doctor/src/cli/index.ts        | 14 ++++-
 .../react-doctor/src/cli/utils/constants.ts   |  7 +++
 .../src/cli/utils/strip-unknown-cli-flags.ts  |  4 +-
 .../tests/strip-unknown-cli-flags.test.ts     |  9 +++
 skills/react-doctor/references/debug.md       |  2 +-
 skills/react-doctor/references/performance.md |  6 +-
 14 files changed, 220 insertions(+), 15 deletions(-)

diff --git a/.agents/skills/react-doctor/references/debug.md b/.agents/skills/react-doctor/references/debug.md
index 26cefe4ef..b95d30e79 100644
--- a/.agents/skills/react-doctor/references/debug.md
+++ b/.agents/skills/react-doctor/references/debug.md
@@ -51,7 +51,7 @@ Wrap every debug log in `// #region debug log` and `// #endregion` so cleanup la
 
 Clear the log file (`DELETE` the file at `logPath`) before each run, then trigger the exact behavior the user described:
 
-- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser eval --profile` hands you the whole runtime picture in one pass — the console (with uncaught errors), the network waterfall with failures flagged, performance, accessibility, and the React + CPU profiles — so you rarely need to instrument at all. Run it with no expression to read the page as it is, or pass the repro to record what it triggers. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
+- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile, headless — pass `--headed` to watch it, and `browser close` to stop it) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser eval --profile` hands you the whole runtime picture in one pass — the console (with uncaught errors), the network waterfall with failures flagged, performance, memory (heap, DOM nodes, listeners), accessibility, and the React + CPU profiles — so you rarely need to instrument at all. Run it with no expression to read the page as it is, or pass the repro to record what it triggers. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
 
 ```bash
 npx react-doctor browser open http://localhost:3000           # attach + open the page
diff --git a/.agents/skills/react-doctor/references/performance.md b/.agents/skills/react-doctor/references/performance.md
index 49829b303..a2c2b5bd9 100644
--- a/.agents/skills/react-doctor/references/performance.md
+++ b/.agents/skills/react-doctor/references/performance.md
@@ -17,10 +17,14 @@ npx react-doctor browser eval 'page.goto("http://localhost:3000")' --profile
 npx react-doctor browser eval --profile   # measures the current page, no reload
 ```
 
-It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one (which lands on a free port automatically if 9222 is taken, and later commands reattach to it). The performance section ranks frames by input-blocking time — the jank signal — and drops non-blocking ones (a long but non-blocking frame, like the first frame after navigation, is not jank), leading with the most-blocking frame, then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no blocking frames, which is a result, not a failure.
+It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one — launched headless (pass `--headed` to watch the window), landing on a free port automatically if 9222 is taken, with later commands reattaching to it and `browser close` stopping it when you're done. The performance section ranks frames by input-blocking time — the jank signal — and drops non-blocking ones (a long but non-blocking frame, like the first frame after navigation, is not jank), leading with the most-blocking frame, then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no blocking frames, which is a result, not a failure.
 
 It also captures a Chrome DevTools timeline trace over the same window. The perf section rolls it up into the native cost a forced reflow incurs — total/longest **style-recalc**, **layout**, **hit-test**, and **paint** time — which the script-level LoAF rows can't isolate (this is where `getComputedStyle` / `getBoundingClientRect` / `elementsFromPoint` land). The raw trace is written to `react-doctor-trace.json` (override with `--out`); drop it into the DevTools **Performance** panel for the full flame chart.
 
+The `# Memory` section snapshots the page's runtime footprint after the action — JS heap used/total, DOM node count, event listeners, and document/frame counts (the CDP Performance counters). For a leak, re-run on the same page with no reload (`browser eval --profile`) and watch these climb: growing DOM nodes mean detached subtrees retained, growing listeners/heap mean leaked closures, growing documents/frames mean orphaned iframes.
+
+The `# Network` section lists each request with its outcome (status or failure), and — once it has settled — its time and encoded transfer size, with a summary counting failed, slow (>500ms), and heavy (>1MB) requests. Use it to spot a blocking waterfall or an oversized bundle/asset; a cache hit or an unfinished request shows no size/time.
+
 To attribute interaction jank (a slow click, scroll, or keypress), pass the repro as the expression so it runs while recording: `browser open` the page, then `browser eval 'page.getByText("Next").click()' --profile`. The recording covers the action, so its frames, renders, and CPU samples are all included.
 
 ## 3. Analyze the worst frame first
diff --git a/.gitignore b/.gitignore
index db97f6942..25e55dce9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -46,3 +46,8 @@ review-*.md
 /scripts/print-batch-input.mjs
 /scripts/rule-prompts/
 /rules.json
+
+# Local-only artifacts written by `react-doctor browser` (timeline trace,
+# screenshot) when run from the repo root during development.
+/react-doctor-trace.json
+/react-doctor-screenshot.png
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 1b539ec51..8c8ff4ecd 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -23,6 +23,7 @@ import type {
   ConsoleMessageEntry,
   CpuProfileAnalysis,
   InspectOptions,
+  MemoryStats,
   NetworkRequestEntry,
   PageInspection,
   PageVitals,
@@ -44,6 +45,17 @@ const emptyCpuAnalysis = (): CpuProfileAnalysis => ({
   topFunctions: [],
 });
 
+// When the CDP Performance domain is unavailable (older Chrome, a failed
+// enable): a zeroed snapshot degrades the memory lens without losing the rest.
+const emptyMemory = (): MemoryStats => ({
+  jsHeapUsedBytes: 0,
+  jsHeapTotalBytes: 0,
+  domNodes: 0,
+  jsEventListeners: 0,
+  documents: 0,
+  frames: 0,
+});
+
 // A Chrome DevTools trace event as it streams over CDP (loosely typed there as a
 // string map). The full record — every field — is written to the trace file; the
 // roll-up only reads `name`/`dur`, which it narrows itself.
@@ -230,6 +242,8 @@ export class BrowserSession {
         resourceType: request.resourceType(),
         status: null,
         failure: null,
+        durationMs: null,
+        encodedBytes: null,
       });
     };
     const onResponse = (response: Response): void => {
@@ -250,6 +264,25 @@ export class BrowserSession {
     };
   }
 
+  // Fold per-request timing and transfer size onto the listener-collected
+  // entries. `timing()` is sync and reports `responseEnd` as -1 until the
+  // request finishes; `sizes()` waits on the response, so it is only asked of
+  // finished requests — a still-streaming one (SSE, long-poll) keeps its null
+  // duration/size instead of stalling or failing the whole inspect.
+  private async finalizeNetwork(
+    entriesByRequest: Map<Request, NetworkRequestEntry>,
+  ): Promise<NetworkRequestEntry[]> {
+    await Promise.all(
+      [...entriesByRequest].map(async ([request, entry]) => {
+        const responseEndMs = request.timing().responseEnd;
+        entry.durationMs = responseEndMs > 0 ? Math.round(responseEndMs) : null;
+        const sizes = responseEndMs > 0 ? await request.sizes().catch(() => null) : null;
+        entry.encodedBytes = sizes ? sizes.responseBodySize : null;
+      }),
+    );
+    return [...entriesByRequest.values()];
+  }
+
   // A per-page watermark inside collectPerformanceReport keeps a repeated
   // no-reload measurement from re-counting frames an earlier command already
   // reported on the same persistent page.
@@ -257,6 +290,24 @@ export class BrowserSession {
     return this.page.evaluate(collectPerformanceReport, PERFORMANCE_OBSERVE_WINDOW_MS);
   }
 
+  // The page's current runtime footprint from CDP `Performance.getMetrics` (heap,
+  // DOM nodes, listeners, documents/frames), rounded to integers. A snapshot, not
+  // a window; a failed call or a missing counter reads as 0 rather than throwing.
+  private async captureMemory(cdpSession: CDPSession): Promise<MemoryStats> {
+    const result = await cdpSession.send("Performance.getMetrics").catch(() => null);
+    if (!result) return emptyMemory();
+    const valueByName = new Map(result.metrics.map((metric) => [metric.name, metric.value]));
+    const read = (name: string): number => Math.round(valueByName.get(name) ?? 0);
+    return {
+      jsHeapUsedBytes: read("JSHeapUsedSize"),
+      jsHeapTotalBytes: read("JSHeapTotalSize"),
+      domNodes: read("Nodes"),
+      jsEventListeners: read("JSEventListeners"),
+      documents: read("Documents"),
+      frames: read("Frames"),
+    };
+  }
+
   // Begin a best-effort DevTools timeline trace on the CDP session, returning a
   // `stop()` that resolves the collected events (empty if tracing never started),
   // so the caller can bracket exactly the recording window. Runs alongside the
@@ -316,6 +367,7 @@ export class BrowserSession {
     try {
       detachers.push(this.collectConsole(consoleEntries), this.collectNetwork(networkByRequest));
       await this.settle();
+      await cdpSession.send("Performance.enable").catch(() => {});
       await cdpSession.send("Profiler.enable");
       await cdpSession.send("Profiler.setSamplingInterval", {
         interval: DEFAULT_CPU_SAMPLING_INTERVAL_US,
@@ -362,17 +414,23 @@ export class BrowserSession {
       const writtenTracePath =
         tracePath && traceEvents.length > 0 ? await writeTraceFile(tracePath, traceEvents) : null;
 
+      // Read memory before axe runs so the snapshot reflects the app's footprint,
+      // not axe's injected globals.
+      const memory = await this.captureMemory(cdpSession);
+
       // Detach the page listeners before the accessibility audit so axe's injected
       // evaluate (and anything it logs) can't land in the captured signals.
       for (const detach of detachers) detach();
       detachers.length = 0;
+      const network = await this.finalizeNetwork(networkByRequest);
       const accessibility = await this.runAxe();
 
       return {
         result,
         console: consoleEntries,
-        network: [...networkByRequest.values()],
+        network,
         performance: { ...vitals, timeline: analyzeTimelineTrace(traceEvents) },
+        memory,
         accessibility,
         tracePath: writtenTracePath,
         profile: {
diff --git a/packages/browser/src/types.ts b/packages/browser/src/types.ts
index 9961cc7b5..f3c2ce9e0 100644
--- a/packages/browser/src/types.ts
+++ b/packages/browser/src/types.ts
@@ -3,6 +3,9 @@ export interface BrowserConnectOptions {
   // a local endpoint launches our own persistent Chrome instead.
   cdpEndpoint?: string;
   launch?: boolean;
+  // Only applies to the Chrome we launch ourselves (not a browser we attach to):
+  // launch it headless unless explicitly false. Defaults to headless.
+  headless?: boolean;
 }
 
 export interface Viewport {
@@ -30,6 +33,12 @@ export interface NetworkRequestEntry {
   resourceType: string;
   status: number | null;
   failure: string | null;
+  // Wall time from request start to response end in ms (Playwright resource
+  // timing), or null if the request never finished within the recording window.
+  durationMs: number | null;
+  // Encoded response body size in bytes, or null when unknown (still pending, or
+  // served from cache with no transfer). The "heavy request" signal.
+  encodedBytes: number | null;
 }
 
 export interface PerformanceScriptAttribution {
@@ -73,6 +82,20 @@ export interface PerformanceReport {
   timeline: TimelineAnalysis;
 }
 
+// A point-in-time snapshot of the page's runtime footprint, read from the CDP
+// Performance domain after the driven action. Growth across repeated `inspect`
+// runs on the same persistent page is the leak signal: detached DOM keeps
+// `domNodes` climbing, leaked closures keep `jsEventListeners`/`jsHeapUsedBytes`
+// climbing, and orphaned iframes keep `documents`/`frames` climbing.
+export interface MemoryStats {
+  jsHeapUsedBytes: number;
+  jsHeapTotalBytes: number;
+  domNodes: number;
+  jsEventListeners: number;
+  documents: number;
+  frames: number;
+}
+
 // Everything `inspect` observes that the page itself reports (LoAF / LCP / CLS),
 // before the trace-derived `timeline` is folded in to form the PerformanceReport.
 export type PageVitals = Omit<PerformanceReport, "timeline">;
@@ -93,6 +116,7 @@ export interface PageInspection {
   console: ConsoleMessageEntry[];
   network: NetworkRequestEntry[];
   performance: PerformanceReport;
+  memory: MemoryStats;
   accessibility: AccessibilityViolation[];
   // Absolute path the raw timeline trace was written to, or null when none was.
   tracePath: string | null;
diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
index 92bc3c20f..ba0c4a5ca 100644
--- a/packages/mcp/src/tools/browser.ts
+++ b/packages/mcp/src/tools/browser.ts
@@ -1,6 +1,11 @@
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
-import { DEFAULT_TRACE_FILENAME, formatEvalValue, parseViewport } from "@react-doctor/browser";
+import {
+  closeLaunchedBrowser,
+  DEFAULT_TRACE_FILENAME,
+  formatEvalValue,
+  parseViewport,
+} from "@react-doctor/browser";
 import { DEFAULT_CDP_ENDPOINT_HINT } from "../constants.js";
 import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
 import { withSession, type BrowserToolConnection } from "../utils/with-session.js";
@@ -14,6 +19,10 @@ const connectionShape = {
     .boolean()
     .optional()
     .describe("Fail instead of launching Chrome when no attach target exists"),
+  headed: z
+    .boolean()
+    .optional()
+    .describe("Show the launched browser window (the launched Chrome is headless by default)"),
 };
 
 const viewportShape = {
@@ -26,12 +35,14 @@ const viewportShape = {
 interface ConnectionArgs {
   cdp?: string;
   noLaunch?: boolean;
+  headed?: boolean;
   viewport?: string;
 }
 
 const toConnection = (args: ConnectionArgs): BrowserToolConnection => ({
   cdp: args.cdp,
   noLaunch: args.noLaunch,
+  headed: args.headed,
   viewport: args.viewport ? parseViewport(args.viewport) : undefined,
 });
 
@@ -71,7 +82,7 @@ export const registerBrowserTools = (server: McpServer): void => {
           .boolean()
           .optional()
           .describe(
-            "Set true to record and return the full runtime picture while the expression runs — console, network, performance (LoAF jank/LCP/CLS plus a `timeline` roll-up of forced style-recalc/layout/hit-test/paint cost from a DevTools trace), accessibility, the React render profile (slow commits, hot components, unnecessary re-renders), and a V8 CPU profile. Also writes the raw timeline trace to `out` (loadable in DevTools) and returns its path as `tracePath`. Omit for just the expression's return value.",
+            "Set true to record and return the full runtime picture while the expression runs — console, network (failures, plus each request's time and transfer size, with slow/heavy ones flagged), performance (LoAF jank/LCP/CLS plus a `timeline` roll-up of forced style-recalc/layout/hit-test/paint cost from a DevTools trace), memory (JS heap, DOM nodes, listeners, documents/frames — watch these climb across runs for leaks), accessibility, the React render profile (slow commits, hot components, unnecessary re-renders), and a V8 CPU profile. Also writes the raw timeline trace to `out` (loadable in DevTools) and returns its path as `tracePath`. Omit for just the expression's return value.",
           ),
         out: z
           .string()
@@ -139,4 +150,24 @@ export const registerBrowserTools = (server: McpServer): void => {
         };
       }),
   );
+
+  server.registerTool(
+    "browser_close",
+    {
+      title: "Close the launched browser",
+      description:
+        "Stop the dedicated Chrome React Doctor launched as a fallback (the persistent instance reused across calls). Never touches a browser you started yourself. Use it to free that headless instance when done.",
+      inputSchema: {},
+      annotations: { openWorldHint: true },
+    },
+    () =>
+      runTool(async () => {
+        const closed = await closeLaunchedBrowser();
+        return textResult(
+          closed
+            ? "Closed the launched browser."
+            : "No launched browser to close (it only stops the one React Doctor launched).",
+        );
+      }),
+  );
 };
diff --git a/packages/mcp/src/utils/with-session.ts b/packages/mcp/src/utils/with-session.ts
index 2ca81615a..0a0858a23 100644
--- a/packages/mcp/src/utils/with-session.ts
+++ b/packages/mcp/src/utils/with-session.ts
@@ -3,6 +3,7 @@ import { BrowserSession, type Viewport } from "@react-doctor/browser";
 export interface BrowserToolConnection {
   cdp?: string;
   noLaunch?: boolean;
+  headed?: boolean;
   viewport?: Viewport;
 }
 
@@ -18,6 +19,7 @@ export const withSession = async <ResultType>(
   const session = await BrowserSession.attach({
     cdpEndpoint: connection.cdp,
     launch: connection.noLaunch === true ? false : undefined,
+    headless: connection.headed ? false : undefined,
   });
   try {
     if (connection.viewport) await session.setViewport(connection.viewport);
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index 22122194d..a9704d869 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -1,10 +1,12 @@
 import {
   BrowserSession,
+  closeLaunchedBrowser,
   DEFAULT_TRACE_FILENAME,
   formatEvalValue,
   type AccessibilityViolation,
   type ConsoleMessageEntry,
   type CpuProfileAnalysis,
+  type MemoryStats,
   type NetworkRequestEntry,
   type PageInspection,
   type PerformanceReport,
@@ -13,13 +15,19 @@ import {
   type TimelinePhaseStat,
   type Viewport,
 } from "@react-doctor/browser";
-import { DEFAULT_SCREENSHOT_FILENAME, METRIC } from "../utils/constants.js";
+import {
+  DEFAULT_SCREENSHOT_FILENAME,
+  HEAVY_REQUEST_BYTES,
+  METRIC,
+  SLOW_REQUEST_MS,
+} from "../utils/constants.js";
 import { cliLogger as logger } from "../utils/cli-logger.js";
 import { recordCount } from "../utils/record-metric.js";
 
 export interface BrowserCommandOptions {
   cdp?: string;
   launch?: boolean;
+  headed?: boolean;
   out?: string;
   viewport?: Viewport;
   profile?: boolean;
@@ -32,7 +40,11 @@ const withSession = async (
   options: BrowserCommandOptions,
   useSession: (session: BrowserSession) => Promise<void>,
 ): Promise<void> => {
-  const session = await BrowserSession.attach({ cdpEndpoint: options.cdp, launch: options.launch });
+  const session = await BrowserSession.attach({
+    cdpEndpoint: options.cdp,
+    launch: options.launch,
+    headless: options.headed ? false : undefined,
+  });
   try {
     if (options.viewport) await session.setViewport(options.viewport);
     await useSession(session);
@@ -54,12 +66,19 @@ export const browserOpenAction = async (
     );
     if (session.launched) {
       logger.log(
-        "Launched a dedicated Chrome (separate from your main profile); later browser commands reuse it. Quit that window when you're done.",
+        "Launched a dedicated Chrome (separate from your main profile; headless unless --headed was passed); later browser commands reuse it. Run `react-doctor browser close` when done.",
       );
     }
   });
 };
 
+export const browserCloseAction = async (): Promise<void> => {
+  recordCount(METRIC.cliInvoked, 1, { command: "browser.close" });
+  const closed = await closeLaunchedBrowser();
+  if (closed) logger.success("Closed the launched browser.");
+  else logger.log("No launched browser to close (it only stops the one React Doctor launched).");
+};
+
 export const browserEvalAction = async (
   expression: string | undefined,
   options: BrowserCommandOptions,
@@ -120,15 +139,42 @@ const printConsoleMessages = (messages: ConsoleMessageEntry[]): void => {
   }
 };
 
+// Cost suffix for a request line, e.g. ` (123ms, 45.6kB)`, built from whichever of duration/size
+// is non-null; "" when both are null — a cache hit or a request that never finished in the window.
+const formatRequestCost = (request: NetworkRequestEntry): string => {
+  const parts: string[] = [];
+  if (request.durationMs !== null) parts.push(`${request.durationMs}ms`);
+  if (request.encodedBytes !== null) parts.push(`${(request.encodedBytes / 1024).toFixed(1)}kB`);
+  return parts.length > 0 ? ` (${parts.join(", ")})` : "";
+};
+
 const printNetworkRequests = (requests: NetworkRequestEntry[]): void => {
   const failures = requests.filter(
     (request) => request.failure !== null || (request.status !== null && request.status >= 400),
   );
+  const slow = requests.filter(
+    (request) => request.durationMs !== null && request.durationMs > SLOW_REQUEST_MS,
+  );
+  const heavy = requests.filter(
+    (request) => request.encodedBytes !== null && request.encodedBytes > HEAVY_REQUEST_BYTES,
+  );
   for (const request of requests) {
     const outcome = request.failure ?? (request.status === null ? "pending" : request.status);
-    logger.log(`${outcome} ${request.method} ${request.url}`);
+    logger.log(`${outcome} ${request.method} ${request.url}${formatRequestCost(request)}`);
   }
-  logger.log(`${requests.length} request(s), ${failures.length} failed`);
+  const heavyMb = HEAVY_REQUEST_BYTES / 1024 / 1024;
+  logger.log(
+    `${requests.length} request(s), ${failures.length} failed, ${slow.length} slow (>${SLOW_REQUEST_MS}ms), ${heavy.length} heavy (>${heavyMb}MB)`,
+  );
+};
+
+const printMemoryStats = (memory: MemoryStats): void => {
+  const heapMb = (memory.jsHeapUsedBytes / 1024 / 1024).toFixed(1);
+  const totalMb = (memory.jsHeapTotalBytes / 1024 / 1024).toFixed(1);
+  logger.log(`JS heap: ${heapMb}MB used / ${totalMb}MB total`);
+  logger.log(
+    `${memory.domNodes} DOM nodes, ${memory.jsEventListeners} listeners, ${memory.documents} document(s), ${memory.frames} frame(s)`,
+  );
 };
 
 const printPerformanceReport = (report: PerformanceReport): void => {
@@ -231,6 +277,9 @@ const printInspection = (inspection: PageInspection): void => {
   logger.log("\n# Performance");
   printPerformanceReport(inspection.performance);
 
+  logger.log("\n# Memory");
+  printMemoryStats(inspection.memory);
+
   logger.log("\n# Accessibility");
   if (inspection.accessibility.length === 0) logger.log("(none)");
   else printAuditViolations(inspection.accessibility);
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
index 3a5e2a01f..ed9058813 100644
--- a/packages/react-doctor/src/cli/index.ts
+++ b/packages/react-doctor/src/cli/index.ts
@@ -2,6 +2,7 @@ import { Command, Option } from "commander";
 import { CANONICAL_GITHUB_URL, highlighter } from "@react-doctor/core";
 import { flushSentry, initializeSentry } from "../instrument.js";
 import {
+  browserCloseAction,
   browserEvalAction,
   browserOpenAction,
   browserScreenshotAction,
@@ -239,7 +240,11 @@ const browser = program
 const withConnectionOptions = (command: Command): Command =>
   command
     .option("--cdp <endpoint>", "CDP endpoint to attach to (default http://127.0.0.1:9222)")
-    .option("--no-launch", "fail instead of launching Chrome when no attach target exists");
+    .option("--no-launch", "fail instead of launching Chrome when no attach target exists")
+    .option(
+      "--headed",
+      "show the launched browser window (the launched Chrome is headless by default)",
+    );
 
 // Commands that render or measure the page also accept a one-shot emulated
 // viewport (e.g. a phone). It's applied via a CDP override that clears when the
@@ -290,6 +295,13 @@ withRenderOptions(
     .option("--out <path>", "output file path (default react-doctor-screenshot.png)"),
 ).action(browserScreenshotAction);
 
+browser
+  .command("close")
+  .description(
+    "Stop the dedicated Chrome React Doctor launched (the persistent fallback); never touches a browser you started",
+  )
+  .action(browserCloseAction);
+
 const debug = program
   .command("debug")
   .description("Runtime debugging tools for the debug job (NDJSON logging server)");
diff --git a/packages/react-doctor/src/cli/utils/constants.ts b/packages/react-doctor/src/cli/utils/constants.ts
index d76319f29..56500adaa 100644
--- a/packages/react-doctor/src/cli/utils/constants.ts
+++ b/packages/react-doctor/src/cli/utils/constants.ts
@@ -30,6 +30,13 @@ export const GIT_HOOK_EXECUTABLE_MODE = 0o755;
 // Default output path for `browser screenshot` when `--out` is omitted.
 export const DEFAULT_SCREENSHOT_FILENAME = "react-doctor-screenshot.png";
 
+// Thresholds for flagging requests in the `eval --profile` network section: a
+// request that takes longer than SLOW_REQUEST_MS, or whose encoded body exceeds
+// HEAVY_REQUEST_BYTES, is called out. The byte threshold is exactly 1 MiB because
+// the CLI divides by 1024 when printing `kB`/`MB`, so it renders as a whole `1MB`.
+export const SLOW_REQUEST_MS = 500;
+export const HEAVY_REQUEST_BYTES = 1_048_576;
+
 export const AGENT_HOOK_TIMEOUT_SECONDS = 120;
 
 // Hard cap on the `gh repo view` default-branch probe. A healthy gh answers
diff --git a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
index 5f2bee229..4b0bf2ef1 100644
--- a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
+++ b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
@@ -100,12 +100,12 @@ const WHY_FLAG_SPEC: CliFlagSpec = {
 };
 
 // Union of every flag across the `browser` subcommands (open / eval / snapshot /
-// screenshot). The sub-subcommand name and any URL / expression positional pass
+// screenshot / close). The sub-subcommand name and any URL / expression positional pass
 // through untouched; only these options need to survive the pre-parse strip so
 // Commander can route them — without this, e.g. `--cdp <endpoint>` is dropped and
 // its value leaks in as a stray positional.
 const BROWSER_FLAG_SPEC: CliFlagSpec = {
-  longOptionsWithoutValues: new Set(["--help", "--no-launch", "--profile"]),
+  longOptionsWithoutValues: new Set(["--headed", "--help", "--no-launch", "--profile"]),
   longOptionsWithRequiredValues: new Set(["--cdp", "--out", "--viewport"]),
   longOptionsWithOptionalValues: new Set(),
   shortOptionsWithoutValues: new Set(["-h"]),
diff --git a/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts b/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
index 9c82269d9..f4c2aa765 100644
--- a/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
+++ b/packages/react-doctor/tests/strip-unknown-cli-flags.test.ts
@@ -163,6 +163,15 @@ describe("stripUnknownCliFlags", () => {
         "--offline",
       ]),
     ).toEqual(["browser", "eval", 'page.getByText("Next").click()', "--profile"]);
+    // `--headed` is a known boolean flag (it takes no value), so it survives the
+    // strip while the unknown `--offline` after it is dropped.
+    expect(stripUserArguments(["browser", "eval", "--profile", "--headed", "--offline"])).toEqual([
+      "browser",
+      "eval",
+      "--profile",
+      "--headed",
+    ]);
+    expect(stripUserArguments(["browser", "close"])).toEqual(["browser", "close"]);
   });
 
   it("keeps debug serve flags and consumes their values (no value leaks as a positional)", () => {
diff --git a/skills/react-doctor/references/debug.md b/skills/react-doctor/references/debug.md
index 26cefe4ef..b95d30e79 100644
--- a/skills/react-doctor/references/debug.md
+++ b/skills/react-doctor/references/debug.md
@@ -51,7 +51,7 @@ Wrap every debug log in `// #region debug log` and `// #endregion` so cleanup la
 
 Clear the log file (`DELETE` the file at `logPath`) before each run, then trigger the exact behavior the user described:
 
-- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser eval --profile` hands you the whole runtime picture in one pass — the console (with uncaught errors), the network waterfall with failures flagged, performance, accessibility, and the React + CPU profiles — so you rarely need to instrument at all. Run it with no expression to read the page as it is, or pass the repro to record what it triggers. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
+- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile, headless — pass `--headed` to watch it, and `browser close` to stop it) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser eval --profile` hands you the whole runtime picture in one pass — the console (with uncaught errors), the network waterfall with failures flagged, performance, memory (heap, DOM nodes, listeners), accessibility, and the React + CPU profiles — so you rarely need to instrument at all. Run it with no expression to read the page as it is, or pass the repro to record what it triggers. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
 
 ```bash
 npx react-doctor browser open http://localhost:3000           # attach + open the page
diff --git a/skills/react-doctor/references/performance.md b/skills/react-doctor/references/performance.md
index 49829b303..a2c2b5bd9 100644
--- a/skills/react-doctor/references/performance.md
+++ b/skills/react-doctor/references/performance.md
@@ -17,10 +17,14 @@ npx react-doctor browser eval 'page.goto("http://localhost:3000")' --profile
 npx react-doctor browser eval --profile   # measures the current page, no reload
 ```
 
-It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one (which lands on a free port automatically if 9222 is taken, and later commands reattach to it). The performance section ranks frames by input-blocking time — the jank signal — and drops non-blocking ones (a long but non-blocking frame, like the first frame after navigation, is not jank), leading with the most-blocking frame, then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no blocking frames, which is a result, not a failure.
+It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one — launched headless (pass `--headed` to watch the window), landing on a free port automatically if 9222 is taken, with later commands reattaching to it and `browser close` stopping it when you're done. The performance section ranks frames by input-blocking time — the jank signal — and drops non-blocking ones (a long but non-blocking frame, like the first frame after navigation, is not jank), leading with the most-blocking frame, then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no blocking frames, which is a result, not a failure.
 
 It also captures a Chrome DevTools timeline trace over the same window. The perf section rolls it up into the native cost a forced reflow incurs — total/longest **style-recalc**, **layout**, **hit-test**, and **paint** time — which the script-level LoAF rows can't isolate (this is where `getComputedStyle` / `getBoundingClientRect` / `elementsFromPoint` land). The raw trace is written to `react-doctor-trace.json` (override with `--out`); drop it into the DevTools **Performance** panel for the full flame chart.
 
+The `# Memory` section snapshots the page's runtime footprint after the action — JS heap used/total, DOM node count, event listeners, and document/frame counts (the CDP Performance counters). For a leak, re-run on the same page with no reload (`browser eval --profile`) and watch these climb: growing DOM nodes mean detached subtrees retained, growing listeners/heap mean leaked closures, growing documents/frames mean orphaned iframes.
+
+The `# Network` section lists each request with its outcome (status or failure), and — once it has settled — its time and encoded transfer size, with a summary counting failed, slow (>500ms), and heavy (>1MB) requests. Use it to spot a blocking waterfall or an oversized bundle/asset; a cache hit or an unfinished request shows no size/time.
+
 To attribute interaction jank (a slow click, scroll, or keypress), pass the repro as the expression so it runs while recording: `browser open` the page, then `browser eval 'page.getByText("Next").click()' --profile`. The recording covers the action, so its frames, renders, and CPU samples are all included.
 
 ## 3. Analyze the worst frame first

From e5dfeb88a28a12044a346efc356bc703ef9ac21b Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 21:21:05 -0700
Subject: [PATCH 19/38] feat(browser): make eval self-reporting and consolidate
 the skill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Turn `browser eval` into the one primitive for driving a page: a pure
action (returns nothing) now hands back the resulting accessibility tree,
multi-statement source works without an async IIFE, and a page-global
ReferenceError (`window is not defined`) explains that eval runs in Node
with the Playwright `page` in scope. eval and `eval --profile` (and the
`browser_eval`/`browser_profile` MCP tools) now surface page-side errors
a driven action triggered and report page geometry — viewport, devicePixelRatio,
scroll offset, and how far the page scrolled while the action ran — so a
silent failure or a viewport that moved under you is answerable from output.

Consolidate the skill: merge motion into design.md, tighten the references
to stay within the context budget, and symlink .agents/skills/react-doctor
to the skills/ source of truth so there is one copy to maintain.
---
 .agents/skills/react-doctor                   |   1 +
 .agents/skills/react-doctor/SKILL.md          | 106 --------
 .../skills/react-doctor/references/debug.md   |  86 ------
 .../skills/react-doctor/references/design.md  |  52 ----
 .../skills/react-doctor/references/explain.md |  69 -----
 .../react-doctor/references/performance.md    |  63 -----
 .changeset/browser-eval-errors-geometry.md    |   5 +
 .../browser-eval-snapshot-ergonomics.md       |   5 +
 .changeset/react-browser-debug-skill.md       |   2 +-
 .gitignore                                    |   3 +-
 packages/browser/src/session.ts               |  83 +++++-
 packages/browser/src/types.ts                 |  18 ++
 .../browser/src/utils/append-eval-errors.ts   |  14 +
 packages/browser/src/utils/compile-eval.ts    |  26 ++
 .../browser/src/utils/enrich-eval-error.ts    |  20 ++
 packages/mcp/src/tools/browser.ts             |  19 +-
 packages/mcp/tests/server.test.ts             |   1 +
 .../react-doctor/src/cli/commands/browser.ts  |  20 +-
 packages/react-doctor/src/cli/index.ts        |   2 +-
 skills/react-doctor/SKILL.md                  |  12 +-
 skills/react-doctor/references/debug.md       |   8 +-
 skills/react-doctor/references/design.md      | 244 ++++++++++++++++--
 skills/react-doctor/references/performance.md |  10 +-
 23 files changed, 433 insertions(+), 436 deletions(-)
 create mode 120000 .agents/skills/react-doctor
 delete mode 100644 .agents/skills/react-doctor/SKILL.md
 delete mode 100644 .agents/skills/react-doctor/references/debug.md
 delete mode 100644 .agents/skills/react-doctor/references/design.md
 delete mode 100644 .agents/skills/react-doctor/references/explain.md
 delete mode 100644 .agents/skills/react-doctor/references/performance.md
 create mode 100644 .changeset/browser-eval-errors-geometry.md
 create mode 100644 .changeset/browser-eval-snapshot-ergonomics.md
 create mode 100644 packages/browser/src/utils/append-eval-errors.ts
 create mode 100644 packages/browser/src/utils/compile-eval.ts
 create mode 100644 packages/browser/src/utils/enrich-eval-error.ts

diff --git a/.agents/skills/react-doctor b/.agents/skills/react-doctor
new file mode 120000
index 000000000..97981d623
--- /dev/null
+++ b/.agents/skills/react-doctor
@@ -0,0 +1 @@
+../../skills/react-doctor
\ No newline at end of file
diff --git a/.agents/skills/react-doctor/SKILL.md b/.agents/skills/react-doctor/SKILL.md
deleted file mode 100644
index 4d3298dab..000000000
--- a/.agents/skills/react-doctor/SKILL.md
+++ /dev/null
@@ -1,106 +0,0 @@
----
-name: react-doctor
-description: Use when writing, finishing, or committing React or React Native code, when the user types `/react-doctor`, or when they ask to scan, triage, lint, profile performance, debug a UI in the browser, or review design and accessibility. Covers lint, accessibility, performance, bundle size, and architecture.
-version: "1.7.0"
----
-
-# React Doctor
-
-One skill that makes your agent good at React. It writes better React by default, checks your changes in the background, and opens a real browser to profile performance, reproduce bugs, and review design.
-
-## Baseline rules (always on)
-
-Apply these on every React edit, before any tool runs. They shape how you write code, not only what you flag:
-
-1. Derive state during render, don't duplicate it in another `useState`.
-2. Skip effects for values you can compute while rendering and for logic that belongs in an event handler.
-3. Compose components instead of piling on boolean props.
-4. Lift state only as far as it needs to go, no higher.
-5. Keep one source of truth for each piece of state.
-6. Render without side effects; keep the render pass pure.
-7. Use stable keys in lists, never the array index.
-8. Fetch independent data in parallel, not in a waterfall.
-9. Skip manual `useMemo`, `useCallback`, and `memo`; let the React Compiler handle it.
-10. Handle the loading, error, and empty states, not only the happy path.
-
-## Routing
-
-`/react-doctor` picks the job from what you're doing. Name a job (`/react-doctor perf`) to force it. When the request is genuinely unclear, ask which one rather than guessing.
-
-| Signal                                                  | Job        | What it does                    |
-| ------------------------------------------------------- | ---------- | ------------------------------- |
-| "review", "before commit", "clean up", or changed files | **doctor** | static scan plus 0 to 100 score |
-| "slow", "laggy", "janky", "re-rendering"                | **perf**   | React render + CPU profilers    |
-| "broken", "crashes", "doesn't work" in the UI           | **debug**  | reproduce in a real browser     |
-| "looks off", "polish", a screenshot or pasted element   | **design** | measured UI review              |
-
-doctor runs from code alone, so it is the one that fires in the background. The browser jobs (perf, debug, design) need a live page and are slower, so they run only when asked.
-
-## Which browser to drive
-
-debug, design, and perf need a real Chrome. Two ways to get one:
-
-1. **A browser MCP already in your tools.** Prefer [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) or similar for console, network, and snapshots. It adds full performance traces and Lighthouse on top.
-2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. Four commands: `open` (load a URL, leave the page, React profiler injected), `eval` (run a Playwright expression with `page` in scope — returns its value), `snapshot` (accessibility tree), and `screenshot`. Add `--profile` to `eval` to record the whole runtime picture while the expression runs — console, network, performance (long animation frames with per-script attribution, LCP, CLS), an axe-core accessibility audit, a React render profile (slowest commits, hottest components, unnecessary re-renders), and a V8/DevTools CPU profile over CDP (hottest JS functions by self time). Run `eval --profile` with no expression to measure the live page as it is.
-
-It is the same Chrome either way, so the playbooks apply to both: `browser open`, `eval`, `snapshot`, and `screenshot` map onto the MCP's `navigate_page`, `evaluate_script`, `take_snapshot`, and `take_screenshot`.
-
-## Run as an MCP server
-
-React Doctor ships its own Model Context Protocol server over stdio so any MCP-capable agent can call the jobs directly:
-
-```bash
-npx react-doctor@latest mcp
-```
-
-It exposes `doctor_scan` (the static scan), the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_eval` takes a `profile: true` argument that records the whole runtime picture — console, network, performance, accessibility, a React render profile, and a literal Chrome DevTools CPU profile — in one pass while the expression runs.
-
-## doctor: scan and triage
-
-After making React changes, run a regression check and confirm the score did not drop:
-
-```bash
-npx react-doctor@latest --verbose --scope changed
-```
-
-If the score dropped, fix the regressions before committing. For a cleanup of the whole codebase, drop `--scope changed` (the default is `--scope full`) and fix by severity: errors first, then warnings.
-
-When the user types `/react-doctor`, `/doctor`, says "run react doctor", or asks for a full triage or cleanup pass (not a regression check), fetch the canonical local-triage playbook and follow every step in it:
-
-```bash
-curl --fail --silent --show-error \
-  --header 'Cache-Control: no-cache' \
-  https://www.react.doctor/prompts/react-doctor-agent.md
-```
-
-The playbook is the single source of truth: a scan, filter, triage, fix, validate loop that edits the working tree directly and never commits or opens PRs. Updating the prompt at its source updates every agent on its next fetch, no reinstall needed. Pair it with the per-rule prompts at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md` (fetched on demand inside the playbook) so each fix uses the reviewer-tested recipe.
-
-## perf: profile performance
-
-When the user reports jank, slow interactions, dropped frames, excessive re-renders, or asks to profile or optimize render performance, read [references/performance.md](references/performance.md) and follow it. It runs an evidence-driven profile, analyze, fix, re-profile loop against the real React DevTools profiler export, never guessing from code alone.
-
-## debug: reproduce in a real browser
-
-When the user says something is broken, crashes, throws, or behaves wrong in the running app, read [references/debug.md](references/debug.md) and follow it. It runs the [debug-agent](https://github.com/millionco/debug-agent) loop: generate hypotheses, instrument the code with runtime NDJSON logs, reproduce the bug in the live browser, and fix only once the logs prove the cause.
-
-## design: review and improve UI
-
-When the user wants to build, polish, or review an interface ("looks off", "make this nicer", a pasted screenshot or element), read [references/design.md](references/design.md) and follow it. It opens the page, takes a screenshot, and reports what it can measure (contrast, line length, spacing, tap-target size), not only taste.
-
-## Configuring or explaining rules
-
-When the user wants to understand a rule, disagrees with one, or wants to disable or tune which rules run (not fix code), read [references/explain.md](references/explain.md) and follow it. Start with `npx react-doctor@latest rules explain <rule>`, then apply the narrowest control via `npx react-doctor@latest rules disable|set|category|ignore-tag …`.
-
-## Command
-
-```bash
-npx react-doctor@latest --verbose --scope changed
-```
-
-| Flag              | Purpose                                                          |
-| ----------------- | ---------------------------------------------------------------- |
-| `.`               | Scan current directory                                           |
-| `--verbose`       | Show affected files and line numbers per rule                    |
-| `--scope changed` | Only report issues introduced vs the base branch (default: full) |
-| `--scope lines`   | Only report issues on the changed lines                          |
-| `--score`         | Output only the numeric score                                    |
diff --git a/.agents/skills/react-doctor/references/debug.md b/.agents/skills/react-doctor/references/debug.md
deleted file mode 100644
index b95d30e79..000000000
--- a/.agents/skills/react-doctor/references/debug.md
+++ /dev/null
@@ -1,86 +0,0 @@
-# Debugging with runtime evidence
-
-Reproduce and fix UI bugs with runtime evidence, never by guessing from code alone. Use this when the user says something is broken, crashes, throws, hangs, or behaves wrong in the running app.
-
-This is the [debug-agent](https://github.com/millionco/debug-agent) loop, built into React Doctor: hypothesize, instrument with logs, reproduce, analyze the logs, fix only once the logs prove the cause, verify, clean up.
-
-## 0. Start the logging server (before any instrumentation)
-
-The server is long-running. Start it once and keep it up for the whole session. `--daemon` prints the server info and returns, leaving the server running in the background:
-
-```bash
-npx react-doctor debug serve --daemon
-```
-
-It prints one JSON line. Capture and remember:
-
-- `endpoint`: POST your logs here from JS or TS at runtime
-- `logPath`: the NDJSON log file you read after each run
-- `sessionId`: include it in every log payload
-
-The server is idempotent: a second start returns the running server's info. If it fails to start, stop and tell the user. Do not instrument without it.
-
-## 1. Generate hypotheses
-
-Write 3 to 5 precise hypotheses about why the bug happens: a thrown error in a specific component, a failed or duplicated request, a null or undefined access, a state update after unmount, a missing loading or error branch. Aim for more, not fewer. Each hypothesis gets an id (A, B, C, …).
-
-## 2. Instrument the code
-
-Add 2 to 6 logs (never more than 10) at the points that confirm or reject each hypothesis: function entry and exit, values before and after a critical operation, which branch ran. In JS or TS, POST to the server `endpoint`:
-
-```js
-// #region debug log
-fetch("ENDPOINT", {
-  method: "POST",
-  headers: { "Content-Type": "application/json" },
-  body: JSON.stringify({
-    sessionId: "SESSION_ID",
-    hypothesisId: "A",
-    location: "cart.tsx:42",
-    message: "cart total before render",
-    data: { total },
-    timestamp: Date.now(),
-  }),
-}).catch(() => {});
-// #endregion
-```
-
-Wrap every debug log in `// #region debug log` and `// #endregion` so cleanup later is deterministic. Each log maps to at least one `hypothesisId`. Never log secrets or PII.
-
-## 3. Reproduce
-
-Clear the log file (`DELETE` the file at `logPath`) before each run, then trigger the exact behavior the user described:
-
-- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile, headless — pass `--headed` to watch it, and `browser close` to stop it) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser eval --profile` hands you the whole runtime picture in one pass — the console (with uncaught errors), the network waterfall with failures flagged, performance, memory (heap, DOM nodes, listeners), accessibility, and the React + CPU profiles — so you rarely need to instrument at all. Run it with no expression to read the page as it is, or pass the repro to record what it triggers. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
-
-```bash
-npx react-doctor browser open http://localhost:3000           # attach + open the page
-npx react-doctor browser eval --profile                       # console + network + perf + a11y + React/CPU in one pass
-npx react-doctor browser snapshot                             # what rendered, by role + name
-npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()'
-npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()' --profile  # drive + measure it
-npx react-doctor browser eval 'page.evaluate(() => document.title)'   # raw DOM when you need it
-```
-
-`snapshot` and `eval` are a pair. `snapshot` lists the rendered elements by role and accessible name. `eval` runs an expression with the Playwright `page` in scope, so you act on what you saw using Playwright's own selectors: `page.locator("text=Login").click()`, `page.getByRole(...)`, `page.fill(...)`, `page.waitForSelector(...)`. For raw DOM, reach through `page.evaluate(() => …)`. No separate ref scheme to track.
-
-- **Backend or CLI bugs:** write and run a small repro script (Node, shell) yourself.
-- Otherwise ask the user for numbered steps, and remind them to restart any app or service whose instrumented files are bundled or cached.
-
-Reuse the same repro pathway for every iteration.
-
-## 4. Analyze the logs
-
-Read the NDJSON at `logPath`. Mark each hypothesis CONFIRMED, REJECTED, or INCONCLUSIVE, citing the specific log lines. If the file is empty, the repro likely did not run the instrumented path, so try again. If every hypothesis is rejected, revert the rejected code changes, generate new hypotheses from a different subsystem, and add more instrumentation.
-
-## 5. Fix, only with proof
-
-Apply the smallest change that addresses the proven cause. Cross-check it against the baseline rules in `SKILL.md` (derive don't duplicate, effects, single source of truth). Do not remove the instrumentation yet. Never use `setTimeout` or `sleep` as a fix.
-
-## 6. Verify
-
-Clear the log file, re-run the same reproduction (tag the logs `runId:"post-fix"` if helpful), and compare before and after with cited lines. Re-run a couple of times to rule out races. No fix is confirmed without log proof.
-
-## 7. Clean up
-
-Once verified, search every file for `#region debug log`, delete each block through its `#endregion`, grep again to confirm none remain, and `git diff` to confirm only the intentional fix is left.
diff --git a/.agents/skills/react-doctor/references/design.md b/.agents/skills/react-doctor/references/design.md
deleted file mode 100644
index 8a6e6a405..000000000
--- a/.agents/skills/react-doctor/references/design.md
+++ /dev/null
@@ -1,52 +0,0 @@
-# Reviewing and improving UI
-
-Improve interfaces with measured evidence from the rendered page, not taste alone. Use this when the user wants to build, polish, or review a UI: "looks off", "make this nicer", or a pasted screenshot.
-
-The value here is what a screenshot and the live DOM let you measure that reading code cannot: contrast ratios, line length, the spacing scale, and tap-target size. Lead with those, then apply craft.
-
-## Review against the live page
-
-```bash
-npx react-doctor browser open http://localhost:3000
-npx react-doctor browser screenshot --out review.png   # what the user actually sees
-npx react-doctor browser eval --profile                # full picture incl. axe-core a11y: contrast, names, landmarks
-```
-
-Review responsive breakpoints with `--viewport WIDTHxHEIGHT` (for example `--viewport 390x844` for a phone) on `screenshot`, `snapshot`, or `eval`. It emulates the size for that one command via a CDP override, so it never resizes your real browser window:
-
-```bash
-npx react-doctor browser screenshot --viewport 390x844 --out mobile.png
-```
-
-Look at the screenshot, then measure specifics with `eval` (computed styles, bounding boxes, color values) to get objective numbers rather than opinions:
-
-```bash
-npx react-doctor browser eval 'page.evaluate(() => getComputedStyle(document.querySelector("button")).fontSize)'
-```
-
-`browser eval --profile` runs axe-core against the live page and reports accessibility violations (color contrast, missing button or SVG names, heading order, landmarks) with the failing selectors in its Accessibility section. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, its `lighthouse_audit` adds performance and best-practice findings on top. Lead with the measured issues; a smarter model cannot dismiss them as opinion.
-
-## What to check
-
-Measured, in priority order:
-
-1. **Contrast**: body text at least 4.5:1, large text at least 3:1. Report the actual ratio.
-2. **Tap targets**: interactive elements at least 24 × 24 px (ideally 44 × 44 on touch).
-3. **Line length**: body copy roughly 45 to 75 characters per line.
-4. **Spacing**: spacing values come from one consistent scale, not ad-hoc px.
-
-Then craft, drawing on the bundled design rules:
-
-5. **Type**: one clear hierarchy; avoid default system-only stacks for brand surfaces; consistent line-height.
-6. **Color**: a committed palette, not arbitrary hexes; check both light and dark.
-7. **Layout**: alignment, rhythm, and a deliberate focal point.
-8. **State**: hover, focus-visible, disabled, loading, and empty states exist.
-
-## The loop
-
-Build or fix, screenshot, re-check, compare. Confirm the measured issue you targeted actually moved (the ratio crossed the threshold, the target grew) and that the screenshot looks right before and after.
-
-## Working rules
-
-- Always look at the screenshot; do not review UI from JSX alone.
-- Report measured findings with their numbers; keep taste suggestions short and clearly separate from the measured ones.
diff --git a/.agents/skills/react-doctor/references/explain.md b/.agents/skills/react-doctor/references/explain.md
deleted file mode 100644
index 18cd0cea2..000000000
--- a/.agents/skills/react-doctor/references/explain.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# Explaining and configuring rules
-
-Explain React Doctor rules and edit `doctor.config.*` safely. Use this when a user wants to understand a rule or change which rules run, not for fixing diagnostics (that is the main `react-doctor` skill, `/doctor`).
-
-Triggers: "why did this rule fire", "I disagree with this rule", "turn this rule off", "stop flagging X", "too noisy", "disable design rules".
-
-## Workflow
-
-1. Identify the rule key from the diagnostic (for example `react-doctor/no-array-index-as-key`).
-2. Explain it before changing anything:
-
-```bash
-npx react-doctor@latest rules explain react-doctor/no-array-index-as-key
-```
-
-3. Pick the narrowest control that matches the user's intent (see decision guide).
-4. Apply it with a `rules` subcommand. It edits your `doctor.config.*` or `package.json#reactDoctor` in place, preserving other fields and formatting.
-5. Validate the change did what they wanted:
-
-```bash
-npx react-doctor@latest --verbose --diff
-```
-
-## Commands
-
-```bash
-npx react-doctor@latest rules list                          # every rule + its effective severity
-npx react-doctor@latest rules list --configured             # only what your config changed
-npx react-doctor@latest rules list --category Performance    # filter by category
-npx react-doctor@latest rules explain <rule>                # why it matters + how to configure
-npx react-doctor@latest rules disable <rule>                # rule never runs
-npx react-doctor@latest rules enable <rule>                 # turn back on at its recommended severity
-npx react-doctor@latest rules set <rule> warn               # off | warn | error
-npx react-doctor@latest rules category "React Native" off    # whole category
-npx react-doctor@latest rules ignore-tag design             # skip a rule family (design, test-noise, …)
-npx react-doctor@latest rules unignore-tag design
-```
-
-Rule references accept the full key (`react-doctor/no-danger`), the bare id (`no-danger`), or a legacy key (`react/no-danger`).
-
-## Decision guide
-
-Match the control to the intent, and prefer the narrowest one:
-
-- **User disagrees with one rule, or it is a false positive for them**: `rules disable <rule>` (sets `rules.<key> = "off"`; the rule stops running everywhere). This is the default for "I don't want this rule".
-- **Rule is fine but wrong severity**: `rules set <rule> warn` or `rules set <rule> error`.
-- **A disabled-by-default rule they want on**: `rules enable <rule>`.
-- **A whole area is unwanted** (for example all React Native rules): `rules category "<Category>" off`.
-- **A behavioral family is noisy** (`design`, `test-noise`, `migration-hint`): `rules ignore-tag <tag>`.
-- **Keep it locally but hide from PR comment, score, or CI gate only**: do not disable. Edit `surfaces` in your config (`surfaces.prComment.excludeRules`, `surfaces.score.excludeTags`, `surfaces.ciFailure.excludeCategories`). The rule still shows in local `cli` output.
-
-How the layers combine: `ignore.tags` disables every rule carrying that tag before linting, so a tagged rule stays off even if `rules` or `categories` set it to `warn` or `error` (a rule-level override cannot re-enable a tag-ignored rule). For rules that are not tag-disabled, `rules` overrides `categories` overrides the rule's default. `surfaces` is visibility-only and never changes whether a rule runs.
-
-## Config shape
-
-Config lives in `doctor.config.ts` (or `.js`, `.mjs`, `.cjs`, `.json`, `.jsonc`), or the `reactDoctor` key in `package.json`. The `rules` commands edit whichever exists (TS and JS edits preserve formatting via magicast) and create `doctor.config.json` when none does, stamping `$schema`:
-
-```ts
-// doctor.config.ts
-export default {
-  rules: { "react-doctor/no-array-index-as-key": "off" },
-  categories: { "React Native": "warn" },
-  ignore: { tags: ["design"] },
-};
-```
-
-## Educating the user
-
-When explaining a rule, lead with the "Why it matters" guidance from `rules explain` and, when they want depth, the per-rule recipe at `https://www.react.doctor/prompts/rules/<plugin>/<rule>.md`. Only after they understand it should you offer to disable it: many "bad" rules are catching real issues.
diff --git a/.agents/skills/react-doctor/references/performance.md b/.agents/skills/react-doctor/references/performance.md
deleted file mode 100644
index a2c2b5bd9..000000000
--- a/.agents/skills/react-doctor/references/performance.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# Performance engineering (runtime-evidence loop)
-
-Find and fix jank with runtime evidence, never code reading alone. The primary signal is the long animation frame (LoAF): a frame longer than 50 ms, captured with `PerformanceObserver` and attributed to the exact script that blocked it (its `sourceURL`, `sourceFunctionName`, and how much of that time was synchronous layout). That attribution is what `performance.now()` and reading code cannot give you. Use this when the user reports jank, dropped frames, janky scroll, slow click or typing response, poor INP, slow LCP, or layout shift, or asks to make something faster.
-
-Same discipline as [debug](./debug.md): hypothesize, capture, analyze the worst frame, fix the top evidence-backed cause, re-capture to verify, repeat. A change that does not make the offending script's frame time drop is not a fix.
-
-## 1. Hypothesize (3 to 5)
-
-Why is it slow, and where? Common React causes: unstable callback or object props, a missing `memo` or `useMemo`, a context provider that is too broad, large unvirtualized lists, expensive children re-rendering on every parent commit, or a sync layout read interleaved with writes (layout thrashing).
-
-## 2. Capture (no app changes)
-
-`browser eval --profile` arms every observer (LoAF/LCP/CLS, the React render profiler, and a V8 CPU profiler), runs the expression you pass while it records, then reports the worst frames first with per-script attribution. Drive a fresh load by passing the navigation, or omit the expression to read the page as it is now without reloading:
-
-```bash
-npx react-doctor browser eval 'page.goto("http://localhost:3000")' --profile
-npx react-doctor browser eval --profile   # measures the current page, no reload
-```
-
-It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one — launched headless (pass `--headed` to watch the window), landing on a free port automatically if 9222 is taken, with later commands reattaching to it and `browser close` stopping it when you're done. The performance section ranks frames by input-blocking time — the jank signal — and drops non-blocking ones (a long but non-blocking frame, like the first frame after navigation, is not jank), leading with the most-blocking frame, then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no blocking frames, which is a result, not a failure.
-
-It also captures a Chrome DevTools timeline trace over the same window. The perf section rolls it up into the native cost a forced reflow incurs — total/longest **style-recalc**, **layout**, **hit-test**, and **paint** time — which the script-level LoAF rows can't isolate (this is where `getComputedStyle` / `getBoundingClientRect` / `elementsFromPoint` land). The raw trace is written to `react-doctor-trace.json` (override with `--out`); drop it into the DevTools **Performance** panel for the full flame chart.
-
-The `# Memory` section snapshots the page's runtime footprint after the action — JS heap used/total, DOM node count, event listeners, and document/frame counts (the CDP Performance counters). For a leak, re-run on the same page with no reload (`browser eval --profile`) and watch these climb: growing DOM nodes mean detached subtrees retained, growing listeners/heap mean leaked closures, growing documents/frames mean orphaned iframes.
-
-The `# Network` section lists each request with its outcome (status or failure), and — once it has settled — its time and encoded transfer size, with a summary counting failed, slow (>500ms), and heavy (>1MB) requests. Use it to spot a blocking waterfall or an oversized bundle/asset; a cache hit or an unfinished request shows no size/time.
-
-To attribute interaction jank (a slow click, scroll, or keypress), pass the repro as the expression so it runs while recording: `browser open` the page, then `browser eval 'page.getByText("Next").click()' --profile`. The recording covers the action, so its frames, renders, and CPU samples are all included.
-
-## 3. Analyze the worst frame first
-
-The output is already sorted worst-first. The script with the largest duration inside the worst frame is your culprit. If a script's sync-layout time is a large share of its duration, that is layout thrashing: sync reads (`offsetHeight`, `getBoundingClientRect`, `scrollTop`, `getComputedStyle`) interleaved with DOM writes. A minified `sourceURL` is meaningless on its own, so resolve it through your sourcemap. Cite the specific script when you conclude:
-
-> CONFIRMED: 128 ms frame, script `app.js` `drawSeries` ran 84 ms with 42 ms sync layout. The chart redraw forces layout inside the scroll handler.
-
-## 4. Zoom into React renders (optional)
-
-When the worst frame's script is your own React bundle and you need per-component render counts and why each rendered, profile React directly. `browser open` injects the real DevTools profiler before the page loads, so there are no app changes, no Chrome extension, and no manual record or stop — then drive the repro with `browser eval --profile`:
-
-```bash
-npx react-doctor browser open http://localhost:3000
-npx react-doctor browser eval 'page.getByText("Next").click()' --profile
-```
-
-For trustworthy timings, run against React's profiling build (alias `react-dom` to `react-dom/profiling` in your bundler) in a dev or non-prod build. Dev timings work but are inflated.
-
-`browser eval --profile` records one pass with both lenses. The `react` lens reports the slowest commits, the components that render most/cost the most self time, and the count of unnecessary re-renders (components that re-rendered with nothing they own changed — the memoization candidates). The `cpu` lens is a Chrome DevTools CPU profile via V8's sampling profiler over CDP, the hottest JS functions ranked by self time. The `react` lens is null on a production React build (it records no profiling data); the `cpu` lens works on any build. For manual control of the React profiler, drive it through `browser eval` without `--profile` (the Playwright `page` is in scope):
-
-```bash
-npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.start())'
-# drive the exact repro with more `browser eval`: page.locator("...").click(), page.keyboard.type("...")
-npx react-doctor browser eval 'page.evaluate(() => window.__REACT_PERF__.stop())'
-```
-
-Reading the raw React export: aggregate `dataForRoots[].commitData[]`: per fiber, render count and summed `fiberActualDurations` and `fiberSelfDurations` (both `[fiberID, ms]` pairs); `changeDescriptions[fiberID]` for why it rendered (which props, state, hooks, or context changed, plus `isFirstMount` and `didHooksChange`). Everything keys by fiber id; map ids to component names with `dataForRoots[].elementNames` (`[fiberID, name]` pairs). Rank by components that render most often, cost the most self time, or re-render with no meaningful prop change (memoization candidates) — which is exactly what `browser eval --profile` computes for you.
-
-## 5. Fix, only with proof
-
-Apply the smallest change that addresses the proven cause. Cross-check it against the baseline rules in [`SKILL.md`](../SKILL.md) (derive don't duplicate, effects, single source of truth). Never fix by wrapping work in `setTimeout`: that defers the work to a later frame, it does not remove it.
-
-## 6. Verify
-
-Re-run the same capture and diff before and after: the offending frame and its script time must drop, and no other frame may regress. For the React profiler, re-run the scenario a few times and compare medians (dev timings are noisy; StrictMode double-renders on mount). Never claim a performance win without before-and-after evidence. The profiler leaves nothing behind in your app to clean up; it lives only in the injected browser session.
diff --git a/.changeset/browser-eval-errors-geometry.md b/.changeset/browser-eval-errors-geometry.md
new file mode 100644
index 000000000..74975fbc5
--- /dev/null
+++ b/.changeset/browser-eval-errors-geometry.md
@@ -0,0 +1,5 @@
+---
+"react-doctor": patch
+---
+
+Make `browser eval` and `browser eval --profile` self-reporting about what an action did to the page. A driven action that triggers a page-side error (a `console.error` or an uncaught throw) now appends an "Errors during eval" section instead of failing silently, so a broken interaction surfaces without hand-wiring a console hook. `--profile` (and the `browser_profile` MCP tool) now reports page geometry alongside memory — viewport size, devicePixelRatio, scroll offset, and how far the page scrolled while the action ran — so "did the element move, or did the page scroll under me?" is answerable from the output. Page scroll delta only prints when the viewport actually moved.
diff --git a/.changeset/browser-eval-snapshot-ergonomics.md b/.changeset/browser-eval-snapshot-ergonomics.md
new file mode 100644
index 000000000..595635cc8
--- /dev/null
+++ b/.changeset/browser-eval-snapshot-ergonomics.md
@@ -0,0 +1,5 @@
+---
+"react-doctor": patch
+---
+
+Make `browser eval` the one primitive for driving a page: when an expression just acts (returns nothing), it now hands back the resulting accessibility tree, so a single call both drives the page and shows the new state — no follow-up `snapshot`. Multi-statement source works without hand-wrapping it in an async IIFE, and a page-context `ReferenceError` (`window is not defined`) now explains that `eval` runs in Node with the Playwright `page` in scope and to reach page globals through `page.evaluate(() => …)`. The same applies to the `browser_eval` MCP tool. Locating stays pure Playwright — `browser snapshot`, or `page.locator(...).ariaSnapshot()` inside `eval` for a subtree.
diff --git a/.changeset/react-browser-debug-skill.md b/.changeset/react-browser-debug-skill.md
index 9fb3d3858..baf2f2b92 100644
--- a/.changeset/react-browser-debug-skill.md
+++ b/.changeset/react-browser-debug-skill.md
@@ -1,5 +1,5 @@
 ---
-"react-doctor": minor
+"react-doctor": patch
 ---
 
 Add the `browser`, `debug`, and `mcp` commands behind the unified `/react-doctor` skill. `browser` drives a real Chrome over CDP (attaching to your running session, launching a dedicated persistent profile only as a fallback): `open` a page, `eval` a Playwright expression, `snapshot` the accessibility tree, and `screenshot`. Adding `--profile` to `eval` records the whole runtime picture in one pass while the expression runs — console, network, performance (long animation frames with per-script attribution, LCP, CLS, plus a DevTools timeline roll-up of forced style-recalc/layout/hit-test/paint cost), an axe-core accessibility audit, a React render profile (slowest commits, hottest components by self time, unnecessary re-render counts), and a Chrome DevTools CPU profile via V8's sampling profiler over CDP (the hottest JS functions ranked by self time). It also writes the raw DevTools timeline trace to a file (`--out`, default `react-doctor-trace.json`) that loads in the DevTools Performance panel. `debug` runs an NDJSON logging server the debug job posts runtime evidence to. `mcp` runs a Model Context Protocol server over stdio that exposes the doctor scan and the browser/debug jobs as MCP tools, so any MCP-capable agent can run `react-doctor mcp` and call `doctor_scan`, the `browser_*` tools (`browser_eval` takes a `profile: true` argument that captures every signal together), and the `debug_*` log server directly.
diff --git a/.gitignore b/.gitignore
index 25e55dce9..e575024ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,8 +15,7 @@ review-*.md
 /.agents/*
 !/.agents/skills/
 /.agents/skills/*
-!/.agents/skills/react-doctor/
-!/.agents/skills/react-doctor/**
+!/.agents/skills/react-doctor
 !/.agents/skills/rule-research/
 !/.agents/skills/rule-research/**
 !/.agents/skills/rule-writing/
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 8c8ff4ecd..a02225ef5 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -14,6 +14,10 @@ import {
   TIMELINE_TRACE_CATEGORIES,
 } from "./constants.js";
 import { collectPerformanceReport } from "./perf-observer.js";
+import { appendEvalErrors } from "./utils/append-eval-errors.js";
+import { compileEval } from "./utils/compile-eval.js";
+import { enrichEvalError } from "./utils/enrich-eval-error.js";
+import { formatEvalValue } from "./utils/format-eval-value.js";
 import { writeTraceFile } from "./utils/write-trace-file.js";
 import { analyzeReactProfile } from "./react-profiler/analyze-profile.js";
 import type { ReactProfilerDataExport } from "./react-profiler/types/profiling-export.js";
@@ -25,6 +29,7 @@ import type {
   InspectOptions,
   MemoryStats,
   NetworkRequestEntry,
+  PageGeometry,
   PageInspection,
   PageVitals,
   Viewport,
@@ -149,14 +154,39 @@ export class BrowserSession {
     this.viewportOverride = cdpSession;
   }
 
-  // The expression runs here in Node with the Playwright `page` in scope (the
-  // whole driver API), not in the page — so an agent acts on what `snapshot`
-  // showed it using Playwright's own selectors.
+  // The source runs here in Node with the Playwright `page` (the whole driver
+  // API) in scope, not in the page — so an agent locates and acts with
+  // Playwright's own selectors: `page.getByRole("button", { name: "Open" })
+  // .click()`. A bare expression returns its value; multi-statement source works
+  // too (see `compileEval`). Page globals (`window`, `document`, …) live in the page
+  // — reach them via `page.evaluate(...)`. Throws are rethrown through `enrichEvalError`.
   async evaluate<T = unknown>(expression: string): Promise<T> {
-    const run = new Function("page", `"use strict"; return (async () => (${expression}))();`) as (
-      page: Page,
-    ) => Promise<T>;
-    return run(this.page);
+    try {
+      return await compileEval<T>(expression)(this.page);
+    } catch (error) {
+      throw enrichEvalError(error);
+    }
+  }
+
+  // The driving path the CLI and MCP use: run the source, and when it was a pure
+  // action (returned nothing) hand back the resulting accessibility tree so one
+  // call both acts and shows the new page state — no follow-up `snapshot`. An
+  // expression that returns a value yields that value instead. Page-side errors
+  // the action triggered (console.error, an uncaught throw) are appended so a
+  // silent failure can't slip past without the agent hand-wiring a console hook.
+  async evaluateOrSnapshot(expression: string): Promise<string> {
+    const consoleEntries: ConsoleMessageEntry[] = [];
+    const detach = this.collectConsole(consoleEntries);
+    try {
+      const result = await this.evaluate(expression); // undefined = pure action; null is a value
+      const output = result === undefined ? await this.snapshot() : formatEvalValue(result);
+      // HACK: one event-loop turn lets page-side console/pageerror events queued
+      // during the action drain (CDP delivers them async) before we read them.
+      await new Promise((resolve) => setTimeout(resolve, 0));
+      return appendEvalErrors(output, consoleEntries);
+    } finally {
+      detach(); // runs on throw too; a rejected eval gets no error section
+    }
   }
 
   // Wait for the page to stop changing before we read it: in-flight requests
@@ -290,6 +320,42 @@ export class BrowserSession {
     return this.page.evaluate(collectPerformanceReport, PERFORMANCE_OBSERVE_WINDOW_MS);
   }
 
+  // The page's native scroll offset, read before the action so `captureGeometry`
+  // can report the delta. Best-effort: a failed read resolves to { x: 0, y: 0 }.
+  private readScroll(): Promise<{ x: number; y: number }> {
+    return this.page
+      .evaluate(() => ({ x: window.scrollX, y: window.scrollY }))
+      .catch(() => ({ x: 0, y: 0 }));
+  }
+
+  // Post-action scroll + viewport state, plus how far the page scrolled during the
+  // action (`scrolledX/Y`, rounded). A large delta means the viewport moved under you —
+  // "did the element move, or did the page?". A failed read yields zeros and a DPR of 1.
+  private captureGeometry(scrollBefore: { x: number; y: number }): Promise<PageGeometry> {
+    return this.page
+      .evaluate(
+        (before) => ({
+          scrollX: Math.round(window.scrollX),
+          scrollY: Math.round(window.scrollY),
+          scrolledX: Math.round(window.scrollX - before.x),
+          scrolledY: Math.round(window.scrollY - before.y),
+          viewportWidth: window.innerWidth,
+          viewportHeight: window.innerHeight,
+          devicePixelRatio: window.devicePixelRatio,
+        }),
+        scrollBefore,
+      )
+      .catch(() => ({
+        scrollX: 0,
+        scrollY: 0,
+        scrolledX: 0,
+        scrolledY: 0,
+        viewportWidth: 0,
+        viewportHeight: 0,
+        devicePixelRatio: 1,
+      }));
+  }
+
   // The page's current runtime footprint from the CDP Performance domain (heap,
   // DOM nodes, listeners, documents/frames) — the counters DevTools' Performance
   // monitor shows. A snapshot, not a window, so it reflects the post-action state.
@@ -381,6 +447,7 @@ export class BrowserSession {
         return true;
       });
 
+      const scrollBefore = await this.readScroll();
       let result: unknown = null;
       let vitals = emptyVitals();
       let reactExport: ReactProfilerDataExport | null = null;
@@ -417,6 +484,7 @@ export class BrowserSession {
       // Read memory before axe runs so the snapshot reflects the app's footprint,
       // not axe's injected globals.
       const memory = await this.captureMemory(cdpSession);
+      const geometry = await this.captureGeometry(scrollBefore);
 
       // Detach the page listeners before the accessibility audit so axe's injected
       // evaluate (and anything it logs) can't land in the captured signals.
@@ -431,6 +499,7 @@ export class BrowserSession {
         network,
         performance: { ...vitals, timeline: analyzeTimelineTrace(traceEvents) },
         memory,
+        geometry,
         accessibility,
         tracePath: writtenTracePath,
         profile: {
diff --git a/packages/browser/src/types.ts b/packages/browser/src/types.ts
index f3c2ce9e0..4405b482f 100644
--- a/packages/browser/src/types.ts
+++ b/packages/browser/src/types.ts
@@ -96,6 +96,23 @@ export interface MemoryStats {
   frames: number;
 }
 
+// The page's scroll + viewport state around the driven action. `scrolledX/Y` is
+// how far the page scrolled while the expression ran — a large value means the
+// action moved the viewport under you (auto-scroll, focus jump, scroll-into-
+// view), which can masquerade as an element moving or resizing. `devicePixelRatio`
+// and the viewport size also confirm a `--viewport` emulation took effect. Note:
+// this is native page scroll only — an app with its own camera/pan (a canvas
+// editor) won't show its movement here.
+export interface PageGeometry {
+  scrollX: number; // window.scrollX after the action, rounded
+  scrollY: number; // window.scrollY after the action, rounded
+  scrolledX: number; // horizontal scroll delta across the action, rounded
+  scrolledY: number; // vertical scroll delta across the action, rounded
+  viewportWidth: number; // window.innerWidth
+  viewportHeight: number; // window.innerHeight
+  devicePixelRatio: number; // window.devicePixelRatio
+}
+
 // Everything `inspect` observes that the page itself reports (LoAF / LCP / CLS),
 // before the trace-derived `timeline` is folded in to form the PerformanceReport.
 export type PageVitals = Omit<PerformanceReport, "timeline">;
@@ -117,6 +134,7 @@ export interface PageInspection {
   network: NetworkRequestEntry[];
   performance: PerformanceReport;
   memory: MemoryStats;
+  geometry: PageGeometry;
   accessibility: AccessibilityViolation[];
   // Absolute path the raw timeline trace was written to, or null when none was.
   tracePath: string | null;
diff --git a/packages/browser/src/utils/append-eval-errors.ts b/packages/browser/src/utils/append-eval-errors.ts
new file mode 100644
index 000000000..e1293b210
--- /dev/null
+++ b/packages/browser/src/utils/append-eval-errors.ts
@@ -0,0 +1,14 @@
+import type { ConsoleMessageEntry } from "../types.js";
+
+// When the driven action raised page-side errors — console.error or an uncaught
+// throw, both recorded by the console collector as type "error" — tack a compact
+// "Errors during eval" section onto the output, one `[error] text (location)`
+// line per entry, with the location omitted when the entry has none. With no
+// such entries the output comes back untouched.
+export const appendEvalErrors = (output: string, entries: ConsoleMessageEntry[]): string => {
+  const lines = entries
+    .filter(({ type }) => type === "error")
+    .map(({ text, location }) => `[error] ${text}${location ? ` (${location})` : ""}`);
+  // Nothing to report: keep a clean action's output clean.
+  return lines.length === 0 ? output : `${output}\n\n# Errors during eval\n${lines.join("\n")}`;
+};
diff --git a/packages/browser/src/utils/compile-eval.ts b/packages/browser/src/utils/compile-eval.ts
new file mode 100644
index 000000000..89da38933
--- /dev/null
+++ b/packages/browser/src/utils/compile-eval.ts
@@ -0,0 +1,26 @@
+import type { Page } from "playwright-core";
+
+export interface CompiledEval<T> {
+  (page: Page): Promise<T>; // runs the compiled source against `page`
+}
+
+// `eval` source runs in Node with the Playwright `page` in scope. A bare
+// expression — `page.getByText("Login").click()` — is the common case, so
+// compile that first to keep its return value. Anything that isn't a valid
+// expression throws a SyntaxError at construction and is recompiled as a
+// function body (undefined unless it `return`s). The source gets its own lines
+// so a trailing `// comment` can't swallow the wrapper's closing tokens.
+export const compileEval = <T>(expression: string): CompiledEval<T> => {
+  try {
+    return new Function(
+      "page",
+      `"use strict"; return (async () => (\n${expression}\n))();`,
+    ) as CompiledEval<T>;
+  } catch (error) {
+    if (!(error instanceof SyntaxError)) throw error;
+    return new Function(
+      "page",
+      `"use strict"; return (async () => {\n${expression}\n})();`,
+    ) as CompiledEval<T>;
+  }
+};
diff --git a/packages/browser/src/utils/enrich-eval-error.ts b/packages/browser/src/utils/enrich-eval-error.ts
new file mode 100644
index 000000000..746a88b6c
--- /dev/null
+++ b/packages/browser/src/utils/enrich-eval-error.ts
@@ -0,0 +1,20 @@
+// Page globals that don't exist in the Node scope `eval` runs in. Reaching for one
+// outside `page.evaluate(() => ...)` is the classic mistake, so the bare
+// ReferenceError gets that fix appended in place; other values pass through. The
+// name must lead the message, so `geolocation is not defined` is not `location`.
+const PAGE_GLOBALS = [
+  "window",
+  "document",
+  "navigator",
+  "localStorage",
+  "sessionStorage",
+  "location",
+];
+
+export const enrichEvalError = (error: unknown): unknown => {
+  if (!(error instanceof ReferenceError)) return error;
+  const culprit = PAGE_GLOBALS.find((name) => error.message.startsWith(`${name} is not defined`));
+  if (!culprit) return error;
+  error.message = `${error.message}\n\`eval\` runs in Node with the Playwright \`page\` in scope, not in the page. Reach page globals through it: page.evaluate(() => ${culprit}...).`;
+  return error;
+};
diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
index ba0c4a5ca..ecf4b32d2 100644
--- a/packages/mcp/src/tools/browser.ts
+++ b/packages/mcp/src/tools/browser.ts
@@ -1,11 +1,6 @@
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
-import {
-  closeLaunchedBrowser,
-  DEFAULT_TRACE_FILENAME,
-  formatEvalValue,
-  parseViewport,
-} from "@react-doctor/browser";
+import { closeLaunchedBrowser, DEFAULT_TRACE_FILENAME, parseViewport } from "@react-doctor/browser";
 import { DEFAULT_CDP_ENDPOINT_HINT } from "../constants.js";
 import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
 import { withSession, type BrowserToolConnection } from "../utils/with-session.js";
@@ -70,13 +65,13 @@ export const registerBrowserTools = (server: McpServer): void => {
     {
       title: "Run Playwright code, optionally profiling it",
       description:
-        'Run an async expression with the Playwright `page` in scope (e.g. page.getByText("Login").click()) against the attached page. Two modes: by default it returns the expression\'s value — use it to locate, read, or drive the page. Set profile:true to instead record and return the full runtime picture while the expression runs. Open the page first with browser_open for React render data.',
+        'Run Playwright code with the `page` in scope (e.g. page.getByRole("button", { name: "Login" }).click()) against the attached page. Locate with the accessibility tree (browser_snapshot, or page.locator(...).ariaSnapshot() for a subtree) then act. By default: an expression that returns a value yields the value; an expression that just acts (returns nothing) yields the resulting accessibility tree, so one call drives the page and shows the new state. Multi-statement source works without wrapping it yourself. Page globals (window/document) live in the page — reach them via page.evaluate(() => ...). Set profile:true to instead record and return the full runtime picture while the code runs. Open the page first with browser_open for React render data.',
       inputSchema: {
         expression: z
           .string()
           .optional()
           .describe(
-            "Async expression with the Playwright `page` in scope; omit together with profile:true to measure the live page idle",
+            "Playwright code with `page` in scope (single expression or multiple statements); omit together with profile:true to measure the live page idle",
           ),
         profile: z
           .boolean()
@@ -109,11 +104,11 @@ export const registerBrowserTools = (server: McpServer): void => {
         }
         if (args.expression === undefined) return textResult("(no value)");
         const expression = args.expression;
-        const result = await withSession(toConnection(args), (session) =>
-          session.evaluate(expression),
+        return textResult(
+          await withSession(toConnection(args), (session) =>
+            session.evaluateOrSnapshot(expression),
+          ),
         );
-        if (result === undefined) return textResult("(no value)");
-        return textResult(formatEvalValue(result));
       }),
   );
 
diff --git a/packages/mcp/tests/server.test.ts b/packages/mcp/tests/server.test.ts
index c36d568ce..9b391ae93 100644
--- a/packages/mcp/tests/server.test.ts
+++ b/packages/mcp/tests/server.test.ts
@@ -19,6 +19,7 @@ const listToolNames = async (): Promise<string[]> => {
 
 test("registers the doctor, browser, and debug tools", async () => {
   expect(await listToolNames()).toEqual([
+    "browser_close",
     "browser_eval",
     "browser_open",
     "browser_screenshot",
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index a9704d869..107d814a3 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -8,6 +8,7 @@ import {
   type CpuProfileAnalysis,
   type MemoryStats,
   type NetworkRequestEntry,
+  type PageGeometry,
   type PageInspection,
   type PerformanceReport,
   type ReactProfileAnalysis,
@@ -98,9 +99,7 @@ export const browserEvalAction = async (
     return;
   }
   await withSession(options, async (session) => {
-    const result = await session.evaluate(expression);
-    if (result === undefined) return;
-    logger.log(formatEvalValue(result));
+    logger.log(await session.evaluateOrSnapshot(expression));
   });
 };
 
@@ -177,6 +176,20 @@ const printMemoryStats = (memory: MemoryStats): void => {
   );
 };
 
+// Reports where the viewport sits: its size, pixel ratio, and scroll offset.
+// The follow-up "page scrolled" line is reserved for actions that really moved
+// the page (the viewport shifted under you); an unmoved page adds no noise.
+const printGeometry = (geometry: PageGeometry): void => {
+  const { viewportWidth, viewportHeight, devicePixelRatio, scrollX, scrollY } = geometry;
+  logger.log(
+    `Viewport: ${viewportWidth}x${viewportHeight} @ ${devicePixelRatio}x, scroll ${scrollX},${scrollY}`,
+  );
+  if (geometry.scrolledX === 0 && geometry.scrolledY === 0) return;
+  logger.log(
+    `Page scrolled ${geometry.scrolledX},${geometry.scrolledY} during the action (the viewport moved under you)`,
+  );
+};
+
 const printPerformanceReport = (report: PerformanceReport): void => {
   const lcp = report.largestContentfulPaintMs;
   logger.log(`LCP: ${lcp === null ? "n/a" : `${lcp}ms`}   CLS: ${report.cumulativeLayoutShift}`);
@@ -279,6 +292,7 @@ const printInspection = (inspection: PageInspection): void => {
 
   logger.log("\n# Memory");
   printMemoryStats(inspection.memory);
+  printGeometry(inspection.geometry);
 
   logger.log("\n# Accessibility");
   if (inspection.accessibility.length === 0) logger.log("(none)");
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
index ed9058813..c08c78cd7 100644
--- a/packages/react-doctor/src/cli/index.ts
+++ b/packages/react-doctor/src/cli/index.ts
@@ -270,7 +270,7 @@ withRenderOptions(
   browser
     .command("eval [expression]")
     .description(
-      "Run an expression with the Playwright `page` in scope, e.g. 'page.getByText(\"Login\").click()'. Add --profile to also record the full runtime picture.",
+      'Run Playwright code with `page` in scope, e.g. \'page.getByRole("button", { name: "Login" }).click()\'. Returns the expression\'s value, or — when it just acts — the resulting accessibility tree (so one call drives + shows the new state). Multiple statements work; reach page globals via page.evaluate(...). Add --profile to record the full runtime picture instead.',
     )
     .option(
       "--profile",
diff --git a/skills/react-doctor/SKILL.md b/skills/react-doctor/SKILL.md
index 4d3298dab..03ac2ae9b 100644
--- a/skills/react-doctor/SKILL.md
+++ b/skills/react-doctor/SKILL.md
@@ -32,7 +32,7 @@ Apply these on every React edit, before any tool runs. They shape how you write
 | "review", "before commit", "clean up", or changed files | **doctor** | static scan plus 0 to 100 score |
 | "slow", "laggy", "janky", "re-rendering"                | **perf**   | React render + CPU profilers    |
 | "broken", "crashes", "doesn't work" in the UI           | **debug**  | reproduce in a real browser     |
-| "looks off", "polish", a screenshot or pasted element   | **design** | measured UI review              |
+| "looks off", "polish", "animate", a screenshot/element  | **design** | measured UI + motion review     |
 
 doctor runs from code alone, so it is the one that fires in the background. The browser jobs (perf, debug, design) need a live page and are slower, so they run only when asked.
 
@@ -41,7 +41,7 @@ doctor runs from code alone, so it is the one that fires in the background. The
 debug, design, and perf need a real Chrome. Two ways to get one:
 
 1. **A browser MCP already in your tools.** Prefer [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) or similar for console, network, and snapshots. It adds full performance traces and Lighthouse on top.
-2. **The bundled `react-doctor browser` command.** Attaches to the Chrome you already have open over the Chrome DevTools Protocol, and launches a dedicated persistent one only as a fallback. Four commands: `open` (load a URL, leave the page, React profiler injected), `eval` (run a Playwright expression with `page` in scope — returns its value), `snapshot` (accessibility tree), and `screenshot`. Add `--profile` to `eval` to record the whole runtime picture while the expression runs — console, network, performance (long animation frames with per-script attribution, LCP, CLS), an axe-core accessibility audit, a React render profile (slowest commits, hottest components, unnecessary re-renders), and a V8/DevTools CPU profile over CDP (hottest JS functions by self time). Run `eval --profile` with no expression to measure the live page as it is.
+2. **The bundled `react-doctor browser` command.** Attaches to your open Chrome over the Chrome DevTools Protocol, launching a dedicated persistent one only as a fallback. Four commands: `open` (load a URL, inject the React profiler), `eval` (run Playwright code with `page` in scope), `snapshot` (accessibility tree), and `screenshot`. Locate from the a11y tree, then act with Playwright selectors: `eval 'page.getByRole("button", { name: "Save" }).click()'`. `eval` returns the expression's value, or — when it only acts — the resulting a11y tree, so one call both drives the page and shows the new state. Reach page globals through `page.evaluate(() => …)`. Add `--profile` to record the whole runtime picture while the expression runs: console, network, performance (LoAF with per-script attribution, LCP, CLS), an axe-core a11y audit, a React render profile, and a V8 CPU profile. With no expression it measures the live page as-is.
 
 It is the same Chrome either way, so the playbooks apply to both: `browser open`, `eval`, `snapshot`, and `screenshot` map onto the MCP's `navigate_page`, `evaluate_script`, `take_snapshot`, and `take_screenshot`.
 
@@ -53,7 +53,7 @@ React Doctor ships its own Model Context Protocol server over stdio so any MCP-c
 npx react-doctor@latest mcp
 ```
 
-It exposes `doctor_scan` (the static scan), the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_eval` takes a `profile: true` argument that records the whole runtime picture — console, network, performance, accessibility, a React render profile, and a literal Chrome DevTools CPU profile — in one pass while the expression runs.
+It exposes `doctor_scan`, the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`, `browser_close`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_eval` takes `profile: true` to record the whole runtime picture (console, network, performance, accessibility, React + CPU profiles) in one pass.
 
 ## doctor: scan and triage
 
@@ -81,11 +81,11 @@ When the user reports jank, slow interactions, dropped frames, excessive re-rend
 
 ## debug: reproduce in a real browser
 
-When the user says something is broken, crashes, throws, or behaves wrong in the running app, read [references/debug.md](references/debug.md) and follow it. It runs the [debug-agent](https://github.com/millionco/debug-agent) loop: generate hypotheses, instrument the code with runtime NDJSON logs, reproduce the bug in the live browser, and fix only once the logs prove the cause.
+When the user says something is broken, crashes, throws, or behaves wrong in the running app, read [references/debug.md](references/debug.md) and follow it. It runs an evidence-driven loop: generate hypotheses, instrument the code with runtime NDJSON logs, reproduce in the live browser, and fix only once the logs prove the cause.
 
-## design: review and improve UI
+## design: review and improve UI (incl. motion)
 
-When the user wants to build, polish, or review an interface ("looks off", "make this nicer", a pasted screenshot or element), read [references/design.md](references/design.md) and follow it. It opens the page, takes a screenshot, and reports what it can measure (contrast, line length, spacing, tap-target size), not only taste.
+When the user wants to build, polish, or review an interface ("looks off", "make this nicer", a pasted screenshot or element) — or to add or fix animation (it "feels janky", sluggish, or off) — read [references/design.md](references/design.md) and follow it. It opens the page, takes a screenshot, and reports what it can measure (contrast, line length, spacing, tap-target size), not only taste. The same file carries the motion ruleset (§15–17): when to animate, easing and duration, physics, interruptibility, GPU performance, gestures, and accessibility, measured with `browser eval --profile`.
 
 ## Configuring or explaining rules
 
diff --git a/skills/react-doctor/references/debug.md b/skills/react-doctor/references/debug.md
index b95d30e79..d551d00c2 100644
--- a/skills/react-doctor/references/debug.md
+++ b/skills/react-doctor/references/debug.md
@@ -2,7 +2,7 @@
 
 Reproduce and fix UI bugs with runtime evidence, never by guessing from code alone. Use this when the user says something is broken, crashes, throws, hangs, or behaves wrong in the running app.
 
-This is the [debug-agent](https://github.com/millionco/debug-agent) loop, built into React Doctor: hypothesize, instrument with logs, reproduce, analyze the logs, fix only once the logs prove the cause, verify, clean up.
+The loop: hypothesize, instrument with logs, reproduce, analyze the logs, fix only once the logs prove the cause, verify, clean up.
 
 ## 0. Start the logging server (before any instrumentation)
 
@@ -51,7 +51,7 @@ Wrap every debug log in `// #region debug log` and `// #endregion` so cleanup la
 
 Clear the log file (`DELETE` the file at `logPath`) before each run, then trigger the exact behavior the user described:
 
-- **Browser bugs:** drive the repro with whatever controls a live Chrome. The bundled browser core attaches to the Chrome you already have open over the Chrome DevTools Protocol, so the real session, logins, and cookies come along. If nothing debuggable is running, it launches a dedicated persistent Chrome (its own profile, headless — pass `--headed` to watch it, and `browser close` to stop it) that later commands reattach to, so the flow below works either way. To drive your real logged-in session, open Chrome with `--remote-debugging-port=9222` first and it attaches to that instead. `browser eval --profile` hands you the whole runtime picture in one pass — the console (with uncaught errors), the network waterfall with failures flagged, performance, memory (heap, DOM nodes, listeners), accessibility, and the React + CPU profiles — so you rarely need to instrument at all. Run it with no expression to read the page as it is, or pass the repro to record what it triggers. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, it also covers this and adds performance traces and Lighthouse.
+- **Browser bugs:** drive the repro with a live Chrome. The bundled browser attaches to your open Chrome over the Chrome DevTools Protocol (real session, logins, cookies come along), or launches a dedicated persistent one (own profile, headless — `--headed` to watch, `browser close` to stop) that later commands reattach to. To drive your logged-in session, start Chrome with `--remote-debugging-port=9222` first. `browser eval --profile` hands you the whole runtime picture in one pass — console (with uncaught errors), network with failures flagged, performance, memory, accessibility, and the React + CPU profiles — so you rarely need to instrument at all. Run it with no expression to read the page as-is, or pass the repro to record what it triggers. [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp), if present, also covers this and adds Lighthouse.
 
 ```bash
 npx react-doctor browser open http://localhost:3000           # attach + open the page
@@ -62,7 +62,7 @@ npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).cl
 npx react-doctor browser eval 'page.evaluate(() => document.title)'   # raw DOM when you need it
 ```
 
-`snapshot` and `eval` are a pair. `snapshot` lists the rendered elements by role and accessible name. `eval` runs an expression with the Playwright `page` in scope, so you act on what you saw using Playwright's own selectors: `page.locator("text=Login").click()`, `page.getByRole(...)`, `page.fill(...)`, `page.waitForSelector(...)`. For raw DOM, reach through `page.evaluate(() => …)`. No separate ref scheme to track.
+Locate from the accessibility tree, then act — cheaper and more stable than coordinates or DOM scraping. `snapshot` lists rendered elements by role and accessible name; inside `eval`, `page.locator("…").ariaSnapshot()` does the same for one subtree. `eval` runs Playwright code with the `page` in scope: `page.getByRole("button", { name: "Checkout" }).click()`, `page.getByLabel(...).fill(...)`, `page.waitForSelector(...)`. When the code only acts, `eval` returns the resulting accessibility tree (one call drives the page and shows the new state); if it triggers a page-side error (`console.error` or an uncaught throw), `eval` appends an "Errors during eval" section. Multiple statements work without an IIFE. For raw DOM, reach through `page.evaluate(() => …)` — bare `window`/`document` at the top level won't work, since `eval` itself runs in Node.
 
 - **Backend or CLI bugs:** write and run a small repro script (Node, shell) yourself.
 - Otherwise ask the user for numbered steps, and remind them to restart any app or service whose instrumented files are bundled or cached.
@@ -73,6 +73,8 @@ Reuse the same repro pathway for every iteration.
 
 Read the NDJSON at `logPath`. Mark each hypothesis CONFIRMED, REJECTED, or INCONCLUSIVE, citing the specific log lines. If the file is empty, the repro likely did not run the instrumented path, so try again. If every hypothesis is rejected, revert the rejected code changes, generate new hypotheses from a different subsystem, and add more instrumentation.
 
+When reasoning from black-box behavior rather than logs (a driven interaction, a measured delta), the same proof bar applies: confirm the mechanism in the source before calling it a bug. An internally-consistent anomaly — the box grows by exactly the distance the page scrolled — is usually intended behavior (auto-pan, momentum, a debounce), not a defect. Synthetic input is not real input: `page.mouse.move(..., { steps })` spreads over wall-clock time, so an effect that looks like it "scales with event count" may be time-based. Read the handler, then conclude.
+
 ## 5. Fix, only with proof
 
 Apply the smallest change that addresses the proven cause. Cross-check it against the baseline rules in `SKILL.md` (derive don't duplicate, effects, single source of truth). Do not remove the instrumentation yet. Never use `setTimeout` or `sleep` as a fix.
diff --git a/skills/react-doctor/references/design.md b/skills/react-doctor/references/design.md
index 8a6e6a405..214639797 100644
--- a/skills/react-doctor/references/design.md
+++ b/skills/react-doctor/references/design.md
@@ -2,9 +2,9 @@
 
 Improve interfaces with measured evidence from the rendered page, not taste alone. Use this when the user wants to build, polish, or review a UI: "looks off", "make this nicer", or a pasted screenshot.
 
-The value here is what a screenshot and the live DOM let you measure that reading code cannot: contrast ratios, line length, the spacing scale, and tap-target size. Lead with those, then apply craft.
+The value here is what a screenshot and the live DOM let you measure that reading code cannot: contrast ratios, line length, the spacing scale, radius math, and tap-target size. Lead with those, then apply the craft eye across color, type, surfaces, icons, copy, states, and motion — squint at the composition, reject the slop defaults, polish the details that compound. The same loop covers animation: §15–17 are the motion ruleset, measured the same way.
 
-## Review against the live page
+## Capture the live page
 
 ```bash
 npx react-doctor browser open http://localhost:3000
@@ -12,41 +12,241 @@ npx react-doctor browser screenshot --out review.png   # what the user actually
 npx react-doctor browser eval --profile                # full picture incl. axe-core a11y: contrast, names, landmarks
 ```
 
-Review responsive breakpoints with `--viewport WIDTHxHEIGHT` (for example `--viewport 390x844` for a phone) on `screenshot`, `snapshot`, or `eval`. It emulates the size for that one command via a CDP override, so it never resizes your real browser window:
+Review breakpoints with `--viewport WIDTHxHEIGHT` (e.g. `--viewport 390x844` for a phone) on `screenshot`, `snapshot`, or `eval` — it emulates the size for that one command via a CDP override, never resizing your real window. Then measure specifics from the DOM with `eval` (computed styles, bounding boxes, color values) so findings are numbers, not opinions:
 
 ```bash
-npx react-doctor browser screenshot --viewport 390x844 --out mobile.png
+npx react-doctor browser eval 'page.evaluate(() => { const r = document.querySelector("button").getBoundingClientRect(); return { w: r.width, h: r.height }; })'
 ```
 
-Look at the screenshot, then measure specifics with `eval` (computed styles, bounding boxes, color values) to get objective numbers rather than opinions:
+## 1. Measure (objective, in priority order)
 
-```bash
-npx react-doctor browser eval 'page.evaluate(() => getComputedStyle(document.querySelector("button")).fontSize)'
-```
+`browser eval --profile` runs axe-core and reports contrast, missing names, heading order, and landmarks in its Accessibility section. Lead with these; a smarter model cannot dismiss a measured number as opinion. (If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) is in your tools, its `lighthouse_audit` adds performance and best-practice findings.)
+
+1. **Contrast**: body text at least 4.5:1, large text (≥ 24px, or ≥ 18.66px bold) at least 3:1. Report the actual ratio (axe gives it).
+2. **Hit area**: interactive elements at least 44 × 44px (WCAG), or 40 × 40 with a smaller visible control. Measure the bounding box; a 20px icon needs an expanded hit area, and two hit areas must never overlap.
+3. **Line length**: body copy roughly 45 to 75 characters per line.
+4. **Spacing scale**: every gap, padding, and margin a multiple of one base (usually 4px). Flag ad-hoc values (`13px`, `7px`), and asymmetric padding where TLBR don't match without reason.
+5. **Concentric radius**: a nested rounded element must satisfy `outer = inner + padding`. Mismatched nested radii are the most common "off" tell. Skip when padding > 24px — treat those as separate surfaces with independent radii.
+6. **Optical alignment**: icon+text buttons want ~2px less padding on the icon side; triangular or asymmetric icons (play, caret, star) shift toward their visual center, not the geometric one.
 
-`browser eval --profile` runs axe-core against the live page and reports accessibility violations (color contrast, missing button or SVG names, heading order, landmarks) with the failing selectors in its Accessibility section. If [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) is in your tools, its `lighthouse_audit` adds performance and best-practice findings on top. Lead with the measured issues; a smarter model cannot dismiss them as opinion.
+## 2. Color
 
-## What to check
+The foundation can't be patched by swapping a hex later — measure contrast first, then judge intent:
 
-Measured, in priority order:
+- **Gray builds structure; color communicates.** One accent used with intention beats five used by reflex; limit the accent to roughly one role per view. Unmotivated color is noise.
+- **The palette should feel like it came from a world**, not applied to a wireframe. Name the specific quality (quiet/loud, dense/spacious, serious/playful) before picking values.
+- **Use OKLCH**, and reduce chroma as lightness approaches 0 or 100 so dark/light extremes don't muddy.
+- **Never `#000` or `#fff`** — tint every neutral toward the brand hue (chroma ~0.005–0.01). Pure gray and pure black read as untouched defaults.
+- **Never gray text on a colored background** — use a darker/lighter shade of that color or an alpha of the foreground.
+- **Keep one hue across surface levels; shift only lightness.** Different hues per level fragment the space.
+- **Text hierarchy is four levels** (primary, secondary, tertiary, muted). Only two reads as flat.
 
-1. **Contrast**: body text at least 4.5:1, large text at least 3:1. Report the actual ratio.
-2. **Tap targets**: interactive elements at least 24 × 24 px (ideally 44 × 44 on touch).
-3. **Line length**: body copy roughly 45 to 75 characters per line.
-4. **Spacing**: spacing values come from one consistent scale, not ad-hoc px.
+## 3. Typography
+
+- **Distinct levels at a glance, via size + weight + tracking — not size alone.** Headings: heavier weight, slightly tighter tracking. Body: comfortable weight and size. Labels: medium weight at small size. If a squint can't separate title from body, the hierarchy is too weak.
+- **Body text ≥ 16px** on the web (smaller invites zoom and fails older eyes); set a deliberate type scale (one ratio, e.g. 1.2–1.333) rather than ad-hoc sizes.
+- **Reject the default font reflex** (system/Inter/Roboto/Arial on a brand surface). Pick a face with intent — describe the brand as a physical object first, then choose; pair at most a display face with a body face, two or three families total.
+- **Data wants monospace + `tabular-nums`** (numbers, IDs, codes, timestamps) so columns align and values don't jitter as they change.
+- **Wrap on purpose**: `text-wrap: balance` for headings of ≤ 6 lines, `text-wrap: pretty` for body, so no orphaned last word.
+- **All-caps and large display text need positive tracking**; tight tracking belongs on large headings, not small caps.
+
+## 4. Surfaces, depth, and spacing
+
+- **Pick one depth strategy and commit**: borders-only, a single subtle shadow, layered shadows, or surface-color shifts. Mixing them is the amateur tell.
+  ```css
+  --shadow-layered:
+    0 0 0 0.5px rgba(0, 0, 0, 0.05), 0 1px 2px rgba(0, 0, 0, 0.04), 0 2px 4px rgba(0, 0, 0, 0.03),
+    0 4px 8px rgba(0, 0, 0, 0.02); /* one shadow that doubles as a hairline ring */
+  ```
+- **Elevation = small lightness jumps on the same hue** (a few percent per level): base canvas → cards → dropdowns/popovers → stacked overlays. You barely see it but you feel it.
+- **Borders are low-opacity rgba** (~0.05–0.12 alpha) that disappear until needed; a solid hex border looks harsh. Treat border strength as a scale, not on/off.
+- **Inputs are inset** — slightly darker than their surroundings — signaling "type here" without a heavy outline. Give controls their own tokens, don't reuse surface tokens.
+- **A sidebar shares the canvas background** with a divider border, not a different fill — different fills split the UI into two worlds.
+- **Spacing is multiples of one base**; vary it for rhythm (same padding everywhere is monotony) but keep it explainable. Radius is a scale too — don't mix sharp and soft at random.
+- **Nested cards are almost always wrong**; reach for an inset/alternate background or a divider before wrapping a card in a card. Use a fixed z-index scale, never arbitrary `z-[9999]`.
+
+## 5. Icons and imagery
+
+- **Icons clarify, not decorate** — if removing one loses no meaning, remove it.
+- **One icon set, one stroke weight.** Prefer precise, lighter line icons over thick default sets; align icons optically (not mathematically) with adjacent text.
+- **Icon-only buttons need an `aria-label`** (and `aria-hidden` on the icon); give a standalone icon a subtle background container for presence.
+- **Frame untinted images** with an inset `outline: 1px solid rgba(0,0,0,.1)` (white/10 in dark mode), never a tinted slate/zinc border.
+
+## 6. Copy
+
+The words are part of the design; vague copy reads as unfinished:
+
+- **Buttons are a specific verb + object** — never `OK`, `Submit`, `Yes`, `Continue`, or `Click here`. The label is the control's accessible name (`Save API key`, `Send invite`, `Delete account`).
+- **Destructive actions name the destruction and the count**: `Delete 5 items`, not `Remove` or `Delete selected`. Confirmations are serious, not cute: `Delete this project? This can't be undone.`
+- **Errors state the problem and the fix, in order** — what happened, why (if known), how to fix — in active voice, inline next to the field. No `Oops!`, no humor on frustration paths, no exclamation marks. Reframe to the value (`Enter a date as MM/DD/YYYY`), never blame the user.
+- **Empty states are onboarding**: name what's empty, say why it matters, give one clear next action.
+- **Loading copy is specific** (`Saving your draft…`), and scales to the wait: spinner alone < 2s, `Loading…` over 2s, progress + honest label over 10s.
+- **Placeholders are examples, not labels** — the label lives in a real `<label>`; use the placeholder to show format (`name@example.com`).
+- **Voice is constant; tone shifts by moment** (success brief, error empathetic, destructive grave). Labels describe (`Email address` over `Email`); link text stands alone (`View pricing plans`, never `here`). No emoji as UI chrome.
+
+## 7. Interaction states
+
+Every interactive element needs all eight, or it reads as a photograph of software:
+
+- **default, hover, focus-visible, active, disabled** — plus the data states **loading, empty, error**.
+- **Loading** uses skeletons over a centered spinner; **empty** states teach the interface; **errors** appear next to where the action happened.
+- **One primary action per view** (filled), the rest secondary/ghost — competing primaries flatten the hierarchy.
+- Use `AlertDialog` for destructive/irreversible actions, never block paste in inputs, and replace native `select`/date controls with custom components (the native ones can't be styled reliably). Reach for vetted accessible primitives rather than rebuilding keyboard/focus by hand, and don't mix two primitive systems in one surface.
+- For the transitions on these states, see Motion (§15–17).
+
+## 8. Accessibility (beyond the measured pass)
+
+§1 catches contrast, names, heading order, and landmarks via axe. The rest needs judgment, fixed in priority order — names → keyboard → focus → semantics → forms/errors:
+
+- **Accessible name on every control**: text button = its text; icon-only = `aria-label` + `aria-hidden` on the glyph; input/select/textarea = a real `<label htmlFor>` (or `aria-label`); link = descriptive text. Decorative `<svg>`/`<canvas>` gets `aria-hidden="true"` — skipping it is the modern a11y tell.
+- **Keyboard works without a mouse**: no `<div onClick>` without `role` + `tabIndex` + a key handler; every control is reachable and operable by Tab/Enter/Space.
+- **Visible focus**: never `outline: none` without a replacement ring; never a positive `tabIndex` (it breaks natural order).
+- **Don't encode meaning in color alone** — pair status/error color with an icon or text.
+- **Dialogs trap focus** while open and restore it to the trigger on close.
+
+## 9. Dark mode
+
+- **Preserve light mode's contrast ratios, don't invert** — re-pick panels, shadows, and decoration. Default to the OS `prefers-color-scheme`; add a manual toggle only when asked.
+- **Surfaces**: a card sits slightly lighter than the page (e.g. `zinc-900` on `zinc-950`) with a `ring-1 ring-white/5` and **`shadow-none`** — ambient shadows are invisible on dark.
+- **Drop large branded/colored panels** (use the same background plus a hairline divider), hide decorative gradient blobs (`dark:hidden`), and make decorative glyphs much fainter (`dark:text-white/5`).
+- **One light color for all headings** (no accent + neutral mix); keep text contrast (primary ≥ 4.5:1, secondary ≥ 3:1) and mirror borders, focus, and disabled states in both themes.
+- **Images**: ship real dark assets over CSS filters; `dark:invert dark:grayscale` on a screenshot is a stopgap; soften white-background images so they don't glow. Dark-only sites set `color-scheme: dark` on `<html>` so native scrollbars and controls follow.
+
+## 10. Responsive
+
+- **Mobile-first**: the smallest viewport is the default; `min-width` queries add complexity going up; never `max-width` as the primary direction. Breakpoints land where the **content** breaks (in `rem`), not at device sizes; three or four suffice. Never disable zoom in the viewport meta.
+- **Size up on mobile, step down at `sm:`** — body `text-base` → `sm:text-sm`, inputs `py-2.5` → `sm:py-1.5`, inline icons `size-5` → `sm:size-4`. Exceptions: an `<h1>` stays the same or gets _smaller_ on mobile; multi-column layouts collapse to a single column (don't shrink the columns into unusable slivers).
+- **Body ≥ 16px on mobile** (smaller triggers iOS input zoom). Use `clamp()` for sizes that scale continuously, media queries for discrete layout shifts.
+- **Detect input by `pointer`/`hover`, not width** — a touchscreen laptop has a fine pointer too. Never ship a hover-only interaction with no tap path; coarse pointers get ≥ 44px targets.
+- **Heights use `dvh`/`svh`**; never set `width: 100vw` (it includes the scrollbar and overflows); pad for the notch with `env(safe-area-inset-*)`. Clickable text (buttons, nav, CTAs) must never wrap — shorten the label or `white-space: nowrap`.
 
-Then craft, drawing on the bundled design rules:
+## 11. Hierarchy and cognitive load
+
+- **Working memory holds ≤ 4 items.** Keep top-level nav ≤ 5, form fields ≤ 4 per group, one primary action plus one or two secondary, dashboard metrics ≤ 4, pricing tiers ≤ 3.
+- **One focal point**: something dominates by size, position, contrast, or surrounding space. When every element carries the same visual weight, nothing stands out (the "visual noise floor").
+- **Eliminate extraneous load**: cut chrome that doesn't help the task, and use progressive disclosure instead of showing everything at once.
+
+## 12. Squint at the composition
+
+Step back from the screenshot. Correct is not the same as crafted:
+
+- **Rhythm**: does density vary with purpose, or is every card the same size and gap (the flat "no one decided" look)?
+- **Proportion**: do the specific numbers say something (a 280px sidebar serves the content; 360px makes them peers)? If you can't articulate what a proportion says, it says nothing.
+- **Layout**: don't default to centering everything — left-aligned asymmetry or a confident strict grid reads as designed; a centered icon-title-subtitle stack reads as template. When cards are genuinely right, `grid-template-columns: repeat(auto-fit, minmax(280px, 1fr))` stays responsive without breakpoints.
+- **Backgrounds**: atmosphere and depth (subtle gradient, noise, layered transparency) beat a flat fill on brand surfaces.
+
+## 13. Reject the slop defaults
+
+The master gate: if someone could glance at the screen and say "AI made that" without doubt, it failed. The cliché is usually chosen by training data, not by you — and the obvious anti-cliché is the next trap:
+
+| Domain                     | Reflex palette to avoid   |
+| -------------------------- | ------------------------- |
+| Observability / monitoring | Dark + saturated blue     |
+| Fintech                    | Navy + gold               |
+| AI / agents                | Purple + cream            |
+| Crypto                     | Neon on black             |
+| Dev tool                   | Monospace + dark gradient |
+
+When the slop test fails, fix the upstream choice (the whole stance), not the surface — swapping a font or deleting a gradient won't repair a defaulted foundation. The common surface tells, each with its fix:
+
+- Default font stack on a brand surface → commit to a typeface with intent.
+- 1px gray border + a harsh uniform shadow → a layered `box-shadow` that doubles as a 1px ring (keep real borders only for dividers and input outlines).
+- `transition: all` with `ease-in-out` on UI → transition specific properties, `ease-out`, under 300ms.
+- Centered-everything, equal-weight grid → one focal point, intentional asymmetry.
+- Orphaned heading words → `text-wrap: balance`; layout-shifting numbers → `font-variant-numeric: tabular-nums`.
+
+## 14. Components
+
+Recurring patterns where the defaults go wrong (platform-native surfaces — iOS/Android/terminal — are out of scope for web React review):
+
+- **Buttons**: a secondary must never out-contrast the primary; inline form actions (`Change avatar`, `Resend code`) are secondary _and_ smaller than the submit, never the same height. Cap an app UI at two button sizes (≥ 6px apart) at compact `text-sm`; the icon-side padding equals the vertical padding so the glyph isn't stranded; a spinner replaces the label, it doesn't sit beside it.
+- **Forms**: group related fields, label every control, validate on blur or submit (not keystroke), and show each error inline under its field. Placeholders show format, not the label.
+- **Overlays**: pick by job — `Dialog` for a focused input task, a confirm/`AlertDialog` for destructive, `Sheet`/`Drawer` for side or mobile-bottom detail, `Popover` for small contextual content, hover card/tooltip for hover info. Prefer the native `<dialog>` (free focus-trap, escape, `::backdrop`); center modals (never corner-stuck), make the page behind `inert`, send first focus to the first field (not the close button), and use a 40–60% scrim. Flip popovers near the viewport edge and never clip them in an `overflow: hidden` parent; show one sheet/popover at a time, never cascade. **Prefer undo over confirm**: for reversible actions just do it and show a toast with Undo for 5–10s; keep the confirm (and type-to-confirm) only for irreversible destruction.
+- **Tables**: sentence-case headers with `nowrap`; horizontal row dividers only (no vertical or outer borders); sit them on the background, not inside a card; `tabular-nums` for numeric columns; `aria-sort` when sortable; wrap in a horizontal-scroll container on small screens.
+- **Navigation**: always ship a mobile menu; mark the active item with color or a soft/muted background — never a primary-color fill or a font-weight change between states; always show the current location.
+- **Toasts**: auto-dismiss in 3–5s (5–10s with an Undo action), `aria-live="polite"`, and never steal focus; confirm completed actions with brief feedback that auto-dismisses.
+
+## 15. Motion: when to animate, easing, duration
+
+Motion is reviewed on the same loop — most "feel" problems are decidable from rules, and `browser eval '<repro>' --profile` shows whether the animation drops frames (see [performance.md](./performance.md)). Motion must earn its place: spatial continuity, state change, feedback, explanation, or preventing a jarring jump — not "it looks cool" on something seen often.
+
+**Should it animate?**
+
+| Frequency                                       | Decision                     |
+| ----------------------------------------------- | ---------------------------- |
+| 100+×/day (keyboard shortcuts, command palette) | No animation, ever           |
+| Tens of ×/day (hover, list nav)                 | Remove or drastically reduce |
+| Occasional (modals, drawers, toasts)            | Standard animation           |
+| Rare / first-run (onboarding, celebration)      | Can add delight              |
+
+Never animate a keyboard-initiated action — it repeats hundreds of times a day, and motion makes it feel slow.
+
+**Easing**: `ease-out` for enter/exit (and the default), `ease-in-out` for on-screen movement, `ease` for hover/color, `linear` only for constant motion. Never `ease-in` on UI — it delays the moment the user is watching. Built-in curves are weak; use strong custom ones:
+
+```css
+--ease-out: cubic-bezier(0.23, 1, 0.32, 1);
+--ease-in-out: cubic-bezier(0.77, 0, 0.175, 1);
+--ease-drawer: cubic-bezier(0.32, 0.72, 0, 1);
+```
+
+**Duration**: button press 100–160ms, tooltips 125–200ms, dropdowns 150–250ms, modals/drawers 200–500ms. Small, frequent UI animations stay under 300ms — a 180ms dropdown feels more responsive than a 400ms one; only large surfaces (modals, drawers) use the upper range.
+
+## 16. Motion: physics, interruptibility, choreography
+
+- **Physicality**: never `scale(0)` (start from `scale(0.9–0.97)` + `opacity: 0`); scale popovers from the trigger via `transform-origin`, but keep modals centered; press feedback is `transform: scale(0.97)` on `:active` (160ms ease-out). Springs simulate physics for drag, "alive", and interruptible motion — keep bounce subtle (0.1–0.3) and out of most UI: `{ type: "spring", duration: 0.5, bounce: 0.2 }`.
+- **Interruptibility**: CSS transitions retarget mid-flight; keyframes restart from zero, so prefer transitions for rapidly-triggered elements. Drive entry without JS via `@starting-style`:
+  ```css
+  .toast {
+    opacity: 1;
+    transform: translateY(0);
+    transition:
+      opacity 400ms ease,
+      transform 400ms ease;
+    @starting-style {
+      opacity: 0;
+      transform: translateY(100%);
+    }
+  }
+  ```
+  Use **asymmetric timing**: slow where the user decides, fast where the system responds.
+- **Choreography**: stage in sequence (backdrop → panel → control), don't animate many things at once. Stagger group entrances 30–80ms apart (decorative, never blocking interaction). Soften abrupt stops with a slight overshoot; reserve anticipation/exaggeration/secondary flourishes for rare moments, and for frequent elements often animate only the exit.
+
+## 17. Motion: performance and accessibility
+
+Measure jank with `browser eval '<repro>' --profile`: a long animation frame attributed to a layout or paint property, or heavy style-recalc/layout in the timeline, is your evidence. Rendering steps: **composite** = `transform`/`opacity`; **paint** = color/border/gradient/filter; **layout** = size/position.
+
+- **Animate only `transform` and `opacity`** — GPU, skipping layout and paint. Never `width`/`height`/`top`/`left`/`margin`/`padding`.
+- **Never drive child transforms via a CSS variable on the parent** (it restyles every child); set `transform` on the element directly.
+- **Library `x`/`y`/`scale` shorthands aren't always hardware-accelerated** — animate the full `transform` string.
+- **CSS and WAAPI run off the main thread**; rAF stutters under load. Use CSS for predetermined motion, JS for interruptible; WAAPI gives JS control at compositor speed.
+- **Scroll-linked**: Scroll/View Timelines (`animation-timeline: view()`) or `IntersectionObserver`, never scroll events; pause off-screen.
+- **Layout-like moves**: FLIP — measure first and last, animate the delta via `transform`; batch all reads before writes.
+- Blur ≤ 8px and one-shot only; `will-change` only on elements about to animate (removed after); view transitions for navigation only. `clip-path: inset(...)` powers reveals/wipes; `translate` percentages are element-relative; `scale()` scales children too.
+- **Gestures**: momentum dismissal (velocity `abs(distance)/elapsedMs > ~0.11`), damping past edges, pointer capture once dragging starts, ignore extra touch points after a drag begins.
+
+**Accessibility**: reduced motion means fewer and gentler, not none — keep opacity/color, drop movement.
+
+```css
+@media (prefers-reduced-motion: reduce) {
+  .el {
+    animation: fade 0.2s ease;
+  }
+}
+@media (hover: hover) and (pointer: fine) {
+  .el:hover {
+    transform: scale(1.05);
+  } /* touch fires false hovers */
+}
+```
 
-5. **Type**: one clear hierarchy; avoid default system-only stacks for brand surfaces; consistent line-height.
-6. **Color**: a committed palette, not arbitrary hexes; check both light and dark.
-7. **Layout**: alignment, rhythm, and a deliberate focal point.
-8. **State**: hover, focus-visible, disabled, loading, and empty states exist.
+**Reviewing motion** — block on sight: `transition: all`; `scale(0)`; `ease-in` on UI enter/exit; keyframes on a rapidly-retriggered element; animating a layout or paint property; scroll-event-driven motion; a CSS-variable-on-parent recalc storm. Remediate cheapest-first: delete → reduce → fix easing → fix `transform-origin` → make interruptible → move to `transform`/`opacity` → asymmetric timing → polish → reduced-motion.
 
 ## The loop
 
-Build or fix, screenshot, re-check, compare. Confirm the measured issue you targeted actually moved (the ratio crossed the threshold, the target grew) and that the screenshot looks right before and after.
+Build or fix, screenshot, re-measure, compare. Confirm the measured issue you targeted actually moved — the ratio crossed the threshold, the target grew, the nested radii now match — and that the after screenshot reads better than the before.
 
 ## Working rules
 
-- Always look at the screenshot; do not review UI from JSX alone.
+- Always look at the screenshot; never review UI from JSX alone.
 - Report measured findings with their numbers; keep taste suggestions short and clearly separate from the measured ones.
+- The correct fix is simpler than the hack: prefer flex + section padding over negative margins, `max-width` + auto margins over absolute positioning, real tokens over `calc()` workarounds.
diff --git a/skills/react-doctor/references/performance.md b/skills/react-doctor/references/performance.md
index a2c2b5bd9..09f51367a 100644
--- a/skills/react-doctor/references/performance.md
+++ b/skills/react-doctor/references/performance.md
@@ -17,15 +17,17 @@ npx react-doctor browser eval 'page.goto("http://localhost:3000")' --profile
 npx react-doctor browser eval --profile   # measures the current page, no reload
 ```
 
-It drives the same Chrome the other `browser` commands do: your real logged-in session when you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one — launched headless (pass `--headed` to watch the window), landing on a free port automatically if 9222 is taken, with later commands reattaching to it and `browser close` stopping it when you're done. The performance section ranks frames by input-blocking time — the jank signal — and drops non-blocking ones (a long but non-blocking frame, like the first frame after navigation, is not jank), leading with the most-blocking frame, then each script that ran in it (time, function name, source, and sync-layout time when present), with LCP and CLS for context. LoAF is Chromium-only; on a quiet page it reports no blocking frames, which is a result, not a failure.
+It drives the same Chrome the other `browser` commands do: your logged-in session if you started Chrome with `--remote-debugging-port=9222`, otherwise a dedicated persistent one (headless; `--headed` to watch, auto-picks a free port if 9222 is taken, `browser close` to stop). The performance section ranks frames by input-blocking time — the jank signal — dropping non-blocking ones (a long but non-blocking frame, like the first after navigation, is not jank), leading with the most-blocking frame, then each script in it (time, function, source, sync-layout time), with LCP and CLS for context. LoAF is Chromium-only; a quiet page reports no blocking frames — a result, not a failure.
 
 It also captures a Chrome DevTools timeline trace over the same window. The perf section rolls it up into the native cost a forced reflow incurs — total/longest **style-recalc**, **layout**, **hit-test**, and **paint** time — which the script-level LoAF rows can't isolate (this is where `getComputedStyle` / `getBoundingClientRect` / `elementsFromPoint` land). The raw trace is written to `react-doctor-trace.json` (override with `--out`); drop it into the DevTools **Performance** panel for the full flame chart.
 
-The `# Memory` section snapshots the page's runtime footprint after the action — JS heap used/total, DOM node count, event listeners, and document/frame counts (the CDP Performance counters). For a leak, re-run on the same page with no reload (`browser eval --profile`) and watch these climb: growing DOM nodes mean detached subtrees retained, growing listeners/heap mean leaked closures, growing documents/frames mean orphaned iframes.
+The `# Memory` section snapshots the page's runtime footprint after the action — JS heap, DOM nodes, event listeners, document/frame counts. For a leak, re-run on the same page with no reload and watch these climb: detached subtrees retained, leaked closures, orphaned iframes. It also reports page geometry — viewport size, devicePixelRatio, scroll offset, and how far the page scrolled during the action. A non-zero scroll delta means the viewport moved under you (auto-scroll, focus jump); this is native scroll only, so an app with its own camera/pan won't show its movement here — read that from the app's own state.
 
 The `# Network` section lists each request with its outcome (status or failure), and — once it has settled — its time and encoded transfer size, with a summary counting failed, slow (>500ms), and heavy (>1MB) requests. Use it to spot a blocking waterfall or an oversized bundle/asset; a cache hit or an unfinished request shows no size/time.
 
-To attribute interaction jank (a slow click, scroll, or keypress), pass the repro as the expression so it runs while recording: `browser open` the page, then `browser eval 'page.getByText("Next").click()' --profile`. The recording covers the action, so its frames, renders, and CPU samples are all included.
+To attribute interaction jank (a slow click, scroll, or keypress), pass the repro as the expression so it runs while recording: `browser open` the page, then `browser eval 'page.getByText("Next").click()' --profile`. The recording covers the action, so its frames, renders, and CPU samples are all included. When the jank is an animation or transition, the Motion sections of [design.md](./design.md) carry the GPU-only ruleset to fix it.
+
+The persistent session keeps page state between commands — scroll position, zoom, toggles, the open URL. That continuity is the point for multi-step repros, but it also means leftover state can skew a fresh measurement; reload (pass the navigation as the expression) to re-baseline whenever a number looks off.
 
 ## 3. Analyze the worst frame first
 
@@ -33,6 +35,8 @@ The output is already sorted worst-first. The script with the largest duration i
 
 > CONFIRMED: 128 ms frame, script `app.js` `drawSeries` ran 84 ms with 42 ms sync layout. The chart redraw forces layout inside the scroll handler.
 
+Measurement tells you _what_ happened, not _why_ — confirm the mechanism in the source before calling a behavior a bug. An internally-consistent anomaly (an element growing by exactly the scroll distance, a cost tracking wall-clock time) is usually intended behavior — auto-pan, momentum, a debounce — not a defect. Synthetic input is not real input: `page.mouse.move(..., { steps })` plays out over wall-clock time, so an effect that seems to "scale with event count" may be time-based; you cannot infer an input-frequency bug from `steps`.
+
 ## 4. Zoom into React renders (optional)
 
 When the worst frame's script is your own React bundle and you need per-component render counts and why each rendered, profile React directly. `browser open` injects the real DevTools profiler before the page loads, so there are no app changes, no Chrome extension, and no manual record or stop — then drive the repro with `browser eval --profile`:

From f4d735b28790cc7458f36684860ef8e63b4a8464 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 21:37:57 -0700
Subject: [PATCH 20/38] fix(browser): don't orphan launched Chrome on
 attach/close failure (bugbot)

When the fallback launch's debugger answered but the CDP handshake still
failed, connect threw while leaving a running headless Chrome and its
persisted endpoint behind, so each retry stacked another orphan on a fresh
port. launchPersistentChrome now returns the pid so connect terminates the
instance it just spawned (and forgets its endpoint) before throwing.

closeLaunchedBrowser cleared the persisted endpoint even when the attach
failed, orphaning a briefly-unreachable-but-alive Chrome it had just
forgotten. It now clears the endpoint only after a successful attach + close;
a genuinely dead endpoint is harmless because the next launch overwrites it.
---
 packages/browser/src/close-launched-browser.ts |  8 +++++---
 packages/browser/src/connect.ts                | 16 ++++++++++++----
 packages/browser/src/launch.ts                 | 12 ++++++++++--
 packages/browser/src/utils/kill-process.ts     |  7 +++++++
 4 files changed, 34 insertions(+), 9 deletions(-)
 create mode 100644 packages/browser/src/utils/kill-process.ts

diff --git a/packages/browser/src/close-launched-browser.ts b/packages/browser/src/close-launched-browser.ts
index 9f08aa63f..4efcead72 100644
--- a/packages/browser/src/close-launched-browser.ts
+++ b/packages/browser/src/close-launched-browser.ts
@@ -7,8 +7,7 @@ import { readLaunchedEndpoint } from "./utils/read-launched-endpoint.js";
 // persistent model keeps the page alive across commands), so this is the one path
 // that actually stops it — the cleanup a headless instance needs since there's no
 // window to quit. It targets ONLY our recorded endpoint, never a browser the user
-// started, so it can't kill their Chrome. Returns whether it closed anything; the
-// recorded endpoint is forgotten either way (a stale one shouldn't linger).
+// started, so it can't kill their Chrome. Returns whether it closed anything.
 export const closeLaunchedBrowser = async (): Promise<boolean> => {
   const endpoint = readLaunchedEndpoint();
   if (!endpoint) return false;
@@ -16,9 +15,12 @@ export const closeLaunchedBrowser = async (): Promise<boolean> => {
   const browser = await chromium
     .connectOverCDP(endpoint, { timeout: CONNECT_TIMEOUT_MS })
     .catch(() => null);
-  clearLaunchedEndpoint();
+  // Couldn't attach: the instance may just be briefly unreachable, so keep the
+  // endpoint rather than orphaning a still-running Chrome we've now forgotten. A
+  // genuinely dead endpoint is harmless — the next launch overwrites it.
   if (!browser) return false;
   const cdpSession = await browser.newBrowserCDPSession();
   await cdpSession.send("Browser.close").catch(() => {});
+  clearLaunchedEndpoint();
   return true;
 };
diff --git a/packages/browser/src/connect.ts b/packages/browser/src/connect.ts
index 28179133f..48223606c 100644
--- a/packages/browser/src/connect.ts
+++ b/packages/browser/src/connect.ts
@@ -4,9 +4,11 @@ import { CONNECT_TIMEOUT_MS, DEFAULT_CDP_ENDPOINT } from "./constants.js";
 import { launchPersistentChrome } from "./launch.js";
 import type { BrowserConnectOptions } from "./types.js";
 import { cdpPortFromEndpoint } from "./utils/cdp-port.js";
+import { clearLaunchedEndpoint } from "./utils/clear-launched-endpoint.js";
 import { findAvailablePort } from "./utils/find-available-port.js";
 import { isLoopbackEndpoint } from "./utils/is-loopback-endpoint.js";
 import { isPortAvailable } from "./utils/is-port-available.js";
+import { killProcess } from "./utils/kill-process.js";
 import { loadPlaywright } from "./utils/load-playwright.js";
 import { readLaunchedEndpoint } from "./utils/read-launched-endpoint.js";
 import { writeLaunchedEndpoint } from "./utils/write-launched-endpoint.js";
@@ -71,16 +73,22 @@ export const connectToBrowser = async (
   const launchEndpoint = options.cdpEndpoint
     ? options.cdpEndpoint
     : await resolveLaunchEndpoint(fallbackEndpoint);
-  const reachableEndpoint = await launchPersistentChrome(launchEndpoint, options.headless ?? true);
-  writeLaunchedEndpoint(reachableEndpoint);
+  const launched = await launchPersistentChrome(launchEndpoint, options.headless ?? true);
+  writeLaunchedEndpoint(launched.endpoint);
   try {
     return {
-      browser: await chromium.connectOverCDP(reachableEndpoint, { timeout: CONNECT_TIMEOUT_MS }),
+      browser: await chromium.connectOverCDP(launched.endpoint, { timeout: CONNECT_TIMEOUT_MS }),
       launched: true,
     };
   } catch (launchedAttachError) {
+    // The debugger answered /json/version, yet the CDP handshake still failed
+    // (usually a Chrome/playwright-core mismatch). Terminate the instance we
+    // just spawned and forget its endpoint, so a retry doesn't attach-fail
+    // against it again and stack another orphan Chrome on a fresh port.
+    if (launched.pid !== undefined) killProcess(launched.pid);
+    clearLaunchedEndpoint();
     throw new BrowserEnvironmentError(
-      `Launched Chrome at ${reachableEndpoint} but could not attach to it. Update Chrome (or playwright-core), or start Chrome yourself with --remote-debugging-port and pass --cdp.`,
+      `Launched Chrome at ${launched.endpoint} but could not attach to it. Update Chrome (or playwright-core), or start Chrome yourself with --remote-debugging-port and pass --cdp.`,
       { cause: launchedAttachError },
     );
   }
diff --git a/packages/browser/src/launch.ts b/packages/browser/src/launch.ts
index 39ac1ad17..42861cc76 100644
--- a/packages/browser/src/launch.ts
+++ b/packages/browser/src/launch.ts
@@ -78,6 +78,14 @@ const waitForCdpEndpoint = async (endpoint: string): Promise<string> => {
   );
 };
 
+export interface LaunchedChrome {
+  // The loopback form of the endpoint that actually responded; attach to this one.
+  endpoint: string;
+  // The spawned process, so the caller can terminate it if the CDP handshake
+  // still fails after the debugger came up — otherwise it leaks as an orphan.
+  pid: number | undefined;
+}
+
 // Detached and unref'd on success so the browser outlives this process and the
 // next `browser` command reattaches over CDP — the persistent model Chrome
 // DevTools MCP uses to keep state across calls. Headless by default (an agent
@@ -85,7 +93,7 @@ const waitForCdpEndpoint = async (endpoint: string): Promise<string> => {
 export const launchPersistentChrome = async (
   endpoint: string,
   headless: boolean,
-): Promise<string> => {
+): Promise<LaunchedChrome> => {
   const executable = resolveChromeExecutable();
   const args = [
     `--remote-debugging-port=${cdpPortFromEndpoint(endpoint)}`,
@@ -105,5 +113,5 @@ export const launchPersistentChrome = async (
     throw error;
   });
   child.unref();
-  return reachableEndpoint;
+  return { endpoint: reachableEndpoint, pid: child.pid };
 };
diff --git a/packages/browser/src/utils/kill-process.ts b/packages/browser/src/utils/kill-process.ts
new file mode 100644
index 000000000..e98708716
--- /dev/null
+++ b/packages/browser/src/utils/kill-process.ts
@@ -0,0 +1,7 @@
+// Best-effort terminate a process by pid. Any error is swallowed — in practice
+// ESRCH (already gone) or EPERM (not ours) — as this only cleans up our own Chrome.
+export const killProcess = (pid: number): void => {
+  try {
+    process.kill(pid);
+  } catch {}
+};

From f13c069542f12d2cd4e0391f40ca744a9876f9c5 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 21:40:44 -0700
Subject: [PATCH 21/38] fix(browser): forget a dead launched endpoint after
 falling back (bugbot)

A persisted launched-Chrome endpoint (often a non-default port) was tried
first on every attach. Once that instance died, each command paid the full
5s CDP timeout against the dead port before falling back to the default. Now
when the fallback to the default Chrome succeeds, the dead launched endpoint
is cleared, so later commands skip it instead of timing out every time.
---
 packages/browser/src/connect.ts | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/packages/browser/src/connect.ts b/packages/browser/src/connect.ts
index 48223606c..b44e27402 100644
--- a/packages/browser/src/connect.ts
+++ b/packages/browser/src/connect.ts
@@ -45,16 +45,26 @@ export const connectToBrowser = async (
   // Without an explicit --cdp, prefer the instance we previously launched (which
   // may be on a non-default port) before the well-known default.
   const launchedEndpoint = readLaunchedEndpoint();
+  const preferredLaunchedEndpoint =
+    !options.cdpEndpoint && launchedEndpoint && launchedEndpoint !== DEFAULT_CDP_ENDPOINT
+      ? launchedEndpoint
+      : null;
   const attachCandidates = options.cdpEndpoint
     ? [options.cdpEndpoint]
-    : launchedEndpoint && launchedEndpoint !== DEFAULT_CDP_ENDPOINT
-      ? [launchedEndpoint, DEFAULT_CDP_ENDPOINT]
+    : preferredLaunchedEndpoint
+      ? [preferredLaunchedEndpoint, DEFAULT_CDP_ENDPOINT]
       : [DEFAULT_CDP_ENDPOINT];
 
   let lastAttachError: unknown;
   for (const candidate of attachCandidates) {
     try {
       const browser = await chromium.connectOverCDP(candidate, { timeout: CONNECT_TIMEOUT_MS });
+      // Reached the default fallback because the recorded launched endpoint
+      // didn't answer: that instance is gone, so forget it — otherwise every
+      // later command pays the full attach timeout against a dead port first.
+      if (preferredLaunchedEndpoint && candidate !== preferredLaunchedEndpoint) {
+        clearLaunchedEndpoint();
+      }
       return { browser, launched: false };
     } catch (attachError) {
       lastAttachError = attachError;

From 909957baf40fad3fed0d175294158627f9cddc3e Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 21:54:46 -0700
Subject: [PATCH 22/38] fix(react): force-inline @react-doctor/browser into the
 CLI and MCP bundles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The published react-doctor CLI bundles the private @react-doctor/browser
package, but the bundler only inlined it implicitly. When it was reached
transitively (dist/mcp.js → @react-doctor/mcp → @react-doctor/browser) the
bundler could externalize it instead, emitting a phantom import of a private
package the published tarball never declares (caught by check:published-deps,
issue #629). Declare it in each pack's alwaysBundle so it is always inlined or
the build fails loudly, never silently externalized.
---
 packages/react-doctor/vite.config.ts | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/packages/react-doctor/vite.config.ts b/packages/react-doctor/vite.config.ts
index 1488cc7e3..33655c215 100644
--- a/packages/react-doctor/vite.config.ts
+++ b/packages/react-doctor/vite.config.ts
@@ -87,8 +87,10 @@ export default defineConfig({
         // require so the runtime copy must be on disk), agent-install
         // (its jsonc-parser/yaml/toml transitives ship as UMD that
         // doesn't bundle cleanly), and the typescript compiler all
-        // stay external.
-        alwaysBundle: ["commander", "ora"],
+        // stay external. @react-doctor/browser is private, so it MUST be
+        // inlined — declaring it here makes that explicit and fails the
+        // build loudly rather than emitting a phantom external import.
+        alwaysBundle: ["@react-doctor/browser", "commander", "ora"],
         neverBundle: [
           // Sentry bundles its own OpenTelemetry instrumentation chain
           // and resolves native/optional deps via require() at runtime;
@@ -200,6 +202,10 @@ export default defineConfig({
       // and oxlint/oxc/deslop resolve their native bindings at runtime.
       entry: { mcp: "./src/mcp.ts" },
       deps: {
+        // @react-doctor/browser is reached transitively through @react-doctor/mcp
+        // here; it's private, so force-inline it (the same reason the CLI pack
+        // does) instead of letting it slip out as a phantom external import.
+        alwaysBundle: ["@react-doctor/browser"],
         neverBundle: [
           "@sentry/node",
           "playwright-core",

From 8461ca8b3bbfc2511797e94067996e3369ea9ab2 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 22:10:29 -0700
Subject: [PATCH 23/38] fix(browser): keep eval/profile diagnostics when the
 driven action fails (bugbot)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three gaps in the new self-reporting eval, all surfacing precisely when an
action breaks — when the diagnostics matter most:

- evaluateOrSnapshot dropped the collected page errors when the expression
  threw (the "Errors during eval" section was skipped on the throw path). It
  now appends them to the thrown message so the real cause rides along.
- inspect (eval --profile / browser_eval profile) threw the whole recording
  away when the action failed. It now captures the failure as `evalError` and
  still returns the console/network/CPU/timeline/React picture as its context.
- console/network capture started before settle() while the profilers started
  after, so they mixed pre-action load traffic into the window. Capture now
  starts after settle, covering the same window as the profilers.
---
 .changeset/browser-eval-errors-geometry.md    |  2 +-
 packages/browser/src/session.ts               | 36 ++++++++++++++++---
 packages/browser/src/types.ts                 |  4 +++
 .../react-doctor/src/cli/commands/browser.ts  |  6 ++++
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/.changeset/browser-eval-errors-geometry.md b/.changeset/browser-eval-errors-geometry.md
index 74975fbc5..f0fcd14d9 100644
--- a/.changeset/browser-eval-errors-geometry.md
+++ b/.changeset/browser-eval-errors-geometry.md
@@ -2,4 +2,4 @@
 "react-doctor": patch
 ---
 
-Make `browser eval` and `browser eval --profile` self-reporting about what an action did to the page. A driven action that triggers a page-side error (a `console.error` or an uncaught throw) now appends an "Errors during eval" section instead of failing silently, so a broken interaction surfaces without hand-wiring a console hook. `--profile` (and the `browser_profile` MCP tool) now reports page geometry alongside memory — viewport size, devicePixelRatio, scroll offset, and how far the page scrolled while the action ran — so "did the element move, or did the page scroll under me?" is answerable from the output. Page scroll delta only prints when the viewport actually moved.
+Make `browser eval` and `browser eval --profile` self-reporting about what an action did to the page. A driven action that triggers a page-side error (a `console.error` or an uncaught throw) now appends an "Errors during eval" section instead of failing silently, so a broken interaction surfaces without hand-wiring a console hook — including when the action itself throws (a missing locator, a timeout), where those page errors are appended to the thrown message rather than dropped. `--profile` no longer throws the whole recording away when the action fails: it returns the captured picture (console, network, CPU, timeline, React) with the failure as an `evalError`, since that picture is the failure's context. `--profile` (and the `browser_profile` MCP tool) now reports page geometry alongside memory — viewport size, devicePixelRatio, scroll offset, and how far the page scrolled while the action ran — so "did the element move, or did the page scroll under me?" is answerable from the output. Page scroll delta only prints when the viewport actually moved. Console and network capture now starts after the page settles, so the recording covers the driven action's window rather than pre-action load traffic.
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index a02225ef5..c651aa21e 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -180,15 +180,26 @@ export class BrowserSession {
     try {
       const result = await this.evaluate(expression);
       const output = result === undefined ? await this.snapshot() : formatEvalValue(result);
-      // HACK: one event-loop turn lets page-side console/pageerror events queued
-      // during the action drain (CDP delivers them async) before we read them.
-      await new Promise((resolve) => setTimeout(resolve, 0));
+      await this.drainPageEvents();
       return appendEvalErrors(output, consoleEntries);
+    } catch (error) {
+      // A throwing action (a missing locator, a timeout) is exactly when the page
+      // usually logged the real cause (a React error boundary, a failed fetch), so
+      // append those page errors to the thrown message instead of dropping them.
+      await this.drainPageEvents();
+      if (error instanceof Error) error.message = appendEvalErrors(error.message, consoleEntries);
+      throw error;
     } finally {
       detach();
     }
   }
 
+  // HACK: yield one event-loop turn so page-side console/pageerror events queued
+  // during an action are delivered (CDP sends them async) before we read them.
+  private drainPageEvents(): Promise<void> {
+    return new Promise((resolve) => setTimeout(resolve, 0));
+  }
+
   // Wait for the page to stop changing before we read it: in-flight requests
   // drain, then web fonts finish loading. Without this the design job
   // screenshots a half-rendered frame (lazy images, fade-in, fallback fonts).
@@ -431,8 +442,12 @@ export class BrowserSession {
     const detachers: Array<() => void> = [];
     let stopTimelineTrace: (() => Promise<TraceEventRecord[]>) | null = null;
     try {
-      detachers.push(this.collectConsole(consoleEntries), this.collectNetwork(networkByRequest));
+      // Settle the current page BEFORE attaching listeners and starting the
+      // recorders, so console/network cover the same window as the CPU/timeline/
+      // React profilers instead of also capturing pre-action load/idle traffic.
+      // An expression that navigates runs after this, so its load is still seen.
       await this.settle();
+      detachers.push(this.collectConsole(consoleEntries), this.collectNetwork(networkByRequest));
       await cdpSession.send("Performance.enable").catch(() => {});
       await cdpSession.send("Profiler.enable");
       await cdpSession.send("Profiler.setSamplingInterval", {
@@ -449,12 +464,22 @@ export class BrowserSession {
 
       const scrollBefore = await this.readScroll();
       let result: unknown = null;
+      let evalError: string | null = null;
       let vitals = emptyVitals();
       let reactExport: ReactProfilerDataExport | null = null;
       let traceEvents: TraceEventRecord[] = [];
       let cpuProfile: CdpCpuProfile | null = null;
       try {
-        if (expression) result = (await this.evaluate(expression)) ?? null;
+        if (expression) {
+          // A failing action is when the recorded picture (console, CPU, React,
+          // timeline) is most useful, so capture the failure and still return the
+          // inspection rather than throwing it all away.
+          result =
+            (await this.evaluate(expression).catch((error: unknown) => {
+              evalError = error instanceof Error ? error.message : String(error);
+              return null;
+            })) ?? null;
+        }
         // The perf observe window doubles as the recording window: it runs after
         // the driven action so post-action jank, React commits (concurrent
         // renders land async), and CPU samples all land before we stop.
@@ -495,6 +520,7 @@ export class BrowserSession {
 
       return {
         result,
+        evalError,
         console: consoleEntries,
         network,
         performance: { ...vitals, timeline: analyzeTimelineTrace(traceEvents) },
diff --git a/packages/browser/src/types.ts b/packages/browser/src/types.ts
index 4405b482f..987685031 100644
--- a/packages/browser/src/types.ts
+++ b/packages/browser/src/types.ts
@@ -130,6 +130,10 @@ export interface InspectOptions {
 export interface PageInspection {
   // The `expression`'s return value, or null when none was driven or it had none.
   result: unknown;
+  // The message of the error the driven expression threw, or null when it
+  // succeeded. A failing action still returns the recorded picture (console, CPU,
+  // React, …) rather than throwing it away — that picture is the failure's context.
+  evalError: string | null;
   console: ConsoleMessageEntry[];
   network: NetworkRequestEntry[];
   performance: PerformanceReport;
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index 107d814a3..01b6d8b31 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -279,6 +279,12 @@ const printInspection = (inspection: PageInspection): void => {
     logger.log("");
   }
 
+  if (inspection.evalError !== null) {
+    logger.log("# Eval error (the recording below is the failure's context)");
+    logger.log(inspection.evalError);
+    logger.log("");
+  }
+
   logger.log("# Console");
   if (inspection.console.length === 0) logger.log("(none)");
   else printConsoleMessages(inspection.console);

From 815d4d40d8d4a7892d804f66725a804ba17074c3 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 22:26:51 -0700
Subject: [PATCH 24/38] fix(browser): scope eval --profile LoAF/CLS to the
 post-action window (bugbot)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The first profile after a load replayed buffered long-animation-frame and
layout-shift entries from initial page load, because the per-page watermark
started at -1 — so LoAF rows and CLS mixed load jank into a window that console,
network, CPU, and timeline only cover post-action.

Replace the persisted watermark with a per-call `sinceMs` recording-start
timestamp captured right before the driven action: buffered observers now skip
every entry at or below it, which both excludes pre-action load jank and dedupes
frames an earlier no-reload run already counted — the same guarantee the
watermark gave, minus the global page state.
---
 packages/browser/src/perf-observer.ts | 47 ++++++++-------------------
 packages/browser/src/session.ts       | 20 ++++++++----
 2 files changed, 28 insertions(+), 39 deletions(-)

diff --git a/packages/browser/src/perf-observer.ts b/packages/browser/src/perf-observer.ts
index e25849516..4409ace2f 100644
--- a/packages/browser/src/perf-observer.ts
+++ b/packages/browser/src/perf-observer.ts
@@ -2,17 +2,19 @@ import type { PageVitals } from "./types.js";
 
 // Runs in the page (via evaluate) and resolves after `windowMs`. Installs fresh
 // LoAF / LCP / CLS observers with `buffered: true`, so frames already in the
-// performance timeline (a load just navigated to, or an interaction a previous
-// command drove) are replayed immediately, while the window catches anything
-// that fires next. A reload resets the timeline, so a fresh-load measurement
-// always starts clean. For repeated no-reload measurements on the persistent
-// page, `buffered: true` would otherwise replay — and re-count — every frame
-// from earlier runs, inflating LoAF rows and CLS. So we keep a per-page
-// watermark of the latest entry `startTime` already counted (per type) and skip
-// anything at or below it: the first run after an interaction still captures its
-// frames, a second run sees only what fired since. LoAF fields are not in
+// timeline when the observer attaches (an interaction the caller drove just
+// before measuring) are replayed immediately, while the window catches anything
+// that fires next. `sinceMs` is the recording-start `performance.now()` the
+// caller captured right before the driven action: every entry at or below it is
+// skipped, so the report only counts frames from this window — never initial
+// page-load jank still sitting in the buffer, and never frames an earlier
+// no-reload run on the persistent page already reported. LoAF fields are not in
 // lib.dom, so the casts here are unavoidable.
-export const collectPerformanceReport = (windowMs: number): Promise<PageVitals> => {
+export const collectPerformanceReport = (options: {
+  windowMs: number;
+  sinceMs: number;
+}): Promise<PageVitals> => {
+  const { windowMs, sinceMs } = options;
   interface ScriptTiming {
     sourceURL?: string;
     sourceFunctionName?: string;
@@ -36,12 +38,6 @@ export const collectPerformanceReport = (windowMs: number): Promise<PageVitals>
     cumulativeLayoutShift: number;
   }
 
-  interface CountedEntryWatermark {
-    longAnimationFrame: number;
-    layoutShift: number;
-  }
-  const WATERMARK_KEY = "__REACT_DOCTOR_PERF_WATERMARK__";
-
   return new Promise<PageVitals>((resolve) => {
     const report: MutableReport = {
       longAnimationFrames: [],
@@ -49,15 +45,6 @@ export const collectPerformanceReport = (windowMs: number): Promise<PageVitals>
       cumulativeLayoutShift: 0,
     };
 
-    // Persisted on the page so it survives across no-reload measurements (and is
-    // wiped by a navigation, which is exactly when we want a clean slate).
-    const windowScope = window as unknown as Record<string, CountedEntryWatermark | undefined>;
-    const previousWatermark: CountedEntryWatermark = windowScope[WATERMARK_KEY] ?? {
-      longAnimationFrame: -1,
-      layoutShift: -1,
-    };
-    const nextWatermark: CountedEntryWatermark = { ...previousWatermark };
-
     const observers: PerformanceObserver[] = [];
     const observe = (type: string, onEntry: (entry: PerformanceEntry) => void): void => {
       try {
@@ -70,11 +57,7 @@ export const collectPerformanceReport = (windowMs: number): Promise<PageVitals>
     };
 
     observe("long-animation-frame", (entry) => {
-      if (entry.startTime <= previousWatermark.longAnimationFrame) return;
-      nextWatermark.longAnimationFrame = Math.max(
-        nextWatermark.longAnimationFrame,
-        entry.startTime,
-      );
+      if (entry.startTime <= sinceMs) return;
       const longAnimationFrame = entry as unknown as LongAnimationFrameEntry;
       report.longAnimationFrames.push({
         startTimeMs: Math.round(longAnimationFrame.startTime),
@@ -95,15 +78,13 @@ export const collectPerformanceReport = (windowMs: number): Promise<PageVitals>
     });
 
     observe("layout-shift", (entry) => {
-      if (entry.startTime <= previousWatermark.layoutShift) return;
-      nextWatermark.layoutShift = Math.max(nextWatermark.layoutShift, entry.startTime);
+      if (entry.startTime <= sinceMs) return;
       const layoutShift = entry as unknown as LayoutShiftEntry;
       if (!layoutShift.hadRecentInput) report.cumulativeLayoutShift += layoutShift.value;
     });
 
     setTimeout(() => {
       for (const observer of observers) observer.disconnect();
-      windowScope[WATERMARK_KEY] = nextWatermark;
       resolve({
         // Blocking duration — not total duration — is the jank signal: a long
         // frame that blocks nothing (an idle/backgrounded render, the first
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index c651aa21e..33124352a 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -324,11 +324,15 @@ export class BrowserSession {
     return [...entriesByRequest.values()];
   }
 
-  // A per-page watermark inside collectPerformanceReport keeps a repeated
-  // no-reload measurement from re-counting frames an earlier command already
-  // reported on the same persistent page.
-  private measureCurrentPerformance(): Promise<PageVitals> {
-    return this.page.evaluate(collectPerformanceReport, PERFORMANCE_OBSERVE_WINDOW_MS);
+  // `sinceMs` is the recording-start timestamp captured right before the driven
+  // action, so collectPerformanceReport's buffered observers skip pre-action
+  // load jank and frames an earlier no-reload run on the persistent page already
+  // counted, leaving only this window's frames.
+  private measureCurrentPerformance(sinceMs: number): Promise<PageVitals> {
+    return this.page.evaluate(collectPerformanceReport, {
+      windowMs: PERFORMANCE_OBSERVE_WINDOW_MS,
+      sinceMs,
+    });
   }
 
   // The page's native scroll offset, read before the action so `captureGeometry`
@@ -463,6 +467,10 @@ export class BrowserSession {
       });
 
       const scrollBefore = await this.readScroll();
+      // Recording start: every LoAF/CLS entry at or before this is pre-action
+      // (load jank, idle frames) and gets dropped, so the perf report covers the
+      // same window as the CPU/timeline/React recorders started just above.
+      const recordingStartMs = await this.page.evaluate(() => performance.now()).catch(() => 0);
       let result: unknown = null;
       let evalError: string | null = null;
       let vitals = emptyVitals();
@@ -483,7 +491,7 @@ export class BrowserSession {
         // The perf observe window doubles as the recording window: it runs after
         // the driven action so post-action jank, React commits (concurrent
         // renders land async), and CPU samples all land before we stop.
-        vitals = await this.measureCurrentPerformance();
+        vitals = await this.measureCurrentPerformance(recordingStartMs);
       } finally {
         // Stop the recorders BEFORE reading the React profile, and always (even
         // if the expression threw — a left-running recording on the persistent

From e5718eff71ab156dc63066127e7df7e0aee10dfd Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 22:59:53 -0700
Subject: [PATCH 25/38] feat(browser): add eval --codegen (emit Playwright
 tests) and --video (record .webm)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two artifacts off the same verified `browser eval` run, since the expression
already runs as Playwright code with `page` in Node scope:

- --codegen drives the expression, then writes a runnable Playwright spec that
  navigates to the session's current URL, replays the action, and asserts no
  console/page errors fired (the signal eval already reports) — a verified
  interaction becomes a regression test in one step. A failing action throws
  instead of writing a green-looking test.
- --video records a .webm of the page while the expression runs, for playback,
  in any mode (plain / --profile / --codegen). Uses Playwright's imperative
  screencast (1.59+) — the only video API that records a CDP-attached page; a
  missing ffmpeg surfaces an actionable `npx playwright install ffmpeg` hint.

Both are mirrored on the browser_eval MCP tool (codegen, video args). Extracts
isEvalExpression from compile-eval for reuse; bumps playwright-core to ^1.59.0.
---
 .changeset/browser-eval-codegen.md            |  5 ++
 .changeset/browser-eval-video.md              |  5 ++
 packages/browser/package.json                 |  2 +-
 packages/browser/src/constants.ts             |  6 ++
 packages/browser/src/index.ts                 |  6 +-
 packages/browser/src/session.ts               | 53 +++++++++++++++-
 packages/browser/src/utils/compile-eval.ts    | 25 +++-----
 .../src/utils/generate-playwright-test.ts     | 48 +++++++++++++++
 .../browser/src/utils/is-eval-expression.ts   | 12 ++++
 .../tests/generate-playwright-test.test.ts    | 31 ++++++++++
 packages/mcp/src/tools/browser.ts             | 58 +++++++++++++++---
 .../react-doctor/src/cli/commands/browser.ts  | 60 ++++++++++++++++++-
 packages/react-doctor/src/cli/index.ts        | 10 +++-
 pnpm-lock.yaml                                |  2 +-
 skills/react-doctor/SKILL.md                  |  4 +-
 skills/react-doctor/references/debug.md       |  2 +
 16 files changed, 292 insertions(+), 37 deletions(-)
 create mode 100644 .changeset/browser-eval-codegen.md
 create mode 100644 .changeset/browser-eval-video.md
 create mode 100644 packages/browser/src/utils/generate-playwright-test.ts
 create mode 100644 packages/browser/src/utils/is-eval-expression.ts
 create mode 100644 packages/browser/tests/generate-playwright-test.test.ts

diff --git a/.changeset/browser-eval-codegen.md b/.changeset/browser-eval-codegen.md
new file mode 100644
index 000000000..ec68e24ed
--- /dev/null
+++ b/.changeset/browser-eval-codegen.md
@@ -0,0 +1,5 @@
+---
+"react-doctor": patch
+---
+
+Add `--codegen` to `browser eval` (and `codegen: true` to the `browser_eval` MCP tool): drive a Playwright expression as usual, then write it as a runnable Playwright regression test. The generated spec navigates to the page the session is on, replays the action, and asserts no console or page errors fired — the same signal `eval` already reports — so a verified interaction becomes a guarded test in one step. Writes to `--out` (default `react-doctor.spec.ts`); a failing action throws instead of writing a green-looking test.
diff --git a/.changeset/browser-eval-video.md b/.changeset/browser-eval-video.md
new file mode 100644
index 000000000..5e07b0589
--- /dev/null
+++ b/.changeset/browser-eval-video.md
@@ -0,0 +1,5 @@
+---
+"react-doctor": patch
+---
+
+Add `--video [path]` to `browser eval` (and `video: "<path>.webm"` to the `browser_eval` MCP tool): record a `.webm` screen recording of the page while the expression runs, for playback. It works in any mode — plain `eval`, `--profile`, and `--codegen` — so a profiled run or a generated regression test can ship with a video you watch to verify what happened, and the saved path is reported in the summary (returned as `video` from the MCP tool). Uses Playwright's imperative screencast (1.59+), the only video API that records a CDP-attached page; encoding needs Playwright's bundled ffmpeg, so a missing one surfaces an actionable `npx playwright install ffmpeg` hint. Bumps the `playwright-core` floor to `^1.59.0` for the screencast API.
diff --git a/packages/browser/package.json b/packages/browser/package.json
index f2f9fa373..cb434562d 100644
--- a/packages/browser/package.json
+++ b/packages/browser/package.json
@@ -19,7 +19,7 @@
   },
   "dependencies": {
     "axe-core": "^4.10.2",
-    "playwright-core": "^1.49.1"
+    "playwright-core": "^1.59.0"
   },
   "devDependencies": {
     "@types/node": "^25.6.0",
diff --git a/packages/browser/src/constants.ts b/packages/browser/src/constants.ts
index 7f9059036..818a65449 100644
--- a/packages/browser/src/constants.ts
+++ b/packages/browser/src/constants.ts
@@ -83,6 +83,12 @@ export const TIMELINE_TRACE_CATEGORIES = [
 // Default file the raw timeline trace is written to (in the working directory).
 export const DEFAULT_TRACE_FILENAME = "react-doctor-trace.json";
 
+// Default file `eval --codegen` writes the generated Playwright spec to.
+export const DEFAULT_CODEGEN_FILENAME = "react-doctor.spec.ts";
+
+// Default file `eval --video` writes the screen recording (.webm) to.
+export const DEFAULT_VIDEO_FILENAME = "react-doctor.webm";
+
 // Functions returned inline by a CPU profile analysis, ranked by self time.
 export const MAX_PROFILE_FUNCTIONS = 20;
 
diff --git a/packages/browser/src/index.ts b/packages/browser/src/index.ts
index 6a97fe3b5..a8e27aff3 100644
--- a/packages/browser/src/index.ts
+++ b/packages/browser/src/index.ts
@@ -5,5 +5,9 @@ export type { BrowserConnection } from "./connect.js";
 export { closeLaunchedBrowser } from "./close-launched-browser.js";
 export { parseViewport } from "./parse-viewport.js";
 export { formatEvalValue } from "./utils/format-eval-value.js";
-export { DEFAULT_TRACE_FILENAME } from "./constants.js";
+export {
+  DEFAULT_CODEGEN_FILENAME,
+  DEFAULT_TRACE_FILENAME,
+  DEFAULT_VIDEO_FILENAME,
+} from "./constants.js";
 export type * from "./types.js";
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 33124352a..3f93b40a6 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -1,6 +1,8 @@
-import { readFile } from "node:fs/promises";
+import { readFile, writeFile } from "node:fs/promises";
+import { resolve } from "node:path";
 import { fileURLToPath } from "node:url";
 import type { Browser, CDPSession, ConsoleMessage, Page, Request, Response } from "playwright-core";
+import { BrowserEnvironmentError } from "./browser-environment-error.js";
 import { connectToBrowser, type BrowserConnection } from "./connect.js";
 import { analyzeCpuProfile, type CdpCpuProfile } from "./analyze-cpu-profile.js";
 import { analyzeTimelineTrace } from "./analyze-timeline-trace.js";
@@ -18,6 +20,7 @@ import { appendEvalErrors } from "./utils/append-eval-errors.js";
 import { compileEval } from "./utils/compile-eval.js";
 import { enrichEvalError } from "./utils/enrich-eval-error.js";
 import { formatEvalValue } from "./utils/format-eval-value.js";
+import { generatePlaywrightTest } from "./utils/generate-playwright-test.js";
 import { writeTraceFile } from "./utils/write-trace-file.js";
 import { analyzeReactProfile } from "./react-profiler/analyze-profile.js";
 import type { ReactProfilerDataExport } from "./react-profiler/types/profiling-export.js";
@@ -197,7 +200,53 @@ export class BrowserSession {
   // HACK: one event-loop turn lets page-side console/pageerror events queued
   // during an action drain (CDP delivers them async) before we read them.
   private drainPageEvents(): Promise<void> {
-    return new Promise((resolve) => setTimeout(resolve, 0));
+    return new Promise((resolveEvents) => setTimeout(resolveEvents, 0));
+  }
+
+  // Record a .webm of the page while `action` runs, returning its absolute path
+  // alongside the result. Uses Playwright's imperative screencast (1.59+) — the
+  // only video API that works on a CDP-attached page, since the declarative
+  // `recordVideo` context option can't record a context we merely connected to.
+  // Stops in a finally (ignoring stop errors) so the file is flushed even when
+  // the action throws. Any start failure — not only a missing ffmpeg — rethrows
+  // as a BrowserEnvironmentError with the install hint and the error as `cause`.
+  async withVideo<T>(
+    videoPath: string,
+    action: () => Promise<T>,
+  ): Promise<{ result: T; video: string }> {
+    const video = resolve(videoPath);
+    try {
+      await this.page.screencast.start({ path: video });
+    } catch (error) {
+      throw new BrowserEnvironmentError(
+        "Could not start video recording — Playwright needs its bundled ffmpeg to encode the .webm. Install it with `npx playwright install ffmpeg`.",
+        { cause: error },
+      );
+    }
+    try {
+      return { result: await action(), video };
+    } finally {
+      await this.page.screencast.stop().catch(() => {});
+    }
+  }
+
+  // Persist a verified `eval` action as a runnable Playwright regression test:
+  // capture the page's current URL (so the test recreates the starting point),
+  // run the expression through the same drive path (which surfaces — and on a
+  // hard failure throws — page errors, so a broken action never writes a green-
+  // looking test), then emit a spec pinned to that URL + action. Returns the
+  // generated source, the drive output, and the absolute path written.
+  async codegen(options: { expression: string; outPath: string }): Promise<{
+    path: string;
+    source: string;
+    output: string;
+  }> {
+    const url = this.page.url();
+    const output = await this.evaluateOrSnapshot(options.expression);
+    const source = generatePlaywrightTest({ url, expression: options.expression });
+    const path = resolve(options.outPath);
+    await writeFile(path, source);
+    return { path, source, output };
   }
 
   // Wait for the page to stop changing before we read it: in-flight requests
diff --git a/packages/browser/src/utils/compile-eval.ts b/packages/browser/src/utils/compile-eval.ts
index 89da38933..92adf67f3 100644
--- a/packages/browser/src/utils/compile-eval.ts
+++ b/packages/browser/src/utils/compile-eval.ts
@@ -1,26 +1,17 @@
 import type { Page } from "playwright-core";
+import { isEvalExpression } from "./is-eval-expression.js";
 
 export interface CompiledEval<T> {
   (page: Page): Promise<T>;
 }
 
 // `eval` source runs in Node with the Playwright `page` in scope. A bare
-// expression — `page.getByText("Login").click()` — is the common case, so
-// compile that first to keep its return value. Multi-statement source, or a
-// body that uses `return`, isn't a valid expression and throws a SyntaxError at
-// construction; recompile it as a function body so both shapes work without the
-// caller hand-wrapping an async IIFE.
+// expression is the common case, so compile that first to keep its return value;
+// multi-statement source is recompiled as a function body, so both shapes work
+// without the caller hand-wrapping an async IIFE.
 export const compileEval = <T>(expression: string): CompiledEval<T> => {
-  try {
-    return new Function(
-      "page",
-      `"use strict"; return (async () => (${expression}))();`,
-    ) as CompiledEval<T>;
-  } catch (error) {
-    if (!(error instanceof SyntaxError)) throw error;
-    return new Function(
-      "page",
-      `"use strict"; return (async () => { ${expression} })();`,
-    ) as CompiledEval<T>;
-  }
+  const body = isEvalExpression(expression)
+    ? `"use strict"; return (async () => (${expression}))();`
+    : `"use strict"; return (async () => { ${expression} })();`;
+  return new Function("page", body) as CompiledEval<T>;
 };
diff --git a/packages/browser/src/utils/generate-playwright-test.ts b/packages/browser/src/utils/generate-playwright-test.ts
new file mode 100644
index 000000000..b8b56088e
--- /dev/null
+++ b/packages/browser/src/utils/generate-playwright-test.ts
@@ -0,0 +1,48 @@
+import { isEvalExpression } from "./is-eval-expression.js";
+
+export interface PlaywrightTestInput {
+  // The page the session is on when codegen runs — the test navigates here first.
+  url: string;
+  // The `eval` expression, verified by running it, that becomes the test's action.
+  expression: string;
+  // Test title; derived from the URL path when omitted.
+  name?: string;
+}
+
+// Turn a verified `browser eval` expression into a runnable Playwright spec: the
+// session's current URL becomes the navigation, the expression becomes the
+// action, and the page is asserted to fire no console / page errors — the same
+// signal `eval` already reports, now a regression guard.
+export const generatePlaywrightTest = ({ url, expression, name }: PlaywrightTestInput): string =>
+  `import { expect, test } from "@playwright/test";
+
+test(${JSON.stringify(name ?? deriveTestName(url))}, async ({ page }) => {
+  const pageErrors: string[] = [];
+  page.on("pageerror", (error) => pageErrors.push(error.message));
+  page.on("console", (message) => {
+    if (message.type() === "error") pageErrors.push(message.text());
+  });
+
+  await page.goto(${JSON.stringify(url)});
+${formatAction(expression)}
+
+  expect(pageErrors, pageErrors.join("\\n")).toEqual([]);
+});
+`;
+
+// A bare expression (`page.getByRole(...).click()`) is awaited; multi-statement
+// source is inlined as-is so its own awaits and declarations stand.
+const formatAction = (expression: string): string => {
+  const trimmed = expression.trim();
+  const lines = isEvalExpression(trimmed) ? [`await ${trimmed};`] : trimmed.split("\n");
+  return lines.map((line) => `  ${line}`).join("\n");
+};
+
+const deriveTestName = (url: string): string => {
+  try {
+    const { pathname } = new URL(url);
+    return `eval on ${pathname.replace(/\/+$/, "") || "/"}`;
+  } catch {
+    return "eval interaction";
+  }
+};
diff --git a/packages/browser/src/utils/is-eval-expression.ts b/packages/browser/src/utils/is-eval-expression.ts
new file mode 100644
index 000000000..d462dec2f
--- /dev/null
+++ b/packages/browser/src/utils/is-eval-expression.ts
@@ -0,0 +1,12 @@
+// Reports whether `eval` source is a single expression (`page.getByText("Login")
+// .click()`) rather than multi-statement source or a body using `return`. Probes
+// by compiling (never calling) the source wrapped in parentheses: a SyntaxError
+// means it is not an expression; any other construction error counts as parsed.
+export const isEvalExpression = (expression: string): boolean => {
+  try {
+    new Function("page", `"use strict"; return (async () => (${expression}))();`);
+    return true;
+  } catch (error) {
+    return !(error instanceof SyntaxError);
+  }
+};
diff --git a/packages/browser/tests/generate-playwright-test.test.ts b/packages/browser/tests/generate-playwright-test.test.ts
new file mode 100644
index 000000000..850a9541c
--- /dev/null
+++ b/packages/browser/tests/generate-playwright-test.test.ts
@@ -0,0 +1,31 @@
+import { describe, expect, it } from "vite-plus/test";
+import { generatePlaywrightTest } from "../src/utils/generate-playwright-test.js";
+
+describe("generatePlaywrightTest", () => {
+  it("awaits a bare expression and navigates to the captured URL", () => {
+    const source = generatePlaywrightTest({
+      url: "http://localhost:3000/checkout",
+      expression: 'page.getByRole("button", { name: "Pay" }).click()',
+    });
+    expect(source).toContain('import { expect, test } from "@playwright/test";');
+    expect(source).toContain('test("eval on /checkout"');
+    expect(source).toContain('await page.goto("http://localhost:3000/checkout");');
+    expect(source).toContain('await page.getByRole("button", { name: "Pay" }).click();');
+    expect(source).toContain("expect(pageErrors, pageErrors.join");
+  });
+
+  it("inlines multi-statement source without wrapping it in await", () => {
+    const source = generatePlaywrightTest({
+      url: "http://localhost:3000/",
+      expression: "const title = await page.title();\nawait page.getByText(title).click();",
+    });
+    expect(source).toContain("  const title = await page.title();");
+    expect(source).toContain("  await page.getByText(title).click();");
+    expect(source).not.toContain("await const");
+  });
+
+  it("falls back to a generic name when the URL is not parseable", () => {
+    const source = generatePlaywrightTest({ url: "", expression: "page.title()" });
+    expect(source).toContain('test("eval interaction"');
+  });
+});
diff --git a/packages/mcp/src/tools/browser.ts b/packages/mcp/src/tools/browser.ts
index ecf4b32d2..4749b2ad7 100644
--- a/packages/mcp/src/tools/browser.ts
+++ b/packages/mcp/src/tools/browser.ts
@@ -1,6 +1,12 @@
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
-import { closeLaunchedBrowser, DEFAULT_TRACE_FILENAME, parseViewport } from "@react-doctor/browser";
+import {
+  closeLaunchedBrowser,
+  DEFAULT_CODEGEN_FILENAME,
+  DEFAULT_TRACE_FILENAME,
+  parseViewport,
+  type BrowserSession,
+} from "@react-doctor/browser";
 import { DEFAULT_CDP_ENDPOINT_HINT } from "../constants.js";
 import { jsonResult, runTool, textResult } from "../utils/tool-result.js";
 import { withSession, type BrowserToolConnection } from "../utils/with-session.js";
@@ -65,7 +71,7 @@ export const registerBrowserTools = (server: McpServer): void => {
     {
       title: "Run Playwright code, optionally profiling it",
       description:
-        'Run Playwright code with the `page` in scope (e.g. page.getByRole("button", { name: "Login" }).click()) against the attached page. Locate with the accessibility tree (browser_snapshot, or page.locator(...).ariaSnapshot() for a subtree) then act. By default: an expression that returns a value yields the value; an expression that just acts (returns nothing) yields the resulting accessibility tree, so one call drives the page and shows the new state. Multi-statement source works without wrapping it yourself. Page globals (window/document) live in the page — reach them via page.evaluate(() => ...). Set profile:true to instead record and return the full runtime picture while the code runs. Open the page first with browser_open for React render data.',
+        'Run Playwright code with the `page` in scope (e.g. page.getByRole("button", { name: "Login" }).click()) against the attached page. Locate with the accessibility tree (browser_snapshot, or page.locator(...).ariaSnapshot() for a subtree) then act. By default: an expression that returns a value yields the value; an expression that just acts (returns nothing) yields the resulting accessibility tree, so one call drives the page and shows the new state. Multi-statement source works without wrapping it yourself. Page globals (window/document) live in the page — reach them via page.evaluate(() => ...). Set profile:true to instead record and return the full runtime picture while the code runs. Set codegen:true to also write the expression as a runnable Playwright regression test. Set video:"<path>.webm" to also record a playback video of the run (any mode). Open the page first with browser_open for React render data.',
       inputSchema: {
         expression: z
           .string()
@@ -79,11 +85,23 @@ export const registerBrowserTools = (server: McpServer): void => {
           .describe(
             "Set true to record and return the full runtime picture while the expression runs — console, network (failures, plus each request's time and transfer size, with slow/heavy ones flagged), performance (LoAF jank/LCP/CLS plus a `timeline` roll-up of forced style-recalc/layout/hit-test/paint cost from a DevTools trace), memory (JS heap, DOM nodes, listeners, documents/frames — watch these climb across runs for leaks), accessibility, the React render profile (slow commits, hot components, unnecessary re-renders), and a V8 CPU profile. Also writes the raw timeline trace to `out` (loadable in DevTools) and returns its path as `tracePath`. Omit for just the expression's return value.",
           ),
+        codegen: z
+          .boolean()
+          .optional()
+          .describe(
+            "Set true to drive the expression and also write it as a runnable Playwright test — page.goto the page's current URL, then the action, then an assertion that no console/page errors fired (the same signal eval reports). Turns a verified interaction into a regression test. Returns the generated source and the file path.",
+          ),
+        video: z
+          .string()
+          .optional()
+          .describe(
+            "Path to save a .webm screen recording of the page while the expression runs (works with any mode — plain, profile, or codegen); the saved path is returned as `video`. Needs Playwright's bundled ffmpeg (npx playwright install ffmpeg).",
+          ),
         out: z
           .string()
           .optional()
           .describe(
-            `With profile:true, write the raw DevTools timeline trace here (default ${DEFAULT_TRACE_FILENAME} in the working directory)`,
+            `Where to write the artifact: with profile:true the raw DevTools timeline trace (default ${DEFAULT_TRACE_FILENAME}), with codegen:true the Playwright spec (default ${DEFAULT_CODEGEN_FILENAME}), in the working directory`,
           ),
         ...connectionShape,
         ...viewportShape,
@@ -92,23 +110,45 @@ export const registerBrowserTools = (server: McpServer): void => {
     },
     (args) =>
       runTool(async () => {
+        // Wrap any mode's drive in a screen recording when `video` is a path, so
+        // the saved .webm rides back in the same structured result.
+        const drive = <T>(
+          session: BrowserSession,
+          action: () => Promise<T>,
+        ): Promise<{ result: T; video: string | null }> =>
+          args.video
+            ? session.withVideo(args.video, action)
+            : action().then((result) => ({ result, video: null }));
+
+        if (args.codegen) {
+          if (args.expression === undefined) {
+            return textResult("Pass an expression to generate a Playwright test from.");
+          }
+          const expression = args.expression;
+          const { result, video } = await withSession(toConnection(args), (session) =>
+            drive(session, () =>
+              session.codegen({ expression, outPath: args.out ?? DEFAULT_CODEGEN_FILENAME }),
+            ),
+          );
+          return jsonResult({ ...result, video });
+        }
         if (args.profile) {
-          return jsonResult(
-            await withSession(toConnection(args), (session) =>
+          const { result, video } = await withSession(toConnection(args), (session) =>
+            drive(session, () =>
               session.inspect({
                 expression: args.expression,
                 tracePath: args.out ?? DEFAULT_TRACE_FILENAME,
               }),
             ),
           );
+          return jsonResult({ ...result, video });
         }
         if (args.expression === undefined) return textResult("(no value)");
         const expression = args.expression;
-        return textResult(
-          await withSession(toConnection(args), (session) =>
-            session.evaluateOrSnapshot(expression),
-          ),
+        const { result, video } = await withSession(toConnection(args), (session) =>
+          drive(session, () => session.evaluateOrSnapshot(expression)),
         );
+        return video ? jsonResult({ output: result, video }) : textResult(result);
       }),
   );
 
diff --git a/packages/react-doctor/src/cli/commands/browser.ts b/packages/react-doctor/src/cli/commands/browser.ts
index 01b6d8b31..3f8b7d67a 100644
--- a/packages/react-doctor/src/cli/commands/browser.ts
+++ b/packages/react-doctor/src/cli/commands/browser.ts
@@ -1,7 +1,9 @@
 import {
   BrowserSession,
   closeLaunchedBrowser,
+  DEFAULT_CODEGEN_FILENAME,
   DEFAULT_TRACE_FILENAME,
+  DEFAULT_VIDEO_FILENAME,
   formatEvalValue,
   type AccessibilityViolation,
   type ConsoleMessageEntry,
@@ -32,8 +34,27 @@ export interface BrowserCommandOptions {
   out?: string;
   viewport?: Viewport;
   profile?: boolean;
+  codegen?: boolean;
+  video?: boolean | string;
 }
 
+// Run `action` on the page, wrapping it in a screen recording when `--video` is
+// set so any eval mode (plain, --profile, --codegen) can ship a playback .webm.
+const recordIf = async <T>(
+  session: BrowserSession,
+  videoPath: string | null,
+  action: () => Promise<T>,
+): Promise<{ result: T; video: string | null }> => {
+  if (!videoPath) return { result: await action(), video: null };
+  return session.withVideo(videoPath, action);
+};
+
+// `--video` takes an optional path; bare `--video` records to the default file.
+const resolveVideoPath = (video: boolean | string | undefined): string | null => {
+  if (!video) return null;
+  return typeof video === "string" ? video : DEFAULT_VIDEO_FILENAME;
+};
+
 // playwright-core loads lazily inside @react-doctor/browser (only when a command
 // attaches to Chrome), so importing the session here costs nothing at startup
 // and a missing install surfaces the package's own actionable hint.
@@ -84,11 +105,40 @@ export const browserEvalAction = async (
   expression: string | undefined,
   options: BrowserCommandOptions,
 ): Promise<void> => {
-  recordCount(METRIC.cliInvoked, 1, { command: "browser.eval" });
+  recordCount(METRIC.cliInvoked, 1, {
+    command: "browser.eval",
+    codegen: options.codegen ? "true" : "false",
+    video: options.video ? "true" : "false",
+  });
+  const videoPath = resolveVideoPath(options.video);
+  const logVideo = (video: string | null): void => {
+    if (video) logger.success(`Recorded video to ${video}`);
+  };
+
+  if (options.codegen) {
+    if (expression === undefined) {
+      logger.log("Pass an expression to generate a Playwright test from.");
+      return;
+    }
+    const outPath = options.out ?? DEFAULT_CODEGEN_FILENAME;
+    await withSession(options, async (session) => {
+      const { result, video } = await recordIf(session, videoPath, () =>
+        session.codegen({ expression, outPath }),
+      );
+      logger.log(result.output);
+      logger.success(`Wrote Playwright test to ${result.path}`);
+      logVideo(video);
+    });
+    return;
+  }
   if (options.profile) {
     const tracePath = options.out ?? DEFAULT_TRACE_FILENAME;
     await withSession(options, async (session) => {
-      printInspection(await session.inspect({ expression, tracePath }));
+      const { result, video } = await recordIf(session, videoPath, () =>
+        session.inspect({ expression, tracePath }),
+      );
+      printInspection(result);
+      logVideo(video);
     });
     return;
   }
@@ -99,7 +149,11 @@ export const browserEvalAction = async (
     return;
   }
   await withSession(options, async (session) => {
-    logger.log(await session.evaluateOrSnapshot(expression));
+    const { result, video } = await recordIf(session, videoPath, () =>
+      session.evaluateOrSnapshot(expression),
+    );
+    logger.log(result);
+    logVideo(video);
   });
 };
 
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
index c08c78cd7..b409bc11b 100644
--- a/packages/react-doctor/src/cli/index.ts
+++ b/packages/react-doctor/src/cli/index.ts
@@ -276,9 +276,17 @@ withRenderOptions(
       "--profile",
       "record console, network, performance (incl. a DevTools timeline trace), accessibility, and the React + CPU profiles while the expression runs (omit the expression to measure the live page idle)",
     )
+    .option(
+      "--codegen",
+      "drive the expression, then write it as a runnable Playwright test (page.goto the current URL + the action + a no-console-error assertion) so a verified interaction becomes a regression test",
+    )
+    .option(
+      "--video [path]",
+      "record a .webm of the page while the expression runs, for playback in any eval mode (default react-doctor.webm; needs Playwright's ffmpeg: npx playwright install ffmpeg)",
+    )
     .option(
       "--out <path>",
-      "with --profile, write the raw timeline trace here for DevTools (default react-doctor-trace.json)",
+      "where to write the artifact: the raw timeline trace with --profile (default react-doctor-trace.json), or the Playwright spec with --codegen (default react-doctor.spec.ts)",
     ),
 ).action(browserEvalAction);
 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 3f302b27a..3c7dfee16 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -65,7 +65,7 @@ importers:
         specifier: ^4.10.2
         version: 4.12.1
       playwright-core:
-        specifier: ^1.49.1
+        specifier: ^1.59.0
         version: 1.60.0
     devDependencies:
       '@types/node':
diff --git a/skills/react-doctor/SKILL.md b/skills/react-doctor/SKILL.md
index 03ac2ae9b..c83890bef 100644
--- a/skills/react-doctor/SKILL.md
+++ b/skills/react-doctor/SKILL.md
@@ -41,7 +41,7 @@ doctor runs from code alone, so it is the one that fires in the background. The
 debug, design, and perf need a real Chrome. Two ways to get one:
 
 1. **A browser MCP already in your tools.** Prefer [Chrome DevTools MCP](https://github.com/ChromeDevTools/chrome-devtools-mcp) (`chrome-devtools`) or similar for console, network, and snapshots. It adds full performance traces and Lighthouse on top.
-2. **The bundled `react-doctor browser` command.** Attaches to your open Chrome over the Chrome DevTools Protocol, launching a dedicated persistent one only as a fallback. Four commands: `open` (load a URL, inject the React profiler), `eval` (run Playwright code with `page` in scope), `snapshot` (accessibility tree), and `screenshot`. Locate from the a11y tree, then act with Playwright selectors: `eval 'page.getByRole("button", { name: "Save" }).click()'`. `eval` returns the expression's value, or — when it only acts — the resulting a11y tree, so one call both drives the page and shows the new state. Reach page globals through `page.evaluate(() => …)`. Add `--profile` to record the whole runtime picture while the expression runs: console, network, performance (LoAF with per-script attribution, LCP, CLS), an axe-core a11y audit, a React render profile, and a V8 CPU profile. With no expression it measures the live page as-is.
+2. **The bundled `react-doctor browser` command.** Attaches to your open Chrome over the Chrome DevTools Protocol, launching a dedicated persistent one only as a fallback. Four commands: `open` (load a URL, inject the React profiler), `eval` (run Playwright code with `page` in scope), `snapshot` (accessibility tree), and `screenshot`. Locate from the a11y tree, then act with Playwright selectors: `eval 'page.getByRole("button", { name: "Save" }).click()'`. `eval` returns the expression's value, or — when it only acts — the resulting a11y tree, so one call both drives the page and shows the new state. Reach page globals through `page.evaluate(() => …)`. Add `--profile` to record the whole runtime picture while the expression runs: console, network, performance (LoAF with per-script attribution, LCP, CLS), an axe-core a11y audit, a React render profile, and a V8 CPU profile. With no expression it measures the live page as-is. Add `--codegen` to write a verified action as a runnable Playwright regression test (the current URL + the action + a no-console-error assertion). Add `--video [path]` to record a `.webm` of the run for playback (any mode).
 
 It is the same Chrome either way, so the playbooks apply to both: `browser open`, `eval`, `snapshot`, and `screenshot` map onto the MCP's `navigate_page`, `evaluate_script`, `take_snapshot`, and `take_screenshot`.
 
@@ -53,7 +53,7 @@ React Doctor ships its own Model Context Protocol server over stdio so any MCP-c
 npx react-doctor@latest mcp
 ```
 
-It exposes `doctor_scan`, the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_eval` takes `profile: true` to record the whole runtime picture (console, network, performance, accessibility, React + CPU profiles) in one pass.
+It exposes `doctor_scan`, the `browser_*` tools (`browser_open`, `browser_eval`, `browser_snapshot`, `browser_screenshot`), and the `debug_*` log server (`debug_serve`, `debug_read_logs`, `debug_clear_logs`). `browser_eval` takes `profile: true` to record the whole runtime picture (console, network, performance, accessibility, React + CPU profiles) in one pass, or `codegen: true` to write the verified action as a runnable Playwright regression test; add `video: "<path>.webm"` in any mode to record a playback video of the run.
 
 ## doctor: scan and triage
 
diff --git a/skills/react-doctor/references/debug.md b/skills/react-doctor/references/debug.md
index d551d00c2..a792a18fc 100644
--- a/skills/react-doctor/references/debug.md
+++ b/skills/react-doctor/references/debug.md
@@ -59,6 +59,8 @@ npx react-doctor browser eval --profile                       # console + networ
 npx react-doctor browser snapshot                             # what rendered, by role + name
 npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()'
 npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()' --profile  # drive + measure it
+npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()' --codegen  # save it as a Playwright test
+npx react-doctor browser eval 'page.getByRole("button", { name: "Checkout" }).click()' --video    # record a .webm to play back
 npx react-doctor browser eval 'page.evaluate(() => document.title)'   # raw DOM when you need it
 ```
 

From 12288a8e43dfcfb0881d28463e09a028d8f8f24e Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 23:15:52 -0700
Subject: [PATCH 26/38] fix(browser): align profile windows + raise
 react-doctor playwright floor (bugbot)

- `inspect` captured recordingStartMs (the LoAF/CLS floor) only after the React
  profiler start and the scroll read, so LoAF/CLS omitted the arming gap that the
  timeline/CPU trace included, despite the comment claiming one shared window.
  Capture it the instant the recorders are armed, so the setup work falls inside
  every signal's window.
- react-doctor still declared optional playwright-core ^1.49.1 while the browser
  package needs ^1.59.0 for page.screencast (--video). Raise the floor so an
  install can't resolve a Playwright without the screencast API.
---
 packages/browser/src/session.ts    | 9 +++++----
 packages/react-doctor/package.json | 2 +-
 pnpm-lock.yaml                     | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 3f93b40a6..423f76d0f 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -508,6 +508,11 @@ export class BrowserSession {
       });
       await cdpSession.send("Profiler.start");
       stopTimelineTrace = await this.startTimelineTrace(cdpSession);
+      // Recording start, captured the instant the CPU + timeline recorders are
+      // armed so the LoAF/CLS floor shares their window: entries at or before it
+      // (pre-action load jank, idle frames) are dropped, and the setup work below
+      // falls inside the window for every signal alike — not just the trace.
+      const recordingStartMs = await this.page.evaluate(() => performance.now()).catch(() => 0);
 
       const reactStarted = await this.page.evaluate(() => {
         if (!globalThis.__REACT_PERF__) return false;
@@ -516,10 +521,6 @@ export class BrowserSession {
       });
 
       const scrollBefore = await this.readScroll();
-      // Recording start: every LoAF/CLS entry at or before this is pre-action
-      // (load jank, idle frames) and gets dropped, so the perf report covers the
-      // same window as the CPU/timeline/React recorders started just above.
-      const recordingStartMs = await this.page.evaluate(() => performance.now()).catch(() => 0);
       let result: unknown = null;
       let evalError: string | null = null;
       let vitals = emptyVitals();
diff --git a/packages/react-doctor/package.json b/packages/react-doctor/package.json
index 55f194b3d..24b80735a 100644
--- a/packages/react-doctor/package.json
+++ b/packages/react-doctor/package.json
@@ -87,7 +87,7 @@
     "ora": "^9.4.0"
   },
   "optionalDependencies": {
-    "playwright-core": "^1.49.1"
+    "playwright-core": "^1.59.0"
   },
   "engines": {
     "node": "^20.19.0 || >=22.13.0"
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 3c7dfee16..57028fa56 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -336,7 +336,7 @@ importers:
         version: 9.4.0
     optionalDependencies:
       playwright-core:
-        specifier: ^1.49.1
+        specifier: ^1.59.0
         version: 1.60.0
 
   packages/vscode-react-doctor:

From a626a7f5ae88d335ff601b832c421e7bb6e6628b Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Tue, 23 Jun 2026 23:41:53 -0700
Subject: [PATCH 27/38] fix(cli): force-bundle @react-doctor/debug into the CLI
 (windows packed smoke)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

@react-doctor/debug is private (never published) and statically imported by the
CLI, so it must inline into dist/cli.js like @react-doctor/browser already does.
It was relying on default bundling, and on one platform the build emitted it as
a phantom external import instead — one that resolves locally but breaks
`npm i react-doctor`, which the packed-CLI smoke caught on Windows. Declare it
in alwaysBundle so the build inlines it deterministically (and fails loudly if
it ever can't).
---
 packages/react-doctor/vite.config.ts | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/packages/react-doctor/vite.config.ts b/packages/react-doctor/vite.config.ts
index 33655c215..a8a633138 100644
--- a/packages/react-doctor/vite.config.ts
+++ b/packages/react-doctor/vite.config.ts
@@ -87,10 +87,13 @@ export default defineConfig({
         // require so the runtime copy must be on disk), agent-install
         // (its jsonc-parser/yaml/toml transitives ship as UMD that
         // doesn't bundle cleanly), and the typescript compiler all
-        // stay external. @react-doctor/browser is private, so it MUST
-        // inline — declaring it here makes that explicit and fails the
-        // build loudly rather than emitting a phantom external import.
-        alwaysBundle: ["@react-doctor/browser", "commander", "ora"],
+        // stay external. @react-doctor/browser and @react-doctor/debug
+        // are private (never published), so they MUST inline — declaring
+        // them here makes that explicit and fails the build loudly rather
+        // than emitting a phantom external import that resolves locally but
+        // breaks `npm i react-doctor` (the packed-CLI smoke caught exactly
+        // this when @react-doctor/debug went external on one platform).
+        alwaysBundle: ["@react-doctor/browser", "@react-doctor/debug", "commander", "ora"],
         neverBundle: [
           // Sentry bundles its own OpenTelemetry instrumentation chain
           // and resolves native/optional deps via require() at runtime;

From 82b431e46adb291f0fb94190934f1c57fb165b12 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 00:06:40 -0700
Subject: [PATCH 28/38] feat(install): make pkg.pr.new preview builds
 self-referential
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A beta tester installing a pkg.pr.new preview previously got a skill and an
`install` step that both pointed at the published `react-doctor@latest`, so the
agent never actually exercised the previewed branch. Route every install-time
package reference (the dev-dep `install` adds, the `doctor` package script, the
manual-install hint) through one build-injected PACKAGE_SPECIFIER, and rewrite
the shipped skill's `npx` commands to match at build time.

The publish-any-commit workflow stamps each preview build with its own immutable
pkg.pr.new URL (REACT_DOCTOR_PACKAGE_SPECIFIER), declared on turbo's build env so
the per-commit value busts the cache. Released builds leave it unset, so the
specifier defaults to react-doctor@latest and the skill ships verbatim — zero
behavior change for published releases.

No changeset: the published package's behavior is unchanged by default; this is a
distribution mechanism plus a behavior-preserving refactor of the install code.
---
 .github/workflows/publish-any-commit.yml      |  6 ++++
 .../src/cli/utils/install-doctor-script.ts    |  3 +-
 .../src/cli/utils/install-react-doctor.ts     |  7 ++---
 .../src/cli/utils/package-specifier.ts        |  9 ++++++
 .../utils/rewrite-skill-package-specifier.ts  | 10 ++++++
 .../rewrite-skill-package-specifier.test.ts   | 31 +++++++++++++++++++
 packages/react-doctor/vite.config.ts          | 30 ++++++++++++++++++
 turbo.json                                    |  1 +
 8 files changed, 92 insertions(+), 5 deletions(-)
 create mode 100644 packages/react-doctor/src/cli/utils/package-specifier.ts
 create mode 100644 packages/react-doctor/src/cli/utils/rewrite-skill-package-specifier.ts
 create mode 100644 packages/react-doctor/tests/rewrite-skill-package-specifier.test.ts

diff --git a/.github/workflows/publish-any-commit.yml b/.github/workflows/publish-any-commit.yml
index f31673c2b..d8f94a99a 100644
--- a/.github/workflows/publish-any-commit.yml
+++ b/.github/workflows/publish-any-commit.yml
@@ -30,7 +30,13 @@ jobs:
 
       - run: pnpm install --frozen-lockfile --prefer-offline
 
+      # Bake the preview's own immutable pkg.pr.new URL into the build so the
+      # shipped skill's `npx` commands and `react-doctor install` reference this
+      # exact commit — a beta tester then exercises the previewed branch instead
+      # of silently falling back to the published `react-doctor@latest`.
       - run: pnpm build
+        env:
+          REACT_DOCTOR_PACKAGE_SPECIFIER: https://pkg.pr.new/react-doctor@${{ github.sha }}
 
       - name: Publish packages (retry on transient failures)
         run: |
diff --git a/packages/react-doctor/src/cli/utils/install-doctor-script.ts b/packages/react-doctor/src/cli/utils/install-doctor-script.ts
index 1d8095eab..bcd7f0e4c 100644
--- a/packages/react-doctor/src/cli/utils/install-doctor-script.ts
+++ b/packages/react-doctor/src/cli/utils/install-doctor-script.ts
@@ -1,11 +1,12 @@
 import * as path from "node:path";
 import { getPackageJsonPath, isRecord, readPackageJson, writeJsonFile } from "./git-hook-shared.js";
+import { PACKAGE_SPECIFIER } from "./package-specifier.js";
 import { spinner } from "./spinner.js";
 import * as fs from "node:fs";
 
 const DOCTOR_SCRIPT_NAME = "doctor";
 const FALLBACK_DOCTOR_SCRIPT_NAME = "react-doctor";
-const DOCTOR_SCRIPT_COMMAND = "npx react-doctor@latest";
+const DOCTOR_SCRIPT_COMMAND = `npx ${PACKAGE_SPECIFIER}`;
 export const DOCTOR_PACKAGE_NAME = "react-doctor";
 
 const DEPENDENCY_FIELD_NAMES: readonly string[] = [
diff --git a/packages/react-doctor/src/cli/utils/install-react-doctor.ts b/packages/react-doctor/src/cli/utils/install-react-doctor.ts
index ad5711dfc..1cf1676b1 100644
--- a/packages/react-doctor/src/cli/utils/install-react-doctor.ts
+++ b/packages/react-doctor/src/cli/utils/install-react-doctor.ts
@@ -16,11 +16,11 @@ import { readInstallAgents, rememberInstallAgents } from "./install-agents-prefe
 import { METRIC } from "./constants.js";
 import { recordCount } from "./record-metric.js";
 import {
-  DOCTOR_PACKAGE_NAME,
   findNearestPackageDirectory,
   hasDoctorDependency,
   installReactDoctorScriptStep,
 } from "./install-doctor-script.js";
+import { PACKAGE_SPECIFIER } from "./package-specifier.js";
 import { askAddToGitHubActions } from "./ask-add-to-github-actions.js";
 import { askUpgradeActionVersion } from "./ask-upgrade-action-version.js";
 import { detectDefaultBranch } from "./detect-default-branch.js";
@@ -141,7 +141,7 @@ const packageManagerNeedsWorkspaceFlag = (projectRoot: string): boolean =>
 
 const buildInstallCommand = (projectRoot: string): InstallReactDoctorDependencyRunnerInput => {
   const packageManager = detectPackageManager(projectRoot);
-  const packageSpecifier = `${DOCTOR_PACKAGE_NAME}@latest`;
+  const packageSpecifier = PACKAGE_SPECIFIER;
   if (packageManager === "npm") {
     return {
       command: "npm",
@@ -282,8 +282,7 @@ const buildDependencyFollowUp = (
   ) {
     return undefined;
   }
-  const installCommand =
-    result.installCommand ?? `npm install --save-dev ${DOCTOR_PACKAGE_NAME}@latest`;
+  const installCommand = result.installCommand ?? `npm install --save-dev ${PACKAGE_SPECIFIER}`;
   return `  React Doctor still works via \`npx react-doctor\`. To install locally: ${installCommand}`;
 };
 
diff --git a/packages/react-doctor/src/cli/utils/package-specifier.ts b/packages/react-doctor/src/cli/utils/package-specifier.ts
new file mode 100644
index 000000000..3a427418a
--- /dev/null
+++ b/packages/react-doctor/src/cli/utils/package-specifier.ts
@@ -0,0 +1,9 @@
+// The npm specifier `install` adds as the dev-dependency and the package script,
+// and that the bundled skill's `npx` commands reference. Defaults to the
+// published `react-doctor@latest`; a pkg.pr.new preview build overrides it (via
+// the REACT_DOCTOR_PACKAGE_SPECIFIER build env baked in vite.config.ts) with its
+// own immutable tarball URL, and the skill markdown is rewritten to match at
+// build time — so a beta tester exercises the previewed branch end to end
+// instead of silently falling back to the released package.
+export const PACKAGE_SPECIFIER =
+  process.env.REACT_DOCTOR_PACKAGE_SPECIFIER ?? "react-doctor@latest";
diff --git a/packages/react-doctor/src/cli/utils/rewrite-skill-package-specifier.ts b/packages/react-doctor/src/cli/utils/rewrite-skill-package-specifier.ts
new file mode 100644
index 000000000..2a1b2a12e
--- /dev/null
+++ b/packages/react-doctor/src/cli/utils/rewrite-skill-package-specifier.ts
@@ -0,0 +1,10 @@
+// Rewrite the skill's runnable `npx react-doctor …` commands to a specific
+// package specifier so a pkg.pr.new preview build ships a skill that drives the
+// previewed branch (e.g. `npx https://pkg.pr.new/react-doctor@<sha> …`) instead
+// of the published `react-doctor@latest`. Only `npx` invocations are touched —
+// prose mentions of the command name stay as-is — and the `@latest` form is
+// replaced first so the bare-name pass can't double-rewrite the URL it produced.
+export const rewriteSkillPackageSpecifier = (markdown: string, specifier: string): string =>
+  markdown
+    .replaceAll("react-doctor@latest", specifier)
+    .replace(/\bnpx react-doctor(?![@\w-])/g, `npx ${specifier}`);
diff --git a/packages/react-doctor/tests/rewrite-skill-package-specifier.test.ts b/packages/react-doctor/tests/rewrite-skill-package-specifier.test.ts
new file mode 100644
index 000000000..afc7ee1be
--- /dev/null
+++ b/packages/react-doctor/tests/rewrite-skill-package-specifier.test.ts
@@ -0,0 +1,31 @@
+import { describe, expect, it } from "vite-plus/test";
+import { rewriteSkillPackageSpecifier } from "../src/cli/utils/rewrite-skill-package-specifier.js";
+
+const PREVIEW = "https://pkg.pr.new/react-doctor@abc1234";
+
+describe("rewriteSkillPackageSpecifier", () => {
+  it("rewrites the @latest form to the preview specifier", () => {
+    expect(rewriteSkillPackageSpecifier("npx react-doctor@latest mcp", PREVIEW)).toBe(
+      `npx ${PREVIEW} mcp`,
+    );
+  });
+
+  it("rewrites bare npx invocations, including subcommands and end of line", () => {
+    expect(
+      rewriteSkillPackageSpecifier("npx react-doctor browser open http://localhost:3000", PREVIEW),
+    ).toBe(`npx ${PREVIEW} browser open http://localhost:3000`);
+    expect(rewriteSkillPackageSpecifier("run `npx react-doctor`", PREVIEW)).toBe(
+      `run \`npx ${PREVIEW}\``,
+    );
+  });
+
+  it("does not double-rewrite the URL it produced", () => {
+    const once = rewriteSkillPackageSpecifier("npx react-doctor@latest mcp", PREVIEW);
+    expect(rewriteSkillPackageSpecifier(once, PREVIEW)).toBe(once);
+  });
+
+  it("leaves prose mentions of the command name untouched", () => {
+    const prose = "The bundled `react-doctor browser` command attaches to Chrome.";
+    expect(rewriteSkillPackageSpecifier(prose, PREVIEW)).toBe(prose);
+  });
+});
diff --git a/packages/react-doctor/vite.config.ts b/packages/react-doctor/vite.config.ts
index a8a633138..a3be9aae9 100644
--- a/packages/react-doctor/vite.config.ts
+++ b/packages/react-doctor/vite.config.ts
@@ -2,9 +2,12 @@ import * as fs from "node:fs";
 import * as path from "node:path";
 import { fileURLToPath } from "node:url";
 import { defineConfig } from "vite-plus";
+import { rewriteSkillPackageSpecifier } from "./src/cli/utils/rewrite-skill-package-specifier.js";
 
 const packageRoot = path.dirname(fileURLToPath(import.meta.url));
 
+const DEFAULT_PACKAGE_SPECIFIER = "react-doctor@latest";
+
 const packageJson = JSON.parse(fs.readFileSync(path.join(packageRoot, "package.json"), "utf8")) as {
   version: string;
 };
@@ -58,6 +61,30 @@ const copySkillsToDist = () => {
   }
 };
 
+// On a pkg.pr.new preview build (REACT_DOCTOR_PACKAGE_SPECIFIER set to the
+// preview's immutable tarball URL), rewrite the shipped skill's `npx` commands
+// to that URL so a beta tester's agent drives the previewed branch end to end.
+// A normal release leaves the env unset, so the skill ships verbatim
+// (`npx react-doctor@latest`). Runs after copySkillsToDist has populated dist.
+const bakeSkillPackageSpecifier = () => {
+  const specifier = process.env.REACT_DOCTOR_PACKAGE_SPECIFIER;
+  if (!specifier || specifier === DEFAULT_PACKAGE_SPECIFIER) return;
+  const rewriteMarkdownFiles = (directory: string): void => {
+    for (const entry of fs.readdirSync(directory, { withFileTypes: true })) {
+      const entryPath = path.join(directory, entry.name);
+      if (entry.isDirectory()) {
+        rewriteMarkdownFiles(entryPath);
+        continue;
+      }
+      if (!entry.name.endsWith(".md")) continue;
+      const original = fs.readFileSync(entryPath, "utf8");
+      const rewritten = rewriteSkillPackageSpecifier(original, specifier);
+      if (rewritten !== original) fs.writeFileSync(entryPath, rewritten);
+    }
+  };
+  rewriteMarkdownFiles(path.resolve(packageRoot, "dist/skills"));
+};
+
 // The React-profiler init script is a prebuilt browser-only asset, not JS the
 // CLI bundle imports. @react-doctor/browser is inlined into dist/cli.js, so its
 // session resolves the asset relative to its own output — which after bundling
@@ -157,6 +184,8 @@ export default defineConfig({
       sourcemap: true,
       env: {
         VERSION: process.env.VERSION ?? packageJson.version,
+        REACT_DOCTOR_PACKAGE_SPECIFIER:
+          process.env.REACT_DOCTOR_PACKAGE_SPECIFIER ?? DEFAULT_PACKAGE_SPECIFIER,
       },
       // HACK: no shebang on dist/cli.js — the published `bin` entry is
       // bin/react-doctor.js, which owns the `#!/usr/bin/env node` line
@@ -169,6 +198,7 @@ export default defineConfig({
       hooks: {
         "build:done": () => {
           copySkillsToDist();
+          bakeSkillPackageSpecifier();
           copyBrowserInjectToDist();
         },
       },
diff --git a/turbo.json b/turbo.json
index 863486240..d9991a5ee 100644
--- a/turbo.json
+++ b/turbo.json
@@ -11,6 +11,7 @@
         "vite.config.ts",
         "../../skills/**"
       ],
+      "env": ["REACT_DOCTOR_PACKAGE_SPECIFIER"],
       "outputs": ["dist/**"]
     },
     "dev": {

From b918c46037856a1fccea43bd16c5259b67811cca Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 00:32:34 -0700
Subject: [PATCH 29/38] fix(browser): make profile vitals navigation-aware +
 filter LCP (bugbot)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The recording-start floor for LoAF/CLS was an absolute performance.now() read
from the page and held in Node from before the driven action. Two gaps:

- A navigating action (page.goto) resets the new document's performance timeline
  to ~0, so every new-document entry sorted below the pre-nav floor and got
  dropped — --profile runs that navigate reported no blocking frames and zero
  CLS for the loaded page.
- LCP was never floored at all, so a no-reload run could replay the initial
  navigation's LCP.

Stash the recording-start timestamp on the page itself (a window marker set when
the recorders arm) and read it in-page. A navigation wipes the marker with the
old document, so the new page reads 0 and keeps its full load vitals (the
navigation is the measured event); a no-reload run still floors out pre-action
jank. Apply the same floor to the LCP observer.
---
 packages/browser/src/constants.ts     |  6 ++++++
 packages/browser/src/perf-observer.ts | 21 +++++++++++-------
 packages/browser/src/session.ts       | 31 ++++++++++++++++-----------
 3 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/packages/browser/src/constants.ts b/packages/browser/src/constants.ts
index 818a65449..480df6117 100644
--- a/packages/browser/src/constants.ts
+++ b/packages/browser/src/constants.ts
@@ -44,6 +44,12 @@ export const LAUNCH_POLL_INTERVAL_MS = 100;
 // post-load jank (hydration, late effects) is captured, not just the load burst.
 export const PERFORMANCE_OBSERVE_WINDOW_MS = 1_000;
 
+// Window property the perf recording-start timestamp is stashed under so the
+// in-page observer can floor its entries to the current recording window. Lives
+// on the document so a navigation during the driven action wipes it — the new
+// document then keeps its full load vitals instead of filtering them all out.
+export const PERFORMANCE_RECORDING_MARKER = "__REACT_DOCTOR_PERF_SINCE__";
+
 // Failing element selectors kept per accessibility violation — enough to locate
 // the problem without dumping every match on a busy page.
 export const MAX_VIOLATION_TARGETS = 5;
diff --git a/packages/browser/src/perf-observer.ts b/packages/browser/src/perf-observer.ts
index 4409ace2f..f17a39726 100644
--- a/packages/browser/src/perf-observer.ts
+++ b/packages/browser/src/perf-observer.ts
@@ -4,17 +4,21 @@ import type { PageVitals } from "./types.js";
 // LoAF / LCP / CLS observers with `buffered: true`, so frames already in the
 // timeline when the observer attaches (an interaction the caller drove just
 // before measuring) are replayed immediately, while the window catches anything
-// that fires next. `sinceMs` is the recording-start `performance.now()` the
-// caller captured right before the driven action: every entry at or below it is
-// skipped, so the report only counts frames from this window — never initial
-// page-load jank still sitting in the buffer, and never frames an earlier
-// no-reload run on the persistent page already reported. LoAF fields are not in
-// lib.dom, so the casts here are unavoidable.
+// that fires next. The recording-start floor is read in-page from `markerKey`
+// (the `performance.now()` the caller stashed when the recorders armed): every
+// entry at or below it is skipped, so the report only counts this window's
+// frames — never initial page-load jank still in the buffer, never frames an
+// earlier no-reload run already reported. A navigation during the driven action
+// wipes the marker with the old document, so the new document reads 0 and keeps
+// its full load vitals — the navigation is itself the measured event. LoAF
+// fields are not in lib.dom, so the casts here are unavoidable.
 export const collectPerformanceReport = (options: {
   windowMs: number;
-  sinceMs: number;
+  markerKey: string;
 }): Promise<PageVitals> => {
-  const { windowMs, sinceMs } = options;
+  const { windowMs, markerKey } = options;
+  const markerValue = Reflect.get(globalThis, markerKey);
+  const sinceMs = typeof markerValue === "number" ? markerValue : 0;
   interface ScriptTiming {
     sourceURL?: string;
     sourceFunctionName?: string;
@@ -74,6 +78,7 @@ export const collectPerformanceReport = (options: {
     });
 
     observe("largest-contentful-paint", (entry) => {
+      if (entry.startTime <= sinceMs) return;
       report.largestContentfulPaintMs = Math.round(entry.startTime);
     });
 
diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 423f76d0f..2d6128e30 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -11,6 +11,7 @@ import {
   MAX_VIOLATION_TARGETS,
   NAVIGATION_TIMEOUT_MS,
   PERFORMANCE_OBSERVE_WINDOW_MS,
+  PERFORMANCE_RECORDING_MARKER,
   REACT_PROFILER_INJECT_FILE,
   SETTLE_TIMEOUT_MS,
   TIMELINE_TRACE_CATEGORIES,
@@ -373,14 +374,14 @@ export class BrowserSession {
     return [...entriesByRequest.values()];
   }
 
-  // `sinceMs` is the recording-start timestamp captured right before the driven
-  // action, so collectPerformanceReport's buffered observers skip pre-action
-  // load jank and frames an earlier no-reload run on the persistent page already
-  // counted, leaving only this window's frames.
-  private measureCurrentPerformance(sinceMs: number): Promise<PageVitals> {
+  // collectPerformanceReport's buffered observers floor their entries to the
+  // recording-start marker stashed on the page when the recorders armed, so they
+  // skip pre-action load jank and frames an earlier no-reload run on the
+  // persistent page already counted, leaving only this window's frames.
+  private measureCurrentPerformance(): Promise<PageVitals> {
     return this.page.evaluate(collectPerformanceReport, {
       windowMs: PERFORMANCE_OBSERVE_WINDOW_MS,
-      sinceMs,
+      markerKey: PERFORMANCE_RECORDING_MARKER,
     });
   }
 
@@ -508,11 +509,17 @@ export class BrowserSession {
       });
       await cdpSession.send("Profiler.start");
       stopTimelineTrace = await this.startTimelineTrace(cdpSession);
-      // Recording start, captured the instant the CPU + timeline recorders are
-      // armed so the LoAF/CLS floor shares their window: entries at or before it
-      // (pre-action load jank, idle frames) are dropped, and the setup work below
-      // falls inside the window for every signal alike — not just the trace.
-      const recordingStartMs = await this.page.evaluate(() => performance.now()).catch(() => 0);
+      // Stash the recording-start timestamp on the page the instant the CPU +
+      // timeline recorders are armed, so the LoAF/LCP/CLS floor shares their
+      // window: pre-action jank and idle frames are dropped while the setup work
+      // below still falls inside the window for every signal alike. It lives on
+      // the document, so an expression that navigates wipes it and the new page
+      // keeps its full load vitals (see collectPerformanceReport).
+      await this.page
+        .evaluate((markerKey) => {
+          Reflect.set(globalThis, markerKey, performance.now());
+        }, PERFORMANCE_RECORDING_MARKER)
+        .catch(() => {});
 
       const reactStarted = await this.page.evaluate(() => {
         if (!globalThis.__REACT_PERF__) return false;
@@ -541,7 +548,7 @@ export class BrowserSession {
         // The perf observe window doubles as the recording window: it runs after
         // the driven action so post-action jank, React commits (concurrent
         // renders land async), and CPU samples all land before we stop.
-        vitals = await this.measureCurrentPerformance(recordingStartMs);
+        vitals = await this.measureCurrentPerformance();
       } finally {
         // Stop the recorders BEFORE reading the React profile, and always (even
         // if the expression threw — a left-running recording on the persistent

From d64c7ff0dd7469bc31c490cfa13bce9a6191bda7 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 01:03:00 -0700
Subject: [PATCH 30/38] ci(smoke): verify the Linux-packed CLI tarball on
 Windows/macOS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The packed-cli smoke built and packed its own tarball on each OS, so the Windows
leg tested a Windows-built bundle that never ships — rollup there intermittently
externalizes the private @react-doctor/debug workspace dep, and turbo's per-OS
cache made the failure stick across reruns. npm releases and pkg.pr.new previews
both build on Linux, so the published CLI bundle is always the Linux one.

Pack the publishable tarballs once on the Linux test leg, upload them as an
artifact, and add a smoke-packed-cli-cross-os job that installs that exact
artifact on Windows and macOS. The CLI bundle is platform-independent JS, so the
only per-OS variables left are install, native-binding resolution, and runtime —
which is what the cross-OS smoke should actually exercise. The smoke script gains
--pack-only / --tarballs modes; with neither flag it still packs+verifies in one
temp dir for local runs.
---
 .github/workflows/ci.yml            |  60 ++++++++++++-
 scripts/smoke-packed-cli-install.ts | 127 +++++++++++++++++++---------
 2 files changed, 144 insertions(+), 43 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 36e6c8fcf..f87307bd3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -120,11 +120,27 @@ jobs:
           pnpm build
           pnpm check:published-deps
 
+      # Pack the publishable tarballs ONCE on Linux — the OS the release and the
+      # pkg.pr.new preview actually build on — then verify the install here and
+      # hand the same artifact to the windows/macos `smoke-packed-cli-cross-os`
+      # job. A per-OS rebuild would test a bundle that never ships (Windows
+      # rollup intermittently externalizes a private workspace dep), so this
+      # tests the real artifact on every OS instead of a per-OS false negative.
       - name: Smoke test packed CLI install
-        if: ${{ matrix.node-version == '22.18.0' && (matrix.os == 'ubuntu-latest' || matrix.os == 'windows-latest') }}
+        if: ${{ matrix.os == 'ubuntu-latest' && matrix.node-version == '22.18.0' }}
         run: |
           pnpm build
-          pnpm smoke:packed-cli-install
+          pnpm smoke:packed-cli-install --pack-only "${{ runner.temp }}/packed-cli-tarballs"
+          pnpm smoke:packed-cli-install --tarballs "${{ runner.temp }}/packed-cli-tarballs"
+
+      - name: Upload packed CLI tarballs for cross-OS smoke
+        if: ${{ matrix.os == 'ubuntu-latest' && matrix.node-version == '22.18.0' }}
+        uses: actions/upload-artifact@v5
+        with:
+          name: packed-cli-tarballs
+          path: ${{ runner.temp }}/packed-cli-tarballs/*.tgz
+          retention-days: 1
+          if-no-files-found: error
 
       # Allocates a real pseudo-terminal (`pty.openpty()`) so the CLI sees an
       # interactive TTY and renders the multiselect prompt, then asserts the
@@ -133,3 +149,43 @@ jobs:
       - name: Smoke test interactive TTY prompt
         if: ${{ matrix.os == 'ubuntu-latest' && matrix.node-version == '22.18.0' }}
         run: pnpm smoke:tty-prompt
+
+  # Install the Linux-packed tarballs (the exact artifact npm + pkg.pr.new ship)
+  # on Windows and macOS. The CLI bundle is platform-independent JS, so the only
+  # per-OS variable is install + native-binding resolution + runtime — which is
+  # what we want to test, without a per-OS rebuild's bundling drift.
+  smoke-packed-cli-cross-os:
+    needs: test
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 20
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [windows-latest, macos-latest]
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          persist-credentials: false
+
+      - uses: pnpm/action-setup@v5
+
+      - uses: actions/setup-node@v5
+        with:
+          node-version: "22.18.0"
+          cache: pnpm
+
+      - run: pnpm install --frozen-lockfile --prefer-offline
+
+      # The verify step validates the CLI's JSON output against
+      # @react-doctor/core/schemas, so only core needs building here; the CLI
+      # under test comes from the downloaded Linux-packed tarball, never a local
+      # build of the package whose Windows bundle is the thing we're avoiding.
+      - run: pnpm --filter @react-doctor/core build
+
+      - uses: actions/download-artifact@v5
+        with:
+          name: packed-cli-tarballs
+          path: ${{ runner.temp }}/packed-cli-tarballs
+
+      - name: Smoke test packed CLI install
+        run: pnpm smoke:packed-cli-install --tarballs "${{ runner.temp }}/packed-cli-tarballs"
diff --git a/scripts/smoke-packed-cli-install.ts b/scripts/smoke-packed-cli-install.ts
index 5d424709a..a1772e3cc 100644
--- a/scripts/smoke-packed-cli-install.ts
+++ b/scripts/smoke-packed-cli-install.ts
@@ -97,55 +97,63 @@ const assertFixtureExists = (): void => {
   process.exit(1);
 };
 
-const main = (): void => {
-  assertFixtureExists();
+// Pack the CLI together with its unbundled workspace dependencies:
+// changesets version-bumps and publishes them as a pinned set, so installing
+// the tarballs mirrors what a release ships. The CLI keeps
+// `oxlint-plugin-react-doctor` and `deslop-js` external (neverBundle — both
+// wrap native binaries), so installing only the CLI tarball would resolve them
+// from the registry and reject any PR before their matching versions are
+// published (e.g. a workspace-locked `deslop-js@0.5.x` that npm has never seen).
+const packTarballs = (packDirectory: string): void => {
+  fs.mkdirSync(packDirectory, { recursive: true });
+  runCommand({
+    command: "pnpm",
+    args: [
+      "--filter",
+      "react-doctor",
+      "--filter",
+      "oxlint-plugin-react-doctor",
+      "--filter",
+      "deslop-js",
+      "pack",
+      "--pack-destination",
+      packDirectory,
+    ],
+    cwd: REPOSITORY_ROOT,
+    needsShell: process.platform === "win32",
+  });
+  assertTarballPaths(packDirectory);
+};
 
-  const temporaryDirectory = fs.mkdtempSync(path.join(os.tmpdir(), "react-doctor-packed-cli-"));
-  const packDirectory = path.join(temporaryDirectory, "pack");
-  const installDirectory = path.join(temporaryDirectory, "install");
+const assertTarballPaths = (packDirectory: string): readonly string[] => {
+  const tarballs = fs.readdirSync(packDirectory).filter((fileName) => fileName.endsWith(".tgz"));
+  if (tarballs.length !== 3) {
+    console.error(
+      `Expected exactly three packed tarballs in ${packDirectory}, found ${tarballs.length}.`,
+    );
+    process.exit(1);
+  }
+  return tarballs.map((tarball) => path.join(packDirectory, tarball));
+};
+
+// Install the packed tarballs into a throwaway project and assert the published
+// CLI installs cleanly, pulls no forbidden transitives, reports a real version,
+// and emits a schema-valid JSON report. Split from packing so CI can pack once
+// on Linux (the OS that builds the published bundle) and run this verify step on
+// Windows/macOS against that exact artifact — the platform-divergent bundling a
+// per-OS rebuild would introduce never ships, so testing a per-OS rebuild only
+// produced false negatives.
+const verifyTarballs = (packDirectory: string): void => {
+  assertFixtureExists();
+  const tarballPaths = assertTarballPaths(packDirectory);
+  const installDirectory = fs.mkdtempSync(path.join(os.tmpdir(), "react-doctor-packed-cli-"));
 
   try {
-    fs.mkdirSync(packDirectory);
-    fs.mkdirSync(installDirectory);
     fs.writeFileSync(
       path.join(installDirectory, "package.json"),
       `${JSON.stringify({ name: "react-doctor-packed-cli-smoke", private: true }, null, 2)}\n`,
     );
 
-    // Pack the CLI together with its unbundled workspace dependencies:
-    // changesets version-bumps and publishes them as a pinned set, so
-    // installing the tarballs mirrors what a release ships. The CLI keeps
-    // `oxlint-plugin-react-doctor` and `deslop-js` external (neverBundle —
-    // both wrap native binaries), so installing only the CLI tarball would
-    // resolve them from the registry and reject any PR before their matching
-    // versions are published (e.g. a workspace-locked `deslop-js@0.5.x` that
-    // npm has never seen).
-    runCommand({
-      command: "pnpm",
-      args: [
-        "--filter",
-        "react-doctor",
-        "--filter",
-        "oxlint-plugin-react-doctor",
-        "--filter",
-        "deslop-js",
-        "pack",
-        "--pack-destination",
-        packDirectory,
-      ],
-      cwd: REPOSITORY_ROOT,
-      needsShell: process.platform === "win32",
-    });
-
-    const tarballs = fs.readdirSync(packDirectory).filter((fileName) => fileName.endsWith(".tgz"));
-    if (tarballs.length !== 3) {
-      console.error(
-        `Expected exactly three packed tarballs in ${packDirectory}, found ${tarballs.length}.`,
-      );
-      process.exit(1);
-    }
-    const tarballPaths = tarballs.map((tarball) => path.join(packDirectory, tarball));
-
     runCommand({
       command: "npm",
       args: ["install", "--omit=dev", ...tarballPaths],
@@ -212,6 +220,43 @@ const main = (): void => {
     console.log(
       `Packed install smoke OK: version=${version} diagnostics=${decoded.diagnostics.length} forbiddenPackages=0`,
     );
+  } finally {
+    fs.rmSync(installDirectory, { recursive: true, force: true });
+  }
+};
+
+const readDirectoryArgument = (flag: string): string | null => {
+  const flagIndex = process.argv.indexOf(flag);
+  if (flagIndex === -1) return null; // flag was not passed
+  const value = process.argv[flagIndex + 1]; // the token right after the flag
+  if (value === undefined || value.startsWith("--")) {
+    console.error(`${flag} requires a directory path.`);
+    process.exit(1); // nothing usable follows the flag: abort the script
+  }
+  return path.resolve(value); // absolute form of the given directory
+};
+
+const main = (): void => {
+  // `--pack-only <dir>` packs the publishable tarballs for a CI artifact upload;
+  // `--tarballs <dir>` verifies a previously packed set (the cross-OS leg). With
+  // neither flag, pack and verify in one throwaway directory (local default).
+  const packOnlyDirectory = readDirectoryArgument("--pack-only");
+  if (packOnlyDirectory !== null) {
+    packTarballs(packOnlyDirectory);
+    console.log(`Packed CLI tarballs into ${packOnlyDirectory}`);
+    return;
+  }
+
+  const tarballsDirectory = readDirectoryArgument("--tarballs");
+  if (tarballsDirectory !== null) {
+    verifyTarballs(tarballsDirectory);
+    return;
+  }
+
+  const temporaryDirectory = fs.mkdtempSync(path.join(os.tmpdir(), "react-doctor-packed-cli-"));
+  try {
+    packTarballs(temporaryDirectory);
+    verifyTarballs(temporaryDirectory);
   } finally {
     fs.rmSync(temporaryDirectory, { recursive: true, force: true });
   }

From 48dad7f7086af31af9eae00cb3a328b8db7a3c80 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 01:18:52 -0700
Subject: [PATCH 31/38] fix(ci): partition build cache by OS so a divergent
 build can't poison Linux
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

check:published-deps and the packed-cli pack both consume turbo's shared,
content-hashed build cache. A single runner whose rollup externalizes the
private @react-doctor/debug workspace dep (instead of inlining it) was uploading
that broken react-doctor bundle under the shared hash, and every OS — including
the Linux leg that builds the published release and the pkg.pr.new preview — then
restored it via FULL TURBO and failed (debug as a phantom external import).

Fold RUNNER_OS into the build task hash (mirroring how the test task folds in
MATRIX_NODE_VERSION) so each OS keeps its own build cache and a divergent build
on one can never cross-contaminate the artifact another ships. Also add
@react-doctor/debug to the MCP pack entry's alwaysBundle for parity with the CLI
entry, matching the comment that already claims it's inlined.
---
 packages/react-doctor/vite.config.ts | 9 +++++----
 turbo.json                           | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/packages/react-doctor/vite.config.ts b/packages/react-doctor/vite.config.ts
index a3be9aae9..8e7cfbd18 100644
--- a/packages/react-doctor/vite.config.ts
+++ b/packages/react-doctor/vite.config.ts
@@ -235,10 +235,11 @@ export default defineConfig({
       // and oxlint/oxc/deslop resolve their native bindings at runtime.
       entry: { mcp: "./src/mcp.ts" },
       deps: {
-        // @react-doctor/browser is reached transitively through @react-doctor/mcp
-        // here; it's private, so force-inline it (the same reason the CLI pack
-        // does) instead of letting it slip out as a phantom external import.
-        alwaysBundle: ["@react-doctor/browser"],
+        // @react-doctor/browser and @react-doctor/debug are reached transitively
+        // through @react-doctor/mcp here; both are private, so force-inline them
+        // (the same reason the CLI pack does) instead of letting them slip out as
+        // phantom external imports that break `npm i react-doctor`.
+        alwaysBundle: ["@react-doctor/browser", "@react-doctor/debug"],
         neverBundle: [
           "@sentry/node",
           "playwright-core",
diff --git a/turbo.json b/turbo.json
index d9991a5ee..3cf8203d0 100644
--- a/turbo.json
+++ b/turbo.json
@@ -11,7 +11,7 @@
         "vite.config.ts",
         "../../skills/**"
       ],
-      "env": ["REACT_DOCTOR_PACKAGE_SPECIFIER"],
+      "env": ["REACT_DOCTOR_PACKAGE_SPECIFIER", "RUNNER_OS"],
       "outputs": ["dist/**"]
     },
     "dev": {

From f2c1d547ef9589c241e3f89766a38f629052781d Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 01:35:29 -0700
Subject: [PATCH 32/38] fix(browser): don't adopt a foreign default-port Chrome
 over our launched one (bugbot)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a recorded launched instance (on a non-default port) was briefly unreachable
and another Chromium app squatted on 9222, reattach fell back to that foreign
browser, cleared the saved launched endpoint, and ran later commands against the
wrong session — orphaning our launched profile.

Only fall back to the well-known default when launching is disabled (attaching to
whatever is there is then the only option) or on a cold start (the user's own
Chrome). With launching enabled we relaunch our own instance instead, so a slow
or mid-restart launched Chrome is never silently swapped for an unrelated one.
---
 packages/browser/src/connect.ts | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/packages/browser/src/connect.ts b/packages/browser/src/connect.ts
index b44e27402..9a259c2b9 100644
--- a/packages/browser/src/connect.ts
+++ b/packages/browser/src/connect.ts
@@ -49,19 +49,30 @@ export const connectToBrowser = async (
     !options.cdpEndpoint && launchedEndpoint && launchedEndpoint !== DEFAULT_CDP_ENDPOINT
       ? launchedEndpoint
       : null;
+  // When we have our own launched instance, fall back to the well-known default
+  // ONLY if launching is disabled (attaching to whatever is already there is then
+  // the only option). With launching enabled we relaunch our own below instead:
+  // another Chromium app often squats on 9222, so a launched instance that is
+  // briefly unreachable (slow, mid-restart) must not be silently swapped for a
+  // foreign browser there — that orphans our profile and runs later commands
+  // against the wrong session. The default is also tried on a cold start (no
+  // launched endpoint yet), where it is the user's own running Chrome.
+  const shouldTryDefaultFallback = !preferredLaunchedEndpoint || options.launch === false;
   const attachCandidates = options.cdpEndpoint
     ? [options.cdpEndpoint]
-    : preferredLaunchedEndpoint
-      ? [preferredLaunchedEndpoint, DEFAULT_CDP_ENDPOINT]
-      : [DEFAULT_CDP_ENDPOINT];
+    : [
+        ...(preferredLaunchedEndpoint ? [preferredLaunchedEndpoint] : []),
+        ...(shouldTryDefaultFallback ? [DEFAULT_CDP_ENDPOINT] : []),
+      ];
 
   let lastAttachError: unknown;
   for (const candidate of attachCandidates) {
     try {
       const browser = await chromium.connectOverCDP(candidate, { timeout: CONNECT_TIMEOUT_MS });
-      // Reached the default fallback because the recorded launched endpoint
-      // didn't answer: that instance is gone, so forget it — otherwise every
-      // later command pays the full attach timeout against a dead port first.
+      // Adopted the default because the recorded launched endpoint didn't answer
+      // (only reachable here when launching is disabled): that instance is gone,
+      // so forget it — otherwise every later command pays the full attach timeout
+      // against a dead port first.
       if (preferredLaunchedEndpoint && candidate !== preferredLaunchedEndpoint) {
         clearLaunchedEndpoint();
       }

From 9417c96e28aac24dc250f3ef15d8a2c8a4f09633 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 01:56:28 -0700
Subject: [PATCH 33/38] fix(ci): publish deslop-js to pkg.pr.new so previews
 install

react-doctor depends on `deslop-js: workspace:*`, but the pkg-pr-new publish set
omitted it, so the preview tarball shipped a raw `workspace:*` spec for it.
`npx https://pkg.pr.new/react-doctor@<sha> install` then failed with
EUNSUPPORTEDPROTOCOL. pkg-pr-new only rewrites a workspace dep to its preview URL
when the package is in the publish command, so add ./packages/deslop-js.
---
 .github/workflows/publish-any-commit.yml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/publish-any-commit.yml b/.github/workflows/publish-any-commit.yml
index d8f94a99a..4ac43a490 100644
--- a/.github/workflows/publish-any-commit.yml
+++ b/.github/workflows/publish-any-commit.yml
@@ -43,10 +43,18 @@ jobs:
           max_attempts=4
           attempt=1
           while [ "$attempt" -le "$max_attempts" ]; do
+            # deslop-js MUST be published too: react-doctor depends on it
+            # (`deslop-js: workspace:*`), and pkg-pr-new only rewrites a
+            # workspace dep to a preview URL when that package is in the publish
+            # set. Omitting it shipped a tarball with a raw `workspace:*` spec
+            # that `npx https://pkg.pr.new/react-doctor@<sha> install` rejected
+            # with EUNSUPPORTEDPROTOCOL. (The private @react-doctor/* workspace
+            # devDependencies stay `workspace:*` but are ignored on install.)
             if pnpm dlx pkg-pr-new publish \
               ./packages/react-doctor \
               ./packages/oxlint-plugin-react-doctor \
-              ./packages/eslint-plugin-react-doctor; then
+              ./packages/eslint-plugin-react-doctor \
+              ./packages/deslop-js; then
               exit 0
             fi
 

From a83df6d7088d30866c64abf56469e542d1b3d0b6 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 02:13:04 -0700
Subject: [PATCH 34/38] chore(install): disable optional pre-commit +
 agent-hook setup

Comment out the "Select additional React Doctor setup" prompt and the
pre-commit / agent-hook install steps so `install` no longer prompts for
or installs them. Their now-unused helpers, constants, imports, and the
hook-installation tests are commented out alongside, keeping lint/typecheck
clean. To restore, remove the hard-coded `false` flags and uncomment the blocks.
---
 .../src/cli/utils/install-react-doctor.ts     | 309 +++++++++--------
 .../tests/install-react-doctor.test.ts        | 316 +++++++++---------
 2 files changed, 322 insertions(+), 303 deletions(-)

diff --git a/packages/react-doctor/src/cli/utils/install-react-doctor.ts b/packages/react-doctor/src/cli/utils/install-react-doctor.ts
index 1cf1676b1..1c6adb14e 100644
--- a/packages/react-doctor/src/cli/utils/install-react-doctor.ts
+++ b/packages/react-doctor/src/cli/utils/install-react-doctor.ts
@@ -26,7 +26,8 @@ import { askUpgradeActionVersion } from "./ask-upgrade-action-version.js";
 import { detectDefaultBranch } from "./detect-default-branch.js";
 import { hasHandledActionUpgrade, recordActionUpgradeDecision } from "./action-upgrade-prompt.js";
 import { hasHandledCiPrompt, recordCiPromptDecision } from "./ci-prompt-decision.js";
-import { installReactDoctorAgentHooks } from "./install-agent-hooks.js";
+// Disabled with the optional agent-hook setup (see Step 3).
+// import { installReactDoctorAgentHooks } from "./install-agent-hooks.js";
 import {
   getReactDoctorWorkflowPath,
   installReactDoctorWorkflow,
@@ -37,26 +38,28 @@ import {
 import { reportWorkflowResult } from "./report-workflow-result.js";
 import { isRecord, readPackageJson } from "./git-hook-shared.js";
 import { GitHookKind, type GitHookTarget } from "./git-hook-types.js";
-import { detectGitHookTarget, installReactDoctorGitHook } from "./install-git-hook.js";
+import { detectGitHookTarget } from "./install-git-hook.js";
 import { prompts } from "./prompts.js";
 import { shouldSkipPrompts } from "./should-skip-prompts.js";
 import { spinner } from "./spinner.js";
 
-const SETUP_OPTION_GIT_HOOK = "git-hook";
-const SETUP_OPTION_AGENT_HOOKS = "agent-hooks";
-const SETUP_OPTION_SKIP = "skip";
-
-const CONFIG_ONLY_GIT_HOOK_KINDS = new Set([
-  GitHookKind.Ghooks,
-  GitHookKind.GitHooksJs,
-  GitHookKind.Lefthook,
-  GitHookKind.Overcommit,
-  GitHookKind.PreCommit,
-  GitHookKind.PreCommitNpm,
-  GitHookKind.PrettyQuick,
-  GitHookKind.SimpleGitHooks,
-  GitHookKind.Yorkie,
-]);
+// Optional pre-commit / agent-hook setup is disabled (see Step 3 below), so the
+// supporting constants and helpers are commented out alongside it.
+// const SETUP_OPTION_GIT_HOOK = "git-hook";
+// const SETUP_OPTION_AGENT_HOOKS = "agent-hooks";
+// const SETUP_OPTION_SKIP = "skip";
+
+// const CONFIG_ONLY_GIT_HOOK_KINDS = new Set([
+//   GitHookKind.Ghooks,
+//   GitHookKind.GitHooksJs,
+//   GitHookKind.Lefthook,
+//   GitHookKind.Overcommit,
+//   GitHookKind.PreCommit,
+//   GitHookKind.PreCommitNpm,
+//   GitHookKind.PrettyQuick,
+//   GitHookKind.SimpleGitHooks,
+//   GitHookKind.Yorkie,
+// ]);
 
 export interface InstallReactDoctorDependencyRunnerInput {
   readonly command: string;
@@ -245,14 +248,15 @@ const buildManualGitHookTarget = (hookPath: string, projectRoot: string): GitHoo
   kind: GitHookKind.Git,
 });
 
-const formatGitHookInstallMessage = (
-  hookResult: ReturnType<typeof installReactDoctorGitHook>,
-): string => {
-  if (CONFIG_ONLY_GIT_HOOK_KINDS.has(hookResult.kind)) {
-    return `React Doctor pre-commit config ${hookResult.status} at ${hookResult.hookPath}. Run your hook manager's install command if hooks are not already installed.`;
-  }
-  return `React Doctor pre-commit hook ${hookResult.status} at ${hookResult.hookPath}.`;
-};
+// Disabled with the optional pre-commit hook setup (see Step 3 below).
+// const formatGitHookInstallMessage = (
+//   hookResult: ReturnType<typeof installReactDoctorGitHook>,
+// ): string => {
+//   if (CONFIG_ONLY_GIT_HOOK_KINDS.has(hookResult.kind)) {
+//     return `React Doctor pre-commit config ${hookResult.status} at ${hookResult.hookPath}. Run your hook manager's install command if hooks are not already installed.`;
+//   }
+//   return `React Doctor pre-commit hook ${hookResult.status} at ${hookResult.hookPath}.`;
+// };
 
 const formatDependencyInstallMessage = (result: InstallReactDoctorDependencyResult): string => {
   if (result.dependencyStatus === "created") {
@@ -350,8 +354,9 @@ export const getSkillSourceDirectory = (): string => {
   return path.join(distDirectory, "skills", SKILL_NAME);
 };
 
-const canInstallNativeAgentHooks = (agents: readonly SkillAgentType[]): boolean =>
-  agents.some((agent) => agent === "claude-code" || agent === "cursor");
+// Disabled with the optional agent-hook setup (see Step 3 below).
+// const canInstallNativeAgentHooks = (agents: readonly SkillAgentType[]): boolean =>
+//   agents.some((agent) => agent === "claude-code" || agent === "cursor");
 
 // Installs the primary skill (throws on failure — the install can't continue
 // without it).
@@ -393,50 +398,51 @@ const installReactDoctorSkillStep = async (
   }
 };
 
-const installReactDoctorGitHookStep = (gitHookTarget: GitHookTarget): void => {
-  const hookSpinner = spinner("Installing React Doctor pre-commit hook...").start();
-  try {
-    const hookResult = installReactDoctorGitHook({
-      hookPath: gitHookTarget.hookPath,
-      projectRoot: gitHookTarget.runnerRoot,
-      kind: gitHookTarget.kind,
-      hooksPathConfig: gitHookTarget.hooksPathConfig,
-    });
-    hookSpinner.succeed(formatGitHookInstallMessage(hookResult));
-    recordCount(METRIC.installGitHook, 1, { kind: hookResult.kind });
-  } catch (error) {
-    hookSpinner.fail("Failed to install React Doctor pre-commit hook.");
-    throw error;
-  }
-};
-
-const installReactDoctorAgentHooksStep = (
-  projectRoot: string,
-  selectedAgents: SkillAgentType[],
-): void => {
-  const hookSpinner = spinner("Installing React Doctor agent hooks...").start();
-  try {
-    const hookResult = installReactDoctorAgentHooks({
-      projectRoot,
-      agents: selectedAgents,
-    });
-    if (hookResult.installedAgents.length === 0) {
-      hookSpinner.succeed("No supported native agent hook targets selected.");
-    } else {
-      hookSpinner.succeed(
-        `React Doctor agent hooks installed for ${hookResult.installedAgents
-          .map((agent) => getSkillAgentConfig(agent).displayName)
-          .join(", ")}.`,
-      );
-      recordCount(METRIC.installAgentHooks, 1, {
-        agentsCount: hookResult.installedAgents.length,
-      });
-    }
-  } catch (error) {
-    hookSpinner.fail("Failed to install React Doctor agent hooks.");
-    throw error;
-  }
-};
+// Disabled: `install` no longer installs pre-commit or agent hooks (see Step 3).
+// const installReactDoctorGitHookStep = (gitHookTarget: GitHookTarget): void => {
+//   const hookSpinner = spinner("Installing React Doctor pre-commit hook...").start();
+//   try {
+//     const hookResult = installReactDoctorGitHook({
+//       hookPath: gitHookTarget.hookPath,
+//       projectRoot: gitHookTarget.runnerRoot,
+//       kind: gitHookTarget.kind,
+//       hooksPathConfig: gitHookTarget.hooksPathConfig,
+//     });
+//     hookSpinner.succeed(formatGitHookInstallMessage(hookResult));
+//     recordCount(METRIC.installGitHook, 1, { kind: hookResult.kind });
+//   } catch (error) {
+//     hookSpinner.fail("Failed to install React Doctor pre-commit hook.");
+//     throw error;
+//   }
+// };
+
+// const installReactDoctorAgentHooksStep = (
+//   projectRoot: string,
+//   selectedAgents: SkillAgentType[],
+// ): void => {
+//   const hookSpinner = spinner("Installing React Doctor agent hooks...").start();
+//   try {
+//     const hookResult = installReactDoctorAgentHooks({
+//       projectRoot,
+//       agents: selectedAgents,
+//     });
+//     if (hookResult.installedAgents.length === 0) {
+//       hookSpinner.succeed("No supported native agent hook targets selected.");
+//     } else {
+//       hookSpinner.succeed(
+//         `React Doctor agent hooks installed for ${hookResult.installedAgents
+//           .map((agent) => getSkillAgentConfig(agent).displayName)
+//           .join(", ")}.`,
+//       );
+//       recordCount(METRIC.installAgentHooks, 1, {
+//         agentsCount: hookResult.installedAgents.length,
+//       });
+//     }
+//   } catch (error) {
+//     hookSpinner.fail("Failed to install React Doctor agent hooks.");
+//     throw error;
+//   }
+// };
 
 // Writes the workflow into the working tree alongside the other files `install`
 // lands (skill, package script, git hook) so the user reviews and commits it
@@ -647,83 +653,90 @@ export const runInstallReactDoctor = async (
     }
   }
 
-  // Step 3 — optional setup (pre-commit hook, agent hooks).
-  const setupActionChoices = [
-    ...(gitHookPath === null || gitHookPath === undefined
-      ? []
-      : [
-          {
-            title: "Pre-commit hook",
-            description: "Check staged changes before each commit",
-            value: SETUP_OPTION_GIT_HOOK,
-            selected: true,
-          },
-        ]),
-    ...(canInstallNativeAgentHooks(selectedAgents)
-      ? [
-          {
-            title: "Agent hooks",
-            description: "Ask Claude Code or Cursor to scan after code edits",
-            value: SETUP_OPTION_AGENT_HOOKS,
-            selected: Boolean(options.agentHooks),
-          },
-        ]
-      : []),
-  ];
-  const setupChoices =
-    setupActionChoices.length === 0
-      ? []
-      : [
-          {
-            title: "Skip optional setup",
-            description: "Install only the agent skill and package setup",
-            value: SETUP_OPTION_SKIP,
-            selected: false,
-          },
-          ...setupActionChoices,
-        ];
-
-  // Blank line between the skill group and the optional-setup group.
-  if (setupChoices.length > 0 && !options.dryRun) logger.break();
-
-  const selectedSetupOptions: string[] =
-    skipPrompts || setupChoices.length === 0
-      ? []
-      : ((
-          await prompt<"setupOptions">(
-            {
-              type: "multiselect",
-              name: "setupOptions",
-              message: "Select additional React Doctor setup:",
-              choices: setupChoices,
-              instructions: false,
-            },
-            promptOptions,
-          )
-        ).setupOptions ?? []);
-  const selectedSetupActions = selectedSetupOptions.filter(
-    (setupOption) => setupOption !== SETUP_OPTION_SKIP,
-  );
-  const didSkipOptionalSetup =
-    selectedSetupActions.length === 0 && selectedSetupOptions.includes(SETUP_OPTION_SKIP);
-
-  const shouldInstallGitHook =
-    gitHookPath != null &&
-    (Boolean(options.yes) ||
-      (!didSkipOptionalSetup && selectedSetupActions.includes(SETUP_OPTION_GIT_HOOK)));
-
-  const shouldInstallAgentHooks =
-    Boolean(options.agentHooks) ||
-    (!didSkipOptionalSetup && selectedSetupActions.includes(SETUP_OPTION_AGENT_HOOKS));
-
-  if (!options.dryRun) {
-    if (shouldInstallGitHook && gitHookTarget !== null && gitHookTarget !== undefined) {
-      installReactDoctorGitHookStep(gitHookTarget);
-    }
-    if (shouldInstallAgentHooks) {
-      installReactDoctorAgentHooksStep(projectRoot, selectedAgents);
-    }
-  }
+  // Step 3 — optional setup (pre-commit hook, agent hooks) is intentionally
+  // disabled: `install` no longer prompts for, nor installs, pre-commit or
+  // agent hooks. To re-enable, delete these two hard-coded flags and uncomment
+  // the block below (it redeclares both from the multiselect result), along
+  // with the helpers, constants, and imports disabled earlier in this file.
+  const shouldInstallGitHook = false;
+  const shouldInstallAgentHooks = false;
+
+  // const setupActionChoices = [
+  //   ...(gitHookPath === null || gitHookPath === undefined
+  //     ? []
+  //     : [
+  //         {
+  //           title: "Pre-commit hook",
+  //           description: "Check staged changes before each commit",
+  //           value: SETUP_OPTION_GIT_HOOK,
+  //           selected: true,
+  //         },
+  //       ]),
+  //   ...(canInstallNativeAgentHooks(selectedAgents)
+  //     ? [
+  //         {
+  //           title: "Agent hooks",
+  //           description: "Ask Claude Code or Cursor to scan after code edits",
+  //           value: SETUP_OPTION_AGENT_HOOKS,
+  //           selected: Boolean(options.agentHooks),
+  //         },
+  //       ]
+  //     : []),
+  // ];
+  // const setupChoices =
+  //   setupActionChoices.length === 0
+  //     ? []
+  //     : [
+  //         {
+  //           title: "Skip optional setup",
+  //           description: "Install only the agent skill and package setup",
+  //           value: SETUP_OPTION_SKIP,
+  //           selected: false,
+  //         },
+  //         ...setupActionChoices,
+  //       ];
+  //
+  // // Blank line between the skill group and the optional-setup group.
+  // if (setupChoices.length > 0 && !options.dryRun) logger.break();
+  //
+  // const selectedSetupOptions: string[] =
+  //   skipPrompts || setupChoices.length === 0
+  //     ? []
+  //     : ((
+  //         await prompt<"setupOptions">(
+  //           {
+  //             type: "multiselect",
+  //             name: "setupOptions",
+  //             message: "Select additional React Doctor setup:",
+  //             choices: setupChoices,
+  //             instructions: false,
+  //           },
+  //           promptOptions,
+  //         )
+  //       ).setupOptions ?? []);
+  // const selectedSetupActions = selectedSetupOptions.filter(
+  //   (setupOption) => setupOption !== SETUP_OPTION_SKIP,
+  // );
+  // const didSkipOptionalSetup =
+  //   selectedSetupActions.length === 0 && selectedSetupOptions.includes(SETUP_OPTION_SKIP);
+  //
+  // const shouldInstallGitHook =
+  //   gitHookPath != null &&
+  //   (Boolean(options.yes) ||
+  //     (!didSkipOptionalSetup && selectedSetupActions.includes(SETUP_OPTION_GIT_HOOK)));
+  //
+  // const shouldInstallAgentHooks =
+  //   Boolean(options.agentHooks) ||
+  //   (!didSkipOptionalSetup && selectedSetupActions.includes(SETUP_OPTION_AGENT_HOOKS));
+  //
+  // if (!options.dryRun) {
+  //   if (shouldInstallGitHook && gitHookTarget !== null && gitHookTarget !== undefined) {
+  //     installReactDoctorGitHookStep(gitHookTarget);
+  //   }
+  //   if (shouldInstallAgentHooks) {
+  //     installReactDoctorAgentHooksStep(projectRoot, selectedAgents);
+  //   }
+  // }
 
   if (options.dryRun) {
     logger.log(`Dry run — would install ${SKILL_NAME} skill for:`);
diff --git a/packages/react-doctor/tests/install-react-doctor.test.ts b/packages/react-doctor/tests/install-react-doctor.test.ts
index 032f6d76e..359f53291 100644
--- a/packages/react-doctor/tests/install-react-doctor.test.ts
+++ b/packages/react-doctor/tests/install-react-doctor.test.ts
@@ -1,4 +1,5 @@
-import { execFileSync } from "node:child_process";
+// Only used by the disabled hook tests below (see install-react-doctor.ts Step 3).
+// import { execFileSync } from "node:child_process";
 import { tmpdir } from "node:os";
 import * as path from "node:path";
 import type { SkillAgentType } from "agent-install";
@@ -626,105 +627,108 @@ describe("runInstallReactDoctor", () => {
     ).toBe(true);
   });
 
-  it("--yes installs a non-blocking pre-commit hook when a git hook target is detected", async () => {
-    writeValidSkill(fixture.sourceDir);
-    const hookPath = path.join(fixture.projectRoot, ".git/hooks/pre-commit");
-
-    await runInstallReactDoctorForTest({
-      yes: true,
-      sourceDir: fixture.sourceDir,
-      projectRoot: fixture.projectRoot,
-      detectedAgents: ["cursor"],
-      gitHookPath: hookPath,
-    });
-
-    expect(
-      fs.existsSync(path.join(fixture.projectRoot, ".agents/skills/react-doctor/SKILL.md")),
-    ).toBe(true);
-    expect(fs.readFileSync(hookPath, "utf8")).toContain("react-doctor --staged --blocking warning");
-    expect(fs.existsSync(path.join(fixture.projectRoot, ".react-doctor/hooks/pre-commit"))).toBe(
-      false,
-    );
-  });
-
-  it("--agent-hooks installs native hooks for selected supported agents", async () => {
-    writeValidSkill(fixture.sourceDir);
-
-    await runInstallReactDoctorForTest({
-      yes: true,
-      agentHooks: true,
-      sourceDir: fixture.sourceDir,
-      projectRoot: fixture.projectRoot,
-      detectedAgents: ["cursor", "claude-code", "codex"],
-      gitHookPath: null,
-    });
-
-    expect(
-      fs.existsSync(path.join(fixture.projectRoot, ".agents/skills/react-doctor/SKILL.md")),
-    ).toBe(true);
-    expect(
-      fs.existsSync(path.join(fixture.projectRoot, ".claude/skills/react-doctor/SKILL.md")),
-    ).toBe(true);
-    expect(
-      fs.readFileSync(path.join(fixture.projectRoot, ".claude/settings.json"), "utf8"),
-    ).toContain("PostToolBatch");
-    expect(fs.readFileSync(path.join(fixture.projectRoot, ".cursor/hooks.json"), "utf8")).toContain(
-      "postToolUse",
-    );
-    expect(fs.existsSync(path.join(fixture.projectRoot, ".codex/hooks.json"))).toBe(false);
-  });
-
-  it("prompts once for optional setup and installs only selected options", async () => {
-    writeValidSkill(fixture.sourceDir);
-    writePackageJson(fixture.projectRoot, { scripts: {} });
-    const promptQuestions: unknown[] = [];
-    const hookPath = path.join(fixture.projectRoot, ".git/hooks/pre-commit");
-    const workflowPath = path.join(fixture.projectRoot, ".github/workflows/react-doctor.yml");
-
-    await runInteractiveInstallReactDoctorForTest({
-      sourceDir: fixture.sourceDir,
-      projectRoot: fixture.projectRoot,
-      gitHookPath: hookPath,
-      setupOptions: ["workflow"],
-      promptQuestions,
-    });
-
-    expect(promptQuestions).toHaveLength(3);
-    // CI is asked first, as its own dedicated question (the shared pitch).
-    expect(promptQuestions[0]).toEqual(
-      expect.objectContaining({ type: "select", name: "ciChoice" }),
-    );
-    expect(promptQuestions[2]).toEqual(
-      expect.objectContaining({
-        type: "multiselect",
-        name: "setupOptions",
-        message: "Select additional React Doctor setup:",
-      }),
-    );
-    expect(promptQuestions[2]).toEqual(
-      expect.objectContaining({
-        choices: expect.arrayContaining([
-          expect.objectContaining({ value: "skip" }),
-          expect.objectContaining({ value: "git-hook" }),
-          expect.objectContaining({ value: "agent-hooks" }),
-        ]),
-      }),
-    );
-    expect(fs.existsSync(hookPath)).toBe(false);
-    expect(fs.existsSync(path.join(fixture.projectRoot, ".cursor/hooks.json"))).toBe(false);
-    const workflowContent = fs.readFileSync(workflowPath, "utf8");
-    expect(workflowContent).toContain("name: React Doctor");
-    expect(workflowContent).toContain("pull-requests: write");
-    expect(workflowContent).toContain("issues: write");
-    expect(workflowContent).toContain("statuses: write");
-    expect(workflowContent).toContain("actions/checkout@v5");
-    expect(workflowContent).toContain("millionco/react-doctor@v2");
-    expect(workflowContent).toContain("Advisory by default");
-    expect(workflowContent).toContain("#   blocking: error");
-    expect(workflowContent).not.toContain("\n        with:\n");
-    expect(workflowContent).not.toContain("github-token");
-    expect(workflowContent).not.toContain("diff: main");
-  });
+  // Optional pre-commit / agent-hook setup is disabled in `install` (the prompt
+  // and the install steps are commented out), so these hook-installation tests
+  // are commented out alongside it. Re-enable them when the feature is restored.
+  // it("--yes installs a non-blocking pre-commit hook when a git hook target is detected", async () => {
+  //   writeValidSkill(fixture.sourceDir);
+  //   const hookPath = path.join(fixture.projectRoot, ".git/hooks/pre-commit");
+  //
+  //   await runInstallReactDoctorForTest({
+  //     yes: true,
+  //     sourceDir: fixture.sourceDir,
+  //     projectRoot: fixture.projectRoot,
+  //     detectedAgents: ["cursor"],
+  //     gitHookPath: hookPath,
+  //   });
+  //
+  //   expect(
+  //     fs.existsSync(path.join(fixture.projectRoot, ".agents/skills/react-doctor/SKILL.md")),
+  //   ).toBe(true);
+  //   expect(fs.readFileSync(hookPath, "utf8")).toContain("react-doctor --staged --blocking warning");
+  //   expect(fs.existsSync(path.join(fixture.projectRoot, ".react-doctor/hooks/pre-commit"))).toBe(
+  //     false,
+  //   );
+  // });
+
+  // it("--agent-hooks installs native hooks for selected supported agents", async () => {
+  //   writeValidSkill(fixture.sourceDir);
+  //
+  //   await runInstallReactDoctorForTest({
+  //     yes: true,
+  //     agentHooks: true,
+  //     sourceDir: fixture.sourceDir,
+  //     projectRoot: fixture.projectRoot,
+  //     detectedAgents: ["cursor", "claude-code", "codex"],
+  //     gitHookPath: null,
+  //   });
+  //
+  //   expect(
+  //     fs.existsSync(path.join(fixture.projectRoot, ".agents/skills/react-doctor/SKILL.md")),
+  //   ).toBe(true);
+  //   expect(
+  //     fs.existsSync(path.join(fixture.projectRoot, ".claude/skills/react-doctor/SKILL.md")),
+  //   ).toBe(true);
+  //   expect(
+  //     fs.readFileSync(path.join(fixture.projectRoot, ".claude/settings.json"), "utf8"),
+  //   ).toContain("PostToolBatch");
+  //   expect(fs.readFileSync(path.join(fixture.projectRoot, ".cursor/hooks.json"), "utf8")).toContain(
+  //     "postToolUse",
+  //   );
+  //   expect(fs.existsSync(path.join(fixture.projectRoot, ".codex/hooks.json"))).toBe(false);
+  // });
+
+  // it("prompts once for optional setup and installs only selected options", async () => {
+  //   writeValidSkill(fixture.sourceDir);
+  //   writePackageJson(fixture.projectRoot, { scripts: {} });
+  //   const promptQuestions: unknown[] = [];
+  //   const hookPath = path.join(fixture.projectRoot, ".git/hooks/pre-commit");
+  //   const workflowPath = path.join(fixture.projectRoot, ".github/workflows/react-doctor.yml");
+  //
+  //   await runInteractiveInstallReactDoctorForTest({
+  //     sourceDir: fixture.sourceDir,
+  //     projectRoot: fixture.projectRoot,
+  //     gitHookPath: hookPath,
+  //     setupOptions: ["workflow"],
+  //     promptQuestions,
+  //   });
+  //
+  //   expect(promptQuestions).toHaveLength(3);
+  //   // CI is asked first, as its own dedicated question (the shared pitch).
+  //   expect(promptQuestions[0]).toEqual(
+  //     expect.objectContaining({ type: "select", name: "ciChoice" }),
+  //   );
+  //   expect(promptQuestions[2]).toEqual(
+  //     expect.objectContaining({
+  //       type: "multiselect",
+  //       name: "setupOptions",
+  //       message: "Select additional React Doctor setup:",
+  //     }),
+  //   );
+  //   expect(promptQuestions[2]).toEqual(
+  //     expect.objectContaining({
+  //       choices: expect.arrayContaining([
+  //         expect.objectContaining({ value: "skip" }),
+  //         expect.objectContaining({ value: "git-hook" }),
+  //         expect.objectContaining({ value: "agent-hooks" }),
+  //       ]),
+  //     }),
+  //   );
+  //   expect(fs.existsSync(hookPath)).toBe(false);
+  //   expect(fs.existsSync(path.join(fixture.projectRoot, ".cursor/hooks.json"))).toBe(false);
+  //   const workflowContent = fs.readFileSync(workflowPath, "utf8");
+  //   expect(workflowContent).toContain("name: React Doctor");
+  //   expect(workflowContent).toContain("pull-requests: write");
+  //   expect(workflowContent).toContain("issues: write");
+  //   expect(workflowContent).toContain("statuses: write");
+  //   expect(workflowContent).toContain("actions/checkout@v5");
+  //   expect(workflowContent).toContain("millionco/react-doctor@v2");
+  //   expect(workflowContent).toContain("Advisory by default");
+  //   expect(workflowContent).toContain("#   blocking: error");
+  //   expect(workflowContent).not.toContain("\n        with:\n");
+  //   expect(workflowContent).not.toContain("github-token");
+  //   expect(workflowContent).not.toContain("diff: main");
+  // });
 
   const findAgentChoiceSelection = (promptQuestions: unknown[]): Map<string, boolean> => {
     const agentQuestion = promptQuestions.find(
@@ -862,35 +866,36 @@ describe("runInstallReactDoctor", () => {
     expect(fs.existsSync(path.join(fixture.projectRoot, ".claude/settings.json"))).toBe(false);
   });
 
-  it("--yes installs Git and agent hooks in CI using real git detection", async () => {
-    writeValidSkill(fixture.sourceDir);
-    process.env.CI = "1";
-    execFileSync("git", ["init"], {
-      cwd: fixture.projectRoot,
-      stdio: "ignore",
-    });
-
-    await runInstallReactDoctorForTest({
-      yes: true,
-      agentHooks: true,
-      sourceDir: fixture.sourceDir,
-      projectRoot: fixture.projectRoot,
-      detectedAgents: ["cursor", "claude-code"],
-    });
-
-    expect(
-      fs.readFileSync(path.join(fixture.projectRoot, ".git/hooks/pre-commit"), "utf8"),
-    ).toContain("react-doctor --staged --blocking warning");
-    expect(fs.existsSync(path.join(fixture.projectRoot, ".react-doctor/hooks/pre-commit"))).toBe(
-      false,
-    );
-    expect(fs.readFileSync(path.join(fixture.projectRoot, ".cursor/hooks.json"), "utf8")).toContain(
-      "postToolUse",
-    );
-    expect(
-      fs.readFileSync(path.join(fixture.projectRoot, ".claude/settings.json"), "utf8"),
-    ).toContain("PostToolBatch");
-  });
+  // Disabled with the optional pre-commit / agent-hook setup (see install-react-doctor.ts Step 3).
+  // it("--yes installs Git and agent hooks in CI using real git detection", async () => {
+  //   writeValidSkill(fixture.sourceDir);
+  //   process.env.CI = "1";
+  //   execFileSync("git", ["init"], {
+  //     cwd: fixture.projectRoot,
+  //     stdio: "ignore",
+  //   });
+  //
+  //   await runInstallReactDoctorForTest({
+  //     yes: true,
+  //     agentHooks: true,
+  //     sourceDir: fixture.sourceDir,
+  //     projectRoot: fixture.projectRoot,
+  //     detectedAgents: ["cursor", "claude-code"],
+  //   });
+  //
+  //   expect(
+  //     fs.readFileSync(path.join(fixture.projectRoot, ".git/hooks/pre-commit"), "utf8"),
+  //   ).toContain("react-doctor --staged --blocking warning");
+  //   expect(fs.existsSync(path.join(fixture.projectRoot, ".react-doctor/hooks/pre-commit"))).toBe(
+  //     false,
+  //   );
+  //   expect(fs.readFileSync(path.join(fixture.projectRoot, ".cursor/hooks.json"), "utf8")).toContain(
+  //     "postToolUse",
+  //   );
+  //   expect(
+  //     fs.readFileSync(path.join(fixture.projectRoot, ".claude/settings.json"), "utf8"),
+  //   ).toContain("PostToolBatch");
+  // });
 
   it("--yes upgrades an existing @v1 workflow to @v2 in place", async () => {
     writeValidSkill(fixture.sourceDir);
@@ -1090,30 +1095,31 @@ describe("runInstallReactDoctor", () => {
     expect(fs.existsSync(workflowPath)).toBe(false);
   });
 
-  it("CI skips prompts without --yes but does not install the optional Git hook", async () => {
-    writeValidSkill(fixture.sourceDir);
-    process.env.CI = "1";
-    execFileSync("git", ["init"], {
-      cwd: fixture.projectRoot,
-      stdio: "ignore",
-    });
-
-    await runInstallReactDoctorForTest({
-      agentHooks: true,
-      sourceDir: fixture.sourceDir,
-      projectRoot: fixture.projectRoot,
-      detectedAgents: ["cursor"],
-    });
-
-    expect(
-      fs.existsSync(path.join(fixture.projectRoot, ".agents/skills/react-doctor/SKILL.md")),
-    ).toBe(true);
-    expect(fs.readFileSync(path.join(fixture.projectRoot, ".cursor/hooks.json"), "utf8")).toContain(
-      "postToolUse",
-    );
-    expect(fs.existsSync(path.join(fixture.projectRoot, ".git/hooks/pre-commit"))).toBe(false);
-    expect(fs.existsSync(path.join(fixture.projectRoot, ".react-doctor/hooks/pre-commit"))).toBe(
-      false,
-    );
-  });
+  // Disabled with the optional pre-commit / agent-hook setup (see install-react-doctor.ts Step 3).
+  // it("CI skips prompts without --yes but does not install the optional Git hook", async () => {
+  //   writeValidSkill(fixture.sourceDir);
+  //   process.env.CI = "1";
+  //   execFileSync("git", ["init"], {
+  //     cwd: fixture.projectRoot,
+  //     stdio: "ignore",
+  //   });
+  //
+  //   await runInstallReactDoctorForTest({
+  //     agentHooks: true,
+  //     sourceDir: fixture.sourceDir,
+  //     projectRoot: fixture.projectRoot,
+  //     detectedAgents: ["cursor"],
+  //   });
+  //
+  //   expect(
+  //     fs.existsSync(path.join(fixture.projectRoot, ".agents/skills/react-doctor/SKILL.md")),
+  //   ).toBe(true);
+  //   expect(fs.readFileSync(path.join(fixture.projectRoot, ".cursor/hooks.json"), "utf8")).toContain(
+  //     "postToolUse",
+  //   );
+  //   expect(fs.existsSync(path.join(fixture.projectRoot, ".git/hooks/pre-commit"))).toBe(false);
+  //   expect(fs.existsSync(path.join(fixture.projectRoot, ".react-doctor/hooks/pre-commit"))).toBe(
+  //     false,
+  //   );
+  // });
 });

From 2baa9c7fdc119880a1c40a247191911320ddcc9c Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 02:20:46 -0700
Subject: [PATCH 35/38] docs(skill): drop the non-existent /doctor command
 reference

The skill installs as `/react-doctor`; `/doctor` was never a real slash
command. Point both the triage trigger (SKILL.md) and the explain
playbook cross-reference at `/react-doctor`.
---
 skills/react-doctor/SKILL.md              | 2 +-
 skills/react-doctor/references/explain.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/skills/react-doctor/SKILL.md b/skills/react-doctor/SKILL.md
index c83890bef..237ab67ad 100644
--- a/skills/react-doctor/SKILL.md
+++ b/skills/react-doctor/SKILL.md
@@ -65,7 +65,7 @@ npx react-doctor@latest --verbose --scope changed
 
 If the score dropped, fix the regressions before committing. For a cleanup of the whole codebase, drop `--scope changed` (the default is `--scope full`) and fix by severity: errors first, then warnings.
 
-When the user types `/react-doctor`, `/doctor`, says "run react doctor", or asks for a full triage or cleanup pass (not a regression check), fetch the canonical local-triage playbook and follow every step in it:
+When the user types `/react-doctor`, says "run react doctor", or asks for a full triage or cleanup pass (not a regression check), fetch the canonical local-triage playbook and follow every step in it:
 
 ```bash
 curl --fail --silent --show-error \
diff --git a/skills/react-doctor/references/explain.md b/skills/react-doctor/references/explain.md
index 18cd0cea2..65435585b 100644
--- a/skills/react-doctor/references/explain.md
+++ b/skills/react-doctor/references/explain.md
@@ -1,6 +1,6 @@
 # Explaining and configuring rules
 
-Explain React Doctor rules and edit `doctor.config.*` safely. Use this when a user wants to understand a rule or change which rules run, not for fixing diagnostics (that is the main `react-doctor` skill, `/doctor`).
+Explain React Doctor rules and edit `doctor.config.*` safely. Use this when a user wants to understand a rule or change which rules run, not for fixing diagnostics (that is the main `react-doctor` skill, `/react-doctor`).
 
 Triggers: "why did this rule fire", "I disagree with this rule", "turn this rule off", "stop flagging X", "too noisy", "disable design rules".
 

From 3d46a739358fa27caa4c0dbf015cfdfdbfafaed1 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 02:43:06 -0700
Subject: [PATCH 36/38] chore(deps): upgrade agent-install to 0.0.8

Exclude first-party agent-install from the pnpm minimumReleaseAge guard so
the latest release installs without the vetting delay.
---
 packages/react-doctor/package.json |  2 +-
 pnpm-lock.yaml                     | 19 ++++++-------------
 pnpm-workspace.yaml                |  4 ++++
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/packages/react-doctor/package.json b/packages/react-doctor/package.json
index 24b80735a..d5b72d45d 100644
--- a/packages/react-doctor/package.json
+++ b/packages/react-doctor/package.json
@@ -58,7 +58,7 @@
   "dependencies": {
     "@babel/code-frame": "^7.29.0",
     "@sentry/node": "^10.54.0",
-    "agent-install": "0.0.5",
+    "agent-install": "0.0.8",
     "conf": "^15.1.0",
     "confbox": "^0.2.4",
     "deslop-js": "workspace:*",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 57028fa56..9987629f0 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -259,8 +259,8 @@ importers:
         specifier: ^10.54.0
         version: 10.54.0
       agent-install:
-        specifier: 0.0.5
-        version: 0.0.5
+        specifier: 0.0.8
+        version: 0.0.8
       conf:
         specifier: ^15.1.0
         version: 15.1.0
@@ -2746,8 +2746,8 @@ packages:
     engines: {node: '>=0.4.0'}
     hasBin: true
 
-  agent-install@0.0.5:
-    resolution: {integrity: sha512-nHlms9BkP8ZiY79HrwCGiA2DcNaXrAaJrCM/BEqQ7MEsSKyCk+2A76xPGylIfASZSZE0SaU3T0bNSg4rBPIJAQ==}
+  agent-install@0.0.8:
+    resolution: {integrity: sha512-x/AxHAzJx788UnM3MfjPIpIda8swS+x5Dz9wTt9MfteWzQXLgAtWuYU21FKj4rfwZa4Ru8R+xJIhxBnZBNsR+w==}
     hasBin: true
 
   ajv-formats@3.0.1:
@@ -4408,11 +4408,6 @@ packages:
   yallist@3.1.1:
     resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==}
 
-  yaml@2.8.3:
-    resolution: {integrity: sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==}
-    engines: {node: '>= 14.6'}
-    hasBin: true
-
   yaml@2.9.0:
     resolution: {integrity: sha512-2AvhNX3mb8zd6Zy7INTtSpl1F15HW6Wnqj0srWlkKLcpYl/gMIMJiyuGq2KeI2YFxUPjdlB+3Lc10seMLtL4cA==}
     engines: {node: '>= 14.6'}
@@ -6105,14 +6100,14 @@ snapshots:
 
   acorn@8.16.0: {}
 
-  agent-install@0.0.5:
+  agent-install@0.0.8:
     dependencies:
       '@iarna/toml': 2.2.5
       commander: 14.0.3
       jsonc-parser: 3.3.1
       picocolors: 1.1.1
       prompts: 2.4.2
-      yaml: 2.8.3
+      yaml: 2.9.0
 
   ajv-formats@3.0.1(ajv@8.20.0):
     optionalDependencies:
@@ -7896,8 +7891,6 @@ snapshots:
 
   yallist@3.1.1: {}
 
-  yaml@2.8.3: {}
-
   yaml@2.9.0: {}
 
   yocto-queue@0.1.0: {}
diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml
index 959ff85c4..67790e9d1 100644
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -2,6 +2,10 @@ packages:
   - "packages/*"
 
 minimumReleaseAge: 7200
+# agent-install is our own first-party package, so the release-age vetting delay
+# (meant for third-party supply-chain risk) doesn't apply.
+minimumReleaseAgeExclude:
+  - agent-install
 trustPolicy: no-downgrade
 blockExoticSubdeps: true
 

From 5b4136c8cad7901cdcd7252236231eb24f2eda58 Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 03:18:09 -0700
Subject: [PATCH 37/38] feat(install): add global vs project skill install
 choice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a `--global` flag and an interactive "Where should the skill be
installed?" prompt to `react-doctor install`, threading agent-install's
`global` option so the skill can land in each agent's home dir
(~/.cursor, ~/.claude, …) for every project. Defaults to project-local;
non-interactive runs stay local unless `--global` is passed. Records the
scope on the install.completed wide event.
---
 .changeset/install-global-skill.md            |  5 ++
 .../react-doctor/src/cli/commands/install.ts  |  2 +
 packages/react-doctor/src/cli/index.ts        |  5 ++
 .../src/cli/utils/install-react-doctor.ts     | 55 ++++++++++++++++++-
 .../src/cli/utils/strip-unknown-cli-flags.ts  |  1 +
 .../tests/install-react-doctor.test.ts        | 35 ++++++++++++
 6 files changed, 100 insertions(+), 3 deletions(-)
 create mode 100644 .changeset/install-global-skill.md

diff --git a/.changeset/install-global-skill.md b/.changeset/install-global-skill.md
new file mode 100644
index 000000000..586dc972e
--- /dev/null
+++ b/.changeset/install-global-skill.md
@@ -0,0 +1,5 @@
+---
+"react-doctor": patch
+---
+
+Add `--global` to `react-doctor install` (and an interactive "Where should the skill be installed?" prompt): install the `/react-doctor` skill into each agent's home directory (`~/.cursor`, `~/.claude`, …) so it applies to every project, instead of only this repo's local agent dirs. The default stays project-local; `--global` opts in, and non-interactive runs (`--yes`) remain local unless `--global` is passed.
diff --git a/packages/react-doctor/src/cli/commands/install.ts b/packages/react-doctor/src/cli/commands/install.ts
index 8a3ef26ff..e57704db1 100644
--- a/packages/react-doctor/src/cli/commands/install.ts
+++ b/packages/react-doctor/src/cli/commands/install.ts
@@ -11,6 +11,7 @@ interface InstallCommandOptions {
   yes?: boolean;
   dryRun?: boolean;
   agentHooks?: boolean;
+  global?: boolean;
   // Commander's `--cwd` always supplies `process.cwd()` as the default,
   // so this is defined when invoked via the CLI. The fallback is for
   // direct callers (tests) that construct the options object manually.
@@ -37,6 +38,7 @@ export const installAction = async (
       yes: options.yes ?? parentOptions?.yes,
       dryRun: options.dryRun,
       agentHooks: options.agentHooks,
+      global: options.global,
       projectRoot: options.cwd ?? process.cwd(),
     });
   } catch (error) {
diff --git a/packages/react-doctor/src/cli/index.ts b/packages/react-doctor/src/cli/index.ts
index b409bc11b..2bf484eaf 100644
--- a/packages/react-doctor/src/cli/index.ts
+++ b/packages/react-doctor/src/cli/index.ts
@@ -106,6 +106,7 @@ ${formatExampleLines([
   ["react-doctor install", "interactive setup"],
   ["react-doctor install --yes", "non-interactive; all detected agents"],
   ["react-doctor install --dry-run", "preview without writing files"],
+  ["react-doctor install --global", "install the skill for every project"],
   ["react-doctor install --agent-hooks", "also install native agent hooks"],
 ])}
 
@@ -223,6 +224,10 @@ program
   .description("Install the react-doctor skill into your coding agents and optional git hook")
   .option("-y, --yes", "skip prompts, install for all detected agents")
   .option("--dry-run", "show what would be installed without writing files")
+  .option(
+    "--global",
+    "install the skill in your home agent dirs (~/.cursor, ~/.claude, …) for every project instead of just this one",
+  )
   .option("--agent-hooks", "install native non-blocking agent hooks for Claude Code and Cursor")
   .option("-c, --cwd <cwd>", "working directory", process.cwd())
   .option("--color", "force colored output")
diff --git a/packages/react-doctor/src/cli/utils/install-react-doctor.ts b/packages/react-doctor/src/cli/utils/install-react-doctor.ts
index 1c6adb14e..f73fbc2da 100644
--- a/packages/react-doctor/src/cli/utils/install-react-doctor.ts
+++ b/packages/react-doctor/src/cli/utils/install-react-doctor.ts
@@ -336,6 +336,11 @@ interface InstallReactDoctorOptions {
   yes?: boolean;
   dryRun?: boolean;
   agentHooks?: boolean;
+  // Install the skill into each agent's home directory (~/.cursor, ~/.claude, …)
+  // so it applies to every project, instead of this repo's local agent dirs.
+  // Undefined means "ask interactively" (defaults to local on non-interactive
+  // runs); the `--global` flag sets it explicitly.
+  global?: boolean;
   // Overrides for tests; production callers leave these unset.
   sourceDir?: string;
   projectRoot?: string;
@@ -364,6 +369,7 @@ const installReactDoctorSkillStep = async (
   sourceDir: string,
   selectedAgents: SkillAgentType[],
   projectRoot: string,
+  installGlobally: boolean,
 ): Promise<void> => {
   const installSpinner = spinner(`Installing ${SKILL_NAME} skill...`).start();
   try {
@@ -372,6 +378,7 @@ const installReactDoctorSkillStep = async (
       agents: selectedAgents,
       cwd: projectRoot,
       mode: "copy",
+      global: installGlobally,
     });
 
     if (installResult.skills.length === 0) {
@@ -388,7 +395,7 @@ const installReactDoctorSkillStep = async (
     }
 
     installSpinner.succeed(
-      `${SKILL_NAME} skill installed for ${selectedAgents
+      `${SKILL_NAME} skill installed ${installGlobally ? "globally" : "for this project"} for ${selectedAgents
         .map((agent) => getSkillAgentConfig(agent).displayName)
         .join(", ")}.`,
     );
@@ -628,9 +635,43 @@ export const runInstallReactDoctor = async (
   // overwrites the remembered set.
   if (!skipPrompts && !options.dryRun) rememberInstallAgents(selectedAgents);
 
+  // Skill install scope: project-local (default) copies into this repo's agent
+  // dirs (.cursor, .claude, …); global copies into each agent's home dir
+  // (~/.cursor, ~/.claude, …) so the skill applies to every project. The
+  // `--global` flag forces it; otherwise ask interactively, defaulting to local
+  // (and a non-interactive run stays local unless `--global` is passed).
+  const isGlobalInstall =
+    options.global !== undefined
+      ? options.global
+      : skipPrompts
+        ? false
+        : (
+            await prompt<"installScope">(
+              {
+                type: "select",
+                name: "installScope",
+                message: "Where should the skill be installed?",
+                choices: [
+                  {
+                    title: "This project",
+                    description: "Agent dirs in this repo (.cursor, .claude, …)",
+                    value: "local",
+                  },
+                  {
+                    title: "All projects (global)",
+                    description: "Your home agent dirs (~/.cursor, ~/.claude, …)",
+                    value: "global",
+                  },
+                ],
+                initial: 0,
+              },
+              promptOptions,
+            )
+          ).installScope === "global";
+
   let dependencyResult: InstallReactDoctorDependencyResult | undefined;
   if (!options.dryRun) {
-    await installReactDoctorSkillStep(sourceDir, selectedAgents, projectRoot);
+    await installReactDoctorSkillStep(sourceDir, selectedAgents, projectRoot, isGlobalInstall);
     dependencyResult = await installReactDoctorPackageSetup(
       projectRoot,
       options.installDependencyRunner,
@@ -739,11 +780,16 @@ export const runInstallReactDoctor = async (
   // }
 
   if (options.dryRun) {
-    logger.log(`Dry run — would install ${SKILL_NAME} skill for:`);
+    logger.log(
+      `Dry run — would install ${SKILL_NAME} skill ${isGlobalInstall ? "globally" : "for this project"} for:`,
+    );
     for (const agent of selectedAgents) {
       logger.dim(`  - ${getSkillAgentConfig(agent).displayName}`);
     }
     logger.dim(`  Source: ${sourceDir}`);
+    logger.dim(
+      `  Location: ${isGlobalInstall ? "global (home agent dirs)" : "this project (repo agent dirs)"}`,
+    );
     logger.dim("  Package script: doctor (or react-doctor if doctor exists)");
     logger.dim("  Dev dependency: react-doctor");
     if (shouldInstallGitHook) {
@@ -778,6 +824,9 @@ export const runInstallReactDoctor = async (
     // beyond the curated defaults, the default set should be widened.
     agentsDetected: detectedAgents.length,
     usedRememberedAgents,
+    // Adoption + kill metric for the new `--global` surface: what share of
+    // installs put the skill in the home agent dirs vs. this project.
+    global: isGlobalInstall,
     gitHook: shouldInstallGitHook,
     agentHooks: shouldInstallAgentHooks,
     workflow: didInstallWorkflow,
diff --git a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
index 4b0bf2ef1..eb233140c 100644
--- a/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
+++ b/packages/react-doctor/src/cli/utils/strip-unknown-cli-flags.ts
@@ -52,6 +52,7 @@ const INSTALL_FLAG_SPEC: CliFlagSpec = {
     "--agent-hooks",
     "--color",
     "--dry-run",
+    "--global",
     "--help",
     "--no-color",
     "--yes",
diff --git a/packages/react-doctor/tests/install-react-doctor.test.ts b/packages/react-doctor/tests/install-react-doctor.test.ts
index 359f53291..a11d6bd5b 100644
--- a/packages/react-doctor/tests/install-react-doctor.test.ts
+++ b/packages/react-doctor/tests/install-react-doctor.test.ts
@@ -118,6 +118,9 @@ interface RunInteractiveInstallReactDoctorForTestOptions {
   readonly promptQuestions?: unknown[];
   readonly detectedAgents?: readonly SkillAgentType[];
   readonly lastSelectedAgents?: readonly SkillAgentType[];
+  // Answer to the "Where should the skill be installed?" prompt. Defaults to
+  // "local" so tests never write into the real home agent dirs.
+  readonly installScope?: "local" | "global";
 }
 
 const runInteractiveInstallReactDoctorForTest = async (
@@ -142,6 +145,8 @@ const runInteractiveInstallReactDoctorForTest = async (
     if (typeof questionName !== "string") return answers;
     if (questionName === "agents") {
       answers[questionName] = ["cursor"];
+    } else if (questionName === "installScope") {
+      answers[questionName] = options.installScope ?? "local";
     } else if (questionName === "ciChoice") {
       answers[questionName] = options.setupOptions.includes("workflow") ? "ci-yes" : "ci-no";
     } else if (questionName === "upgradeChoice") {
@@ -791,6 +796,36 @@ describe("runInstallReactDoctor", () => {
     expect(selectionByAgent.get("cursor")).toBe(false);
   });
 
+  it("prompts for skill install location and installs into the project on the default (local) choice", async () => {
+    writeValidSkill(fixture.sourceDir);
+    writePackageJson(fixture.projectRoot, { scripts: {} });
+    const promptQuestions: unknown[] = [];
+
+    await runInteractiveInstallReactDoctorForTest({
+      sourceDir: fixture.sourceDir,
+      projectRoot: fixture.projectRoot,
+      gitHookPath: path.join(fixture.projectRoot, ".git/hooks/pre-commit"),
+      setupOptions: [],
+      promptQuestions,
+      installScope: "local",
+    });
+
+    const scopeQuestion = promptQuestions.find(
+      (question): question is { choices: Array<{ value: string }> } =>
+        typeof question === "object" &&
+        question !== null &&
+        "name" in question &&
+        (question as { name?: unknown }).name === "installScope",
+    );
+    expect(scopeQuestion).toBeDefined();
+    expect(scopeQuestion!.choices.map((choice) => choice.value)).toEqual(["local", "global"]);
+    // The default (local) choice copies the skill into this repo's agent dir,
+    // not the home agent dirs.
+    expect(
+      fs.existsSync(path.join(fixture.projectRoot, ".agents/skills/react-doctor/SKILL.md")),
+    ).toBe(true);
+  });
+
   it("remembers the interactive selection so the next install defaults to it", async () => {
     writeValidSkill(fixture.sourceDir);
     writePackageJson(fixture.projectRoot, { scripts: {} });

From 135fa5dd7e5b3d5ac8e539372488b164a4ad50ac Mon Sep 17 00:00:00 2001
From: Aiden Bai <aiden.bai05@gmail.com>
Date: Wed, 24 Jun 2026 04:46:52 -0700
Subject: [PATCH 38/38] fix(browser): set perf recording marker without
 swallowing failures

Fold the recording-start marker write into the same awaited evaluate that
starts the React profiler, so a failed write can no longer be silently
swallowed and leave a stale floor behind. A later no-reload re-run
therefore can't over-count buffered LoAF/CLS entries from an earlier run.
A page that can't take the marker now fails the inspect loudly, matching
the React-profiler start it sits beside. (bugbot)
---
 packages/browser/src/session.ts | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/packages/browser/src/session.ts b/packages/browser/src/session.ts
index 2d6128e30..3d1c3e161 100644
--- a/packages/browser/src/session.ts
+++ b/packages/browser/src/session.ts
@@ -514,18 +514,17 @@ export class BrowserSession {
       // window: pre-action jank and idle frames are dropped while the setup work
       // below still falls inside the window for every signal alike. It lives on
       // the document, so an expression that navigates wipes it and the new page
-      // keeps its full load vitals (see collectPerformanceReport).
-      await this.page
-        .evaluate((markerKey) => {
-          Reflect.set(globalThis, markerKey, performance.now());
-        }, PERFORMANCE_RECORDING_MARKER)
-        .catch(() => {});
-
-      const reactStarted = await this.page.evaluate(() => {
+      // keeps its full load vitals (see collectPerformanceReport). It rides the
+      // same awaited evaluate that starts the React profiler so the write isn't
+      // silently swallowed: a page that can't take the marker fails the inspect
+      // loudly instead of leaving a stale floor for a later no-reload run to
+      // over-count buffered jank/CLS against.
+      const reactStarted = await this.page.evaluate((markerKey) => {
+        Reflect.set(globalThis, markerKey, performance.now());
         if (!globalThis.__REACT_PERF__) return false;
         globalThis.__REACT_PERF__.start();
         return true;
-      });
+      }, PERFORMANCE_RECORDING_MARKER);
 
       const scrollBefore = await this.readScroll();
       let result: unknown = null;