From d61a9f094bfb3e0a170992a06df1ac14ba596202 Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Fri, 19 Jun 2026 09:48:52 -0700 Subject: [PATCH 01/11] refactor(cfg): extract the control-flow graph into @react-doctor/cfg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the per-function CFG builder + dominance/reachability analyses out of oxlint-plugin-react-doctor into a new self-contained, internal @react-doctor/cfg package (owns its minimal ESTree glue so it has no dependency back on the plugin). The plugin bundles it at build time (devDependency), so the published surface is unchanged. CFG test suites and the run-cfg harness move with it. Pure extraction — no behavior change. --- packages/cfg/package.json | 30 ++++++++++++++++ packages/cfg/src/ast/es-tree-node-of-type.ts | 19 ++++++++++ packages/cfg/src/ast/es-tree-node-type.ts | 7 ++++ packages/cfg/src/ast/es-tree-node.ts | 15 ++++++++ packages/cfg/src/ast/has-type-property.ts | 4 +++ packages/cfg/src/ast/is-ast-node.ts | 7 ++++ packages/cfg/src/ast/is-function-like.ts | 21 +++++++++++ packages/cfg/src/ast/is-node-of-type.ts | 8 +++++ packages/cfg/src/ast/value-with-type.ts | 3 ++ .../src}/control-flow-graph.ts | 8 ++--- packages/cfg/src/index.ts | 14 ++++++++ .../cfg/tests/attach-parent-references.ts | 26 ++++++++++++++ ...control-flow-graph.expression-flow.test.ts | 2 +- ...control-flow-graph.oxc-unreachable.test.ts | 8 ++--- .../control-flow-graph.regression.test.ts | 2 +- .../tests}/control-flow-graph.test.ts | 8 ++--- .../control-flow-graph.try-finally.test.ts | 8 ++--- packages/cfg/tests/parse-fixture.ts | 36 +++++++++++++++++++ .../src/test-utils => cfg/tests}/run-cfg.ts | 8 ++--- packages/cfg/tsconfig.json | 8 +++++ packages/cfg/vite.config.ts | 16 +++++++++ .../oxlint-plugin-react-doctor/package.json | 1 + .../src/plugin/utils/rule-context.ts | 2 +- .../utils/wrap-with-semantic-context.ts | 4 +-- .../src/test-utils/run-rule.ts | 2 +- pnpm-lock.yaml | 16 
+++++++++ 26 files changed, 257 insertions(+), 26 deletions(-) create mode 100644 packages/cfg/package.json create mode 100644 packages/cfg/src/ast/es-tree-node-of-type.ts create mode 100644 packages/cfg/src/ast/es-tree-node-type.ts create mode 100644 packages/cfg/src/ast/es-tree-node.ts create mode 100644 packages/cfg/src/ast/has-type-property.ts create mode 100644 packages/cfg/src/ast/is-ast-node.ts create mode 100644 packages/cfg/src/ast/is-function-like.ts create mode 100644 packages/cfg/src/ast/is-node-of-type.ts create mode 100644 packages/cfg/src/ast/value-with-type.ts rename packages/{oxlint-plugin-react-doctor/src/plugin/semantic => cfg/src}/control-flow-graph.ts (99%) create mode 100644 packages/cfg/src/index.ts create mode 100644 packages/cfg/tests/attach-parent-references.ts rename packages/{oxlint-plugin-react-doctor/src/plugin/semantic => cfg/tests}/control-flow-graph.expression-flow.test.ts (99%) rename packages/{oxlint-plugin-react-doctor/src/plugin/semantic => cfg/tests}/control-flow-graph.oxc-unreachable.test.ts (94%) rename packages/{oxlint-plugin-react-doctor/src/plugin/semantic => cfg/tests}/control-flow-graph.regression.test.ts (99%) rename packages/{oxlint-plugin-react-doctor/src/plugin/semantic => cfg/tests}/control-flow-graph.test.ts (97%) rename packages/{oxlint-plugin-react-doctor/src/plugin/semantic => cfg/tests}/control-flow-graph.try-finally.test.ts (94%) create mode 100644 packages/cfg/tests/parse-fixture.ts rename packages/{oxlint-plugin-react-doctor/src/test-utils => cfg/tests}/run-cfg.ts (95%) create mode 100644 packages/cfg/tsconfig.json create mode 100644 packages/cfg/vite.config.ts diff --git a/packages/cfg/package.json b/packages/cfg/package.json new file mode 100644 index 000000000..09dc17212 --- /dev/null +++ b/packages/cfg/package.json @@ -0,0 +1,30 @@ +{ + "name": "@react-doctor/cfg", + "version": "0.5.6", + "private": true, + "description": "Internal: per-function control-flow graph + dominance/reachability analyses over 
an ESTree AST. Models the React Compiler HIR / oxc_cfg terminal taxonomy so rules can ask whether a node runs unconditionally, is reachable, dominates, or sits inside a loop. Not published.", + "license": "MIT", + "type": "module", + "sideEffects": false, + "exports": { + ".": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + }, + "scripts": { + "build": "node -e \"require('node:fs').rmSync('dist', { recursive: true, force: true })\" && cross-env NODE_ENV=production vp pack", + "test": "vp test run", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@typescript-eslint/types": "^8.59.3" + }, + "devDependencies": { + "@types/node": "^25.6.0", + "oxc-parser": "^0.135.0" + }, + "engines": { + "node": "^20.19.0 || >=22.13.0" + } +} diff --git a/packages/cfg/src/ast/es-tree-node-of-type.ts b/packages/cfg/src/ast/es-tree-node-of-type.ts new file mode 100644 index 000000000..277c3dea4 --- /dev/null +++ b/packages/cfg/src/ast/es-tree-node-of-type.ts @@ -0,0 +1,19 @@ +import type { TSESTree } from "@typescript-eslint/types"; +import type { EsTreeNode } from "./es-tree-node.js"; +import type { EsTreeNodeType } from "./es-tree-node-type.js"; + +// Distributes over the union so we can override `parent` without collapsing +// discriminants. TSESTree pins each node's parent to a specific node kind +// (e.g. JSXAttribute.parent: JSXOpeningElement), but a walker assigns parent +// freely as it descends, so we relax it to `EsTreeNode | null` here. +type WithLooseParent = NodeType extends NodeType + ? Omit & { parent?: EsTreeNode | null } + : never; + +// Resolves a string `type` discriminant to the real TSESTree shape when known, +// falling back to a loose `EsTreeNode & { type }` for nodes that TSESTree +// doesn't model (none today, but keeps the helper safe under future TS evolution). +export type EsTreeNodeOfType = + Extract extends never + ? 
EsTreeNode & { type: NodeType } + : WithLooseParent>; diff --git a/packages/cfg/src/ast/es-tree-node-type.ts b/packages/cfg/src/ast/es-tree-node-type.ts new file mode 100644 index 000000000..9c876f215 --- /dev/null +++ b/packages/cfg/src/ast/es-tree-node-type.ts @@ -0,0 +1,7 @@ +import type { TSESTree } from "@typescript-eslint/types"; + +// TSESTree models `type` as `AST_NODE_TYPES` enum members. Wrapping the union +// in a template literal widens it to the underlying string-literal values, so +// callers can pass plain strings like `"FunctionDeclaration"` to `isNodeOfType` +// without importing the enum. +export type EsTreeNodeType = `${TSESTree.Node["type"]}`; diff --git a/packages/cfg/src/ast/es-tree-node.ts b/packages/cfg/src/ast/es-tree-node.ts new file mode 100644 index 000000000..2f451088d --- /dev/null +++ b/packages/cfg/src/ast/es-tree-node.ts @@ -0,0 +1,15 @@ +import type { TSESTree } from "@typescript-eslint/types"; + +// Distributes over the TSESTree.Node union so each member gets its `parent` +// relaxed independently — TSESTree pins each node's parent to a specific +// kind (e.g. JSXAttribute.parent: JSXOpeningElement) but a walker assigns +// parent freely as it descends, so we re-broaden to `EsTreeNode | null` here. +type WithLooseParent = NodeType extends NodeType + ? Omit & { parent?: EsTreeNode | null } + : never; + +// THE AST node type the CFG builder walks. It's the full TSESTree +// discriminated union (every concrete node kind) with the `parent` field +// relaxed. Structurally identical to the consuming plugin's own +// `EsTreeNode`, so nodes cross the package boundary without a cast. 
+export type EsTreeNode = WithLooseParent; diff --git a/packages/cfg/src/ast/has-type-property.ts b/packages/cfg/src/ast/has-type-property.ts new file mode 100644 index 000000000..544ffdbda --- /dev/null +++ b/packages/cfg/src/ast/has-type-property.ts @@ -0,0 +1,4 @@ +import type { ValueWithType } from "./value-with-type.js"; + +export const hasTypeProperty = (value: unknown): value is ValueWithType => + Boolean(value && typeof value === "object" && "type" in value); diff --git a/packages/cfg/src/ast/is-ast-node.ts b/packages/cfg/src/ast/is-ast-node.ts new file mode 100644 index 000000000..839a3e860 --- /dev/null +++ b/packages/cfg/src/ast/is-ast-node.ts @@ -0,0 +1,7 @@ +import type { EsTreeNode } from "./es-tree-node.js"; +import { hasTypeProperty } from "./has-type-property.js"; + +export const isAstNode = (value: unknown): value is EsTreeNode => { + if (!hasTypeProperty(value)) return false; + return typeof value.type === "string"; +}; diff --git a/packages/cfg/src/ast/is-function-like.ts b/packages/cfg/src/ast/is-function-like.ts new file mode 100644 index 000000000..ab75d37af --- /dev/null +++ b/packages/cfg/src/ast/is-function-like.ts @@ -0,0 +1,21 @@ +import type { EsTreeNode } from "./es-tree-node.js"; +import type { EsTreeNodeOfType } from "./es-tree-node-of-type.js"; +import { isNodeOfType } from "./is-node-of-type.js"; + +// Type-guard for the three "function-like" ESTree node shapes: +// `ArrowFunctionExpression`, `FunctionExpression`, `FunctionDeclaration`. +// Accepts `null | undefined` so callers walking parent chains don't need +// their own pre-check. A function boundary is where the CFG stops +// descending — every function gets its own graph. 
+export const isFunctionLike = ( + node: EsTreeNode | null | undefined, +): node is + | EsTreeNodeOfType<"ArrowFunctionExpression"> + | EsTreeNodeOfType<"FunctionExpression"> + | EsTreeNodeOfType<"FunctionDeclaration"> => + Boolean( + node && + (isNodeOfType(node, "ArrowFunctionExpression") || + isNodeOfType(node, "FunctionExpression") || + isNodeOfType(node, "FunctionDeclaration")), + ); diff --git a/packages/cfg/src/ast/is-node-of-type.ts b/packages/cfg/src/ast/is-node-of-type.ts new file mode 100644 index 000000000..04045075b --- /dev/null +++ b/packages/cfg/src/ast/is-node-of-type.ts @@ -0,0 +1,8 @@ +import type { EsTreeNodeOfType } from "./es-tree-node-of-type.js"; +import type { EsTreeNodeType } from "./es-tree-node-type.js"; +import { hasTypeProperty } from "./has-type-property.js"; + +export const isNodeOfType = ( + node: unknown, + type: NodeType, +): node is EsTreeNodeOfType => Boolean(hasTypeProperty(node) && node.type === type); diff --git a/packages/cfg/src/ast/value-with-type.ts b/packages/cfg/src/ast/value-with-type.ts new file mode 100644 index 000000000..e85a2cc5c --- /dev/null +++ b/packages/cfg/src/ast/value-with-type.ts @@ -0,0 +1,3 @@ +export interface ValueWithType { + type?: unknown; +} diff --git a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.ts b/packages/cfg/src/control-flow-graph.ts similarity index 99% rename from packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.ts rename to packages/cfg/src/control-flow-graph.ts index 0ecca6f49..1d2ba155e 100644 --- a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.ts +++ b/packages/cfg/src/control-flow-graph.ts @@ -1,7 +1,7 @@ -import type { EsTreeNode } from "../utils/es-tree-node.js"; -import { isAstNode } from "../utils/is-ast-node.js"; -import { isFunctionLike } from "../utils/is-function-like.js"; -import { isNodeOfType } from "../utils/is-node-of-type.js"; +import type { EsTreeNode } from "./ast/es-tree-node.js"; 
+import { isAstNode } from "./ast/is-ast-node.js"; +import { isFunctionLike } from "./ast/is-function-like.js"; +import { isNodeOfType } from "./ast/is-node-of-type.js"; // Per-function CFG. Mirrors the subset of `oxc_cfg` we need to answer: // "Is this AST node guaranteed to execute on every call to its diff --git a/packages/cfg/src/index.ts b/packages/cfg/src/index.ts new file mode 100644 index 000000000..caaa112ca --- /dev/null +++ b/packages/cfg/src/index.ts @@ -0,0 +1,14 @@ +export { analyzeControlFlow } from "./control-flow-graph.js"; +export type { + BasicBlock, + CfgEdge, + CfgEdgeKind, + ControlFlowAnalysis, + FunctionCfg, +} from "./control-flow-graph.js"; +export type { EsTreeNode } from "./ast/es-tree-node.js"; +export type { EsTreeNodeOfType } from "./ast/es-tree-node-of-type.js"; +export type { EsTreeNodeType } from "./ast/es-tree-node-type.js"; +export { isAstNode } from "./ast/is-ast-node.js"; +export { isFunctionLike } from "./ast/is-function-like.js"; +export { isNodeOfType } from "./ast/is-node-of-type.js"; diff --git a/packages/cfg/tests/attach-parent-references.ts b/packages/cfg/tests/attach-parent-references.ts new file mode 100644 index 000000000..b5f8103a6 --- /dev/null +++ b/packages/cfg/tests/attach-parent-references.ts @@ -0,0 +1,26 @@ +import type { EsTreeNode } from "../src/ast/es-tree-node.js"; +import { isAstNode } from "../src/ast/is-ast-node.js"; + +// Walks the AST setting each child's `.parent` to its owning parent node. +// `oxc-parser` emits an unparented AST, but the CFG's `enclosingFunction` +// walk relies on `node.parent`, so we re-attach here before analyzing a +// freshly parsed fixture. 
+export const attachParentReferences = (root: EsTreeNode): void => { + const visit = (node: EsTreeNode, parent: EsTreeNode | null): void => { + const writableNode = node as unknown as { parent?: EsTreeNode | null }; + writableNode.parent = parent; + const nodeRecord = node as unknown as Record; + for (const key of Object.keys(nodeRecord)) { + if (key === "parent") continue; + const child = nodeRecord[key]; + if (Array.isArray(child)) { + for (const item of child) { + if (isAstNode(item)) visit(item, node); + } + } else if (isAstNode(child)) { + visit(child, node); + } + } + }; + visit(root, null); +}; diff --git a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.expression-flow.test.ts b/packages/cfg/tests/control-flow-graph.expression-flow.test.ts similarity index 99% rename from packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.expression-flow.test.ts rename to packages/cfg/tests/control-flow-graph.expression-flow.test.ts index 9d1253751..6bcd35077 100644 --- a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.expression-flow.test.ts +++ b/packages/cfg/tests/control-flow-graph.expression-flow.test.ts @@ -1,4 +1,4 @@ -import { runCfgCases } from "../../test-utils/run-cfg.js"; +import { runCfgCases } from "./run-cfg.js"; // Expression-level control flow — the CFG lowers a ternary's arms, a // `&&`/`||`/`??` right operand, and a logical assignment's RHS into their own diff --git a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.oxc-unreachable.test.ts b/packages/cfg/tests/control-flow-graph.oxc-unreachable.test.ts similarity index 94% rename from packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.oxc-unreachable.test.ts rename to packages/cfg/tests/control-flow-graph.oxc-unreachable.test.ts index e0d6ce3e1..f31c24d16 100644 --- a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.oxc-unreachable.test.ts +++ 
b/packages/cfg/tests/control-flow-graph.oxc-unreachable.test.ts @@ -1,8 +1,8 @@ import { describe, expect, it } from "vite-plus/test"; -import { analyzeControlFlow } from "./control-flow-graph.js"; -import { attachParentReferences } from "../../test-utils/attach-parent-references.js"; -import { parseFixture } from "../../test-utils/parse-fixture.js"; -import type { EsTreeNode } from "../utils/es-tree-node.js"; +import { analyzeControlFlow } from "../src/control-flow-graph.js"; +import { attachParentReferences } from "./attach-parent-references.js"; +import { parseFixture } from "./parse-fixture.js"; +import type { EsTreeNode } from "../src/ast/es-tree-node.js"; // Control-flow fixtures ported from oxc's `eslint/no-unreachable` test // suite (crates/oxc_linter/src/rules/eslint/no_unreachable.rs). oxc diff --git a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.regression.test.ts b/packages/cfg/tests/control-flow-graph.regression.test.ts similarity index 99% rename from packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.regression.test.ts rename to packages/cfg/tests/control-flow-graph.regression.test.ts index 2945639e6..7ef4178ee 100644 --- a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.regression.test.ts +++ b/packages/cfg/tests/control-flow-graph.regression.test.ts @@ -1,4 +1,4 @@ -import { runCfgCases } from "../../test-utils/run-cfg.js"; +import { runCfgCases } from "./run-cfg.js"; // CFG-only regression catalog mined from the Faire monorepo: control-flow // bug classes where AST matching is insufficient and the same shape is a bug diff --git a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.test.ts b/packages/cfg/tests/control-flow-graph.test.ts similarity index 97% rename from packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.test.ts rename to packages/cfg/tests/control-flow-graph.test.ts index b46b1ce8c..5c53ed995 100644 --- 
a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.test.ts +++ b/packages/cfg/tests/control-flow-graph.test.ts @@ -1,8 +1,8 @@ import { describe, expect, it } from "@voidzero-dev/vite-plus-test"; -import { analyzeControlFlow } from "./control-flow-graph.js"; -import { attachParentReferences } from "../../test-utils/attach-parent-references.js"; -import { parseFixture } from "../../test-utils/parse-fixture.js"; -import type { EsTreeNode } from "../utils/es-tree-node.js"; +import { analyzeControlFlow } from "../src/control-flow-graph.js"; +import { attachParentReferences } from "./attach-parent-references.js"; +import { parseFixture } from "./parse-fixture.js"; +import type { EsTreeNode } from "../src/ast/es-tree-node.js"; const analyze = (code: string) => { const parsed = parseFixture(code); diff --git a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.try-finally.test.ts b/packages/cfg/tests/control-flow-graph.try-finally.test.ts similarity index 94% rename from packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.try-finally.test.ts rename to packages/cfg/tests/control-flow-graph.try-finally.test.ts index c3aad8bf6..357e02eac 100644 --- a/packages/oxlint-plugin-react-doctor/src/plugin/semantic/control-flow-graph.try-finally.test.ts +++ b/packages/cfg/tests/control-flow-graph.try-finally.test.ts @@ -1,8 +1,8 @@ import { describe, expect, it } from "vite-plus/test"; -import { analyzeControlFlow } from "./control-flow-graph.js"; -import { attachParentReferences } from "../../test-utils/attach-parent-references.js"; -import { parseFixture } from "../../test-utils/parse-fixture.js"; -import type { EsTreeNode } from "../utils/es-tree-node.js"; +import { analyzeControlFlow } from "../src/control-flow-graph.js"; +import { attachParentReferences } from "./attach-parent-references.js"; +import { parseFixture } from "./parse-fixture.js"; +import type { EsTreeNode } from "../src/ast/es-tree-node.js"; // 
try / catch / finally control-flow, ported from oxc's `no-unreachable`, // `no-unsafe-finally`, and `getter-return` suites. These exercise the diff --git a/packages/cfg/tests/parse-fixture.ts b/packages/cfg/tests/parse-fixture.ts new file mode 100644 index 000000000..bdce709e6 --- /dev/null +++ b/packages/cfg/tests/parse-fixture.ts @@ -0,0 +1,36 @@ +import * as path from "node:path"; +import { parseSync } from "oxc-parser"; +import type { EsTreeNode } from "../src/ast/es-tree-node.js"; + +interface ParseFixtureResult { + program: EsTreeNode; + errors: ReadonlyArray<{ message: string }>; +} + +const FILENAME_TO_LANG: Record = { + ".ts": "ts", + ".tsx": "tsx", + ".js": "js", + ".jsx": "jsx", + ".mjs": "js", + ".cjs": "js", + ".mts": "ts", + ".cts": "ts", +}; + +const resolveLang = (filename: string): "ts" | "tsx" | "js" | "jsx" => { + const extension = path.extname(filename).toLowerCase(); + return FILENAME_TO_LANG[extension] ?? "tsx"; +}; + +// Parses a code fixture using oxc-parser (the same engine oxlint uses at +// runtime) with `astType: "ts"` so the returned AST is TSESTree-shaped — +// matching the type universe `EsTreeNode` describes. The default filename +// ends in `.tsx` so JSX always parses. 
+export const parseFixture = (code: string, filename = "fixture.tsx"): ParseFixtureResult => { + const result = parseSync(filename, code, { astType: "ts", lang: resolveLang(filename) }); + return { + program: result.program as unknown as EsTreeNode, + errors: result.errors.map((parseError) => ({ message: parseError.message })), + }; +}; diff --git a/packages/oxlint-plugin-react-doctor/src/test-utils/run-cfg.ts b/packages/cfg/tests/run-cfg.ts similarity index 95% rename from packages/oxlint-plugin-react-doctor/src/test-utils/run-cfg.ts rename to packages/cfg/tests/run-cfg.ts index e1bc71e53..8fae3815e 100644 --- a/packages/oxlint-plugin-react-doctor/src/test-utils/run-cfg.ts +++ b/packages/cfg/tests/run-cfg.ts @@ -1,10 +1,10 @@ import { describe, expect, it } from "vite-plus/test"; -import { analyzeControlFlow } from "../plugin/semantic/control-flow-graph.js"; -import type { ControlFlowAnalysis } from "../plugin/semantic/control-flow-graph.js"; +import { analyzeControlFlow } from "../src/control-flow-graph.js"; +import type { ControlFlowAnalysis } from "../src/control-flow-graph.js"; import { attachParentReferences } from "./attach-parent-references.js"; import { parseFixture } from "./parse-fixture.js"; -import { isAstNode } from "../plugin/utils/is-ast-node.js"; -import type { EsTreeNode } from "../plugin/utils/es-tree-node.js"; +import { isAstNode } from "../src/ast/is-ast-node.js"; +import type { EsTreeNode } from "../src/ast/es-tree-node.js"; // A control-flow node addressed by the name of a marker call in the // fixture, e.g. `acquire()` is `"acquire"`. 
When a fixture calls the same diff --git a/packages/cfg/tsconfig.json b/packages/cfg/tsconfig.json new file mode 100644 index 000000000..8de23db81 --- /dev/null +++ b/packages/cfg/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "types": ["node"] + }, + "include": ["src"] +} diff --git a/packages/cfg/vite.config.ts b/packages/cfg/vite.config.ts new file mode 100644 index 000000000..4da0800cd --- /dev/null +++ b/packages/cfg/vite.config.ts @@ -0,0 +1,16 @@ +import { defineConfig } from "vite-plus"; + +export default defineConfig({ + pack: [ + { + entry: { index: "./src/index.ts" }, + deps: { + neverBundle: ["@typescript-eslint/types"], + }, + dts: true, + target: "node20", + platform: "node", + fixedExtension: false, + }, + ], +}); diff --git a/packages/oxlint-plugin-react-doctor/package.json b/packages/oxlint-plugin-react-doctor/package.json index b0ebc04d4..7757831ba 100644 --- a/packages/oxlint-plugin-react-doctor/package.json +++ b/packages/oxlint-plugin-react-doctor/package.json @@ -58,6 +58,7 @@ "oxc-parser": "^0.135.0" }, "devDependencies": { + "@react-doctor/cfg": "workspace:*", "@types/node": "^25.6.0" }, "engines": { diff --git a/packages/oxlint-plugin-react-doctor/src/plugin/utils/rule-context.ts b/packages/oxlint-plugin-react-doctor/src/plugin/utils/rule-context.ts index d078fa2f1..998d7b558 100644 --- a/packages/oxlint-plugin-react-doctor/src/plugin/utils/rule-context.ts +++ b/packages/oxlint-plugin-react-doctor/src/plugin/utils/rule-context.ts @@ -1,5 +1,5 @@ import type { ReportDescriptor } from "./report-descriptor.js"; -import type { ControlFlowAnalysis } from "../semantic/control-flow-graph.js"; +import type { ControlFlowAnalysis } from "@react-doctor/cfg"; import type { ScopeAnalysis } from "../semantic/scope-analysis.js"; // The "base" context the host (oxlint at runtime, ESLint via the diff --git a/packages/oxlint-plugin-react-doctor/src/plugin/utils/wrap-with-semantic-context.ts 
b/packages/oxlint-plugin-react-doctor/src/plugin/utils/wrap-with-semantic-context.ts index 19f571f9e..8fe7b33d4 100644 --- a/packages/oxlint-plugin-react-doctor/src/plugin/utils/wrap-with-semantic-context.ts +++ b/packages/oxlint-plugin-react-doctor/src/plugin/utils/wrap-with-semantic-context.ts @@ -6,8 +6,8 @@ import type { HostRule } from "./rule-plugin.js"; import type { RuleVisitors } from "./rule-visitors.js"; import { analyzeScopes } from "../semantic/scope-analysis.js"; import type { ScopeAnalysis } from "../semantic/scope-analysis.js"; -import { analyzeControlFlow } from "../semantic/control-flow-graph.js"; -import type { ControlFlowAnalysis } from "../semantic/control-flow-graph.js"; +import { analyzeControlFlow } from "@react-doctor/cfg"; +import type { ControlFlowAnalysis } from "@react-doctor/cfg"; // Wraps a rule so `context.scopes` and `context.cfg` exist at runtime // even when oxlint's host context doesn't pre-build them. We build the diff --git a/packages/oxlint-plugin-react-doctor/src/test-utils/run-rule.ts b/packages/oxlint-plugin-react-doctor/src/test-utils/run-rule.ts index c710fa5fd..a609d4eaa 100644 --- a/packages/oxlint-plugin-react-doctor/src/test-utils/run-rule.ts +++ b/packages/oxlint-plugin-react-doctor/src/test-utils/run-rule.ts @@ -8,7 +8,7 @@ import type { Rule } from "../plugin/utils/rule.js"; import type { RuleContext } from "../plugin/utils/rule-context.js"; import type { RuleVisitors } from "../plugin/utils/rule-visitors.js"; import { analyzeScopes } from "../plugin/semantic/scope-analysis.js"; -import { analyzeControlFlow } from "../plugin/semantic/control-flow-graph.js"; +import { analyzeControlFlow } from "@react-doctor/cfg"; export interface RunRuleOptions { filename?: string; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c91c725d5..a0397c009 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -59,6 +59,19 @@ importers: specifier: ^25.6.0 version: 25.6.0 + packages/cfg: + dependencies: + '@typescript-eslint/types': + 
specifier: ^8.59.3 + version: 8.59.3 + devDependencies: + '@types/node': + specifier: ^25.6.0 + version: 25.6.0 + oxc-parser: + specifier: ^0.135.0 + version: 0.135.0 + packages/core: dependencies: '@effect/platform-node-shared': @@ -152,6 +165,9 @@ importers: specifier: ^0.135.0 version: 0.135.0 devDependencies: + '@react-doctor/cfg': + specifier: workspace:* + version: link:../cfg '@types/node': specifier: ^25.6.0 version: 25.6.0 From 7652d4cb3c18e775379479d5d40dbbf0bf29450a Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Fri, 19 Jun 2026 09:54:47 -0700 Subject: [PATCH 02/11] feat(cfg): port oxc no-unreachable corpus + document the package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the partial hand-port with a full port of oxc's eslint/no-unreachable pass/fail corpus (37 cases) asserted directly against the graph's isUnreachable via dead()/live() markers — our CFG matches oxc's reachability semantics across the whole suite. Add a README documenting the analysis API, the modeled terminal taxonomy (statement- and expression-level), the deliberate non-goals, and fixture provenance. 
--- .changeset/refactor-extract-cfg-package.md | 7 + packages/cfg/README.md | 80 +++++++ ...trol-flow-graph.oxc-no-unreachable.test.ts | 207 ++++++++++++++++++ ...control-flow-graph.oxc-unreachable.test.ts | 107 --------- 4 files changed, 294 insertions(+), 107 deletions(-) create mode 100644 .changeset/refactor-extract-cfg-package.md create mode 100644 packages/cfg/README.md create mode 100644 packages/cfg/tests/control-flow-graph.oxc-no-unreachable.test.ts delete mode 100644 packages/cfg/tests/control-flow-graph.oxc-unreachable.test.ts diff --git a/.changeset/refactor-extract-cfg-package.md b/.changeset/refactor-extract-cfg-package.md new file mode 100644 index 000000000..bfb877adf --- /dev/null +++ b/.changeset/refactor-extract-cfg-package.md @@ -0,0 +1,7 @@ +--- +"oxlint-plugin-react-doctor": patch +--- + +Extract the control-flow graph into a dedicated internal `@react-doctor/cfg` package. + +The per-function CFG builder and its dominance / reachability analyses now live in their own self-contained package (bundled into the plugin at build time, so the published surface is unchanged). The package ships a typed `analyzeControlFlow` API, a README documenting the modeled terminal taxonomy, and a full port of oxc's `eslint/no-unreachable` `pass` / `fail` corpus asserted directly against the graph's `isUnreachable`. diff --git a/packages/cfg/README.md b/packages/cfg/README.md new file mode 100644 index 000000000..8b5442271 --- /dev/null +++ b/packages/cfg/README.md @@ -0,0 +1,80 @@ +# @react-doctor/cfg + +Internal (unpublished) per-function **control-flow graph** for an ESTree AST, +plus the dominance / reachability analyses React Doctor's CFG-backed rules run +on. The `oxlint-plugin-react-doctor` package bundles it at build time, so it is +not a runtime dependency of anything published. 
+ +It exists so a rule can ask precise control-flow questions — _does this node run +on every path?_, _is this node reachable from that one?_, _is it inside a +loop?_ — instead of pattern-matching the AST and hoping the shape generalizes. +That is the same class of question the React Compiler answers over its HIR and +oxc answers over `oxc_cfg`. + +## API + +```ts +import { analyzeControlFlow } from "@react-doctor/cfg"; + +const cfg = analyzeControlFlow(programRoot); // ControlFlowAnalysis +``` + +`analyzeControlFlow(program)` lazily builds one graph per function it +encounters and returns a `ControlFlowAnalysis`: + +| Method | Question it answers | +| ------------------------------------- | ------------------------------------------------------------------------------- | +| `isUnconditionalFromEntry(node)` | Does `node` run on **every** path from its function's entry to its exit? | +| `isReachable(fromNode, toNode)` | Can control flow from `fromNode` to `toNode` within the same function? | +| `dominates(aNode, bNode)` | Does `aNode` run on every path that reaches `bNode` (a guard before a sink)? | +| `postDominates(bNode, aNode)` | Does `bNode` run on every path from `aNode` to exit (cleanup after a resource)? | +| `isInsideLoop(node)` | Is `node`'s block part of a cycle in its own function's CFG? | +| `isUnreachable(node)` | Is `node`'s block dead code (after an unconditional return / throw / break)? | +| `cfgFor(fn)` / `enclosingFunction(n)` | The raw `FunctionCfg` (blocks + edges) / the function a node belongs to. | + +Each function boundary (`function` declaration / expression, arrow) gets its own +acyclic-except-for-loops graph; a callback that escapes a loop is **not** inside +that loop because it is a separate function. 
+ +## What it models + +Statement-level terminals: `if` / `switch` / `for` / `for-in` / `for-of` / +`while` / `do-while` / labeled `break` & `continue` / `return` / `throw` / +`try` / `catch` / `finally` (normal completion is routed through `finalize` / +`join` edges so reachability after a `try` is correct). + +Expression-level terminals, lowered into basic blocks the way the React +Compiler lowers its HIR — so a hook or `setState` buried in a branch is seen as +conditional: + +- ternary `a ? b : c` +- logical `&&` / `||` / `??` (and logical-assignment `&&=` / `||=` / `??=`) +- optional chaining `a?.b?.()` (each `?.` branches to a shared short-circuit + target) + +Every node maps to the block where its evaluation **completes** (its join +point), which keeps dominance / reachability accurate through nested +expressions. + +Deliberately **not** modeled: per-instruction "maybe-throw" edges (every call +can throw); `var` / function-declaration hoisting as a reachability fact (that +is a rule policy, not a CFG fact). Both are documented at the top of +`src/control-flow-graph.ts`. + +## Tests & fixture provenance + +- `tests/control-flow-graph.oxc-no-unreachable.test.ts` — a port of oxc's + `eslint/no-unreachable` `pass` / `fail` corpus + (`crates/oxc_linter/src/rules/eslint/no_unreachable.rs`), asserted directly + against `isUnreachable`. Each upstream case is rewritten so the statement oxc + flags becomes a `dead()` marker (must be unreachable) or a `live()` marker + (must be reachable). +- `tests/control-flow-graph.try-finally.test.ts` — `try` / `catch` / `finally` + normal-completion edges. +- `tests/control-flow-graph.expression-flow.test.ts` — the expression-level + terminals above. +- `tests/control-flow-graph.regression.test.ts` — React-shaped regressions + (conditional hooks, `setState` in a branch). +- `tests/control-flow-graph.test.ts` — core graph construction. + +Run `pnpm --filter @react-doctor/cfg test`. 
diff --git a/packages/cfg/tests/control-flow-graph.oxc-no-unreachable.test.ts b/packages/cfg/tests/control-flow-graph.oxc-no-unreachable.test.ts new file mode 100644 index 000000000..ecd4ad116 --- /dev/null +++ b/packages/cfg/tests/control-flow-graph.oxc-no-unreachable.test.ts @@ -0,0 +1,207 @@ +import { runCfgCases } from "./run-cfg.js"; + +// Full port of oxc's `eslint/no-unreachable` corpus +// (`crates/oxc_linter/src/rules/eslint/no_unreachable.rs`, the `pass` / +// `fail` vecs). oxc asserts these through its rule; here we assert the +// underlying CFG fact directly via `isUnreachable`. Each upstream case is +// rewritten so the statement oxc cares about becomes a marker call: +// - `dead()` — oxc FAIL: the statement is unreachable. +// - `live()` — oxc PASS: the statement is reachable. +// The surrounding control flow is preserved verbatim. +// +// Deliberately omitted: cases whose only point is `var` / function- +// declaration HOISTING (e.g. `function foo() { return x; var x; }` passes +// in oxc because the *declaration* hoists). Hoisting is a rule policy, not +// a CFG reachability fact, and is a documented divergence — see the header +// of control-flow-graph.ts. 
+ +runCfgCases("cfg-oxc-no-unreachable / fail (statement is unreachable)", [ + { + name: "code after return", + code: `function foo() { return x; dead(); }`, + unreachable: { dead: true }, + }, + { + name: "code after throw", + code: `function foo() { throw error; dead(); }`, + unreachable: { dead: true }, + }, + { + name: "code after break in loop", + code: `while (true) { break; dead(); }`, + unreachable: { dead: true }, + }, + { + name: "code after continue in loop", + code: `while (true) { continue; dead(); }`, + unreachable: { dead: true }, + }, + { + name: "code after return in a switch case", + code: `function foo() { switch (foo) { case 1: return; dead(); } }`, + unreachable: { dead: true }, + }, + { + name: "code after throw in a switch case", + code: `function foo() { switch (foo) { case 1: throw e; dead(); } }`, + unreachable: { dead: true }, + }, + { + name: "code after break in a switch case inside a loop", + code: `while (true) { switch (foo) { case 1: break; dead(); } }`, + unreachable: { dead: true }, + }, + { + name: "code after continue in a switch case inside a loop", + code: `while (true) { switch (foo) { case 1: continue; dead(); } }`, + unreachable: { dead: true }, + }, + { + name: "code after a top-level throw", + code: `var x = 1; throw "uh oh"; dead();`, + unreachable: { dead: true }, + }, + { + name: "both if branches terminate (return / throw)", + code: `function foo() { var x = 1; if (x) { return; } else { throw e; } dead(); }`, + unreachable: { dead: true }, + }, + { + name: "both if branches terminate, unbraced", + code: `function foo() { var x = 1; if (x) return; else throw -1; dead(); }`, + unreachable: { dead: true }, + }, + { + name: "try returns, empty finally — code after is unreachable", + code: `function foo() { var x = 1; try { return; } finally {} dead(); }`, + unreachable: { dead: true }, + }, + { + name: "finally returns — code after the try is unreachable", + code: `function foo() { var x = 1; try {} finally { return; } 
dead(); }`, + unreachable: { dead: true }, + }, + { + name: "do-while body returns on first iteration", + code: `function foo() { var x = 1; do { return; } while (x); dead(); }`, + unreachable: { dead: true }, + }, + { + name: "loop body both breaks and continues — trailing code unreachable", + code: `function foo() { var x = 1; while (x) { if (x) break; else continue; dead(); } }`, + unreachable: { dead: true }, + }, + { + name: "infinite for with a continue and no break", + code: `function foo() { var x = 1; for (;;) { if (x) continue; } dead(); }`, + unreachable: { dead: true }, + }, + { + name: "while (true) with empty body", + code: `function foo() { var x = 1; while (true) {} dead(); }`, + unreachable: { dead: true }, + }, + { + name: "do {} while (true)", + code: `function foo() { var x = 1; do {} while (true); dead(); }`, + unreachable: { dead: true }, + }, + { + name: "branches under an early return are unreachable", + code: `function foo() { return; if (Math.random() > 0.5) { dead(); } }`, + unreachable: { dead: true }, + }, + { + name: "code after return inside a nested function (own CFG)", + code: `function foo() { if (a) { function bar() { return; dead(); } } }`, + unreachable: { dead: true }, + }, + { + name: "return after an infinite loop that returns", + code: `function foo() { while (true) { return ""; } dead(); }`, + unreachable: { dead: true }, + }, +]); + +runCfgCases("cfg-oxc-no-unreachable / pass (statement is reachable)", [ + { + name: "code after an if that only conditionally returns", + code: `function foo() { var x = 1; if (x) { return; } live(); }`, + unreachable: { live: false }, + }, + { + name: "code after an if whose else returns", + code: `function foo() { var x = 1; if (x) {} else { return; } live(); }`, + unreachable: { live: false }, + }, + { + name: "switch with a break path reaches trailing code", + code: `function foo() { var x = 1; switch (x) { case 0: break; default: return; } live(); }`, + unreachable: { live: false }, + }, 
+ { + name: "while loop may not run, trailing code reachable", + code: `function foo() { var x = 1; while (x) { return; } live(); }`, + unreachable: { live: false }, + }, + { + name: "for-in may not iterate, trailing code reachable", + code: `function foo() { var x = 1; for (x in {}) { return; } live(); }`, + unreachable: { live: false }, + }, + { + name: "finally always runs even when try returns", + code: `function foo() { var x = 1; try { return; } finally { live(); } }`, + unreachable: { live: false }, + }, + { + name: "infinite for with a break reaches trailing code", + code: `function foo() { var x = 1; for (;;) { if (x) break; } live(); }`, + unreachable: { live: false }, + }, + { + name: "labeled block break reaches code after the block", + code: `A: { break A; } live();`, + unreachable: { live: false }, + }, + { + name: "switch without default falls through to trailing code", + code: `function foo() { switch (authType) { case 1: return a(); case 2: return b(); case 3: return c(); } live(); }`, + unreachable: { live: false }, + }, + { + name: "code after try/catch/finally is reachable", + code: `try { a(); } catch (e) { b(); } finally { c(); } live();`, + unreachable: { live: false }, + }, + { + name: "code after try/finally is reachable", + code: `try { a(); } finally { b(); } live();`, + unreachable: { live: false }, + }, + { + name: "catch body is reachable when try has an infinite loop", + code: `try { while (true) { a(); } } catch { live(); }`, + unreachable: { live: false }, + }, + { + name: "finally body is reachable when try has an infinite loop", + code: `try { while (true) { a(); } } finally { live(); }`, + unreachable: { live: false }, + }, + { + name: "return after a conditionally-infinite loop is reachable", + code: `function foo() { if (Math.random() === 0.5) { while (true) { return "hi"; } } live(); }`, + unreachable: { live: false }, + }, + { + name: "sequential for loops in an else branch are reachable", + code: `if (a) { a(); } else { for 
(let i = 1; i <= 10; i++) { b(); } for (let i = 1; i <= 10; i++) { live(); } }`, + unreachable: { live: false }, + }, + { + name: "code after a try whose body throws into a catch", + code: `try { throw "error"; } catch (err) { b(); } live();`, + unreachable: { live: false }, + }, +]); diff --git a/packages/cfg/tests/control-flow-graph.oxc-unreachable.test.ts b/packages/cfg/tests/control-flow-graph.oxc-unreachable.test.ts deleted file mode 100644 index f31c24d16..000000000 --- a/packages/cfg/tests/control-flow-graph.oxc-unreachable.test.ts +++ /dev/null @@ -1,107 +0,0 @@ -import { describe, expect, it } from "vite-plus/test"; -import { analyzeControlFlow } from "../src/control-flow-graph.js"; -import { attachParentReferences } from "./attach-parent-references.js"; -import { parseFixture } from "./parse-fixture.js"; -import type { EsTreeNode } from "../src/ast/es-tree-node.js"; - -// Control-flow fixtures ported from oxc's `eslint/no-unreachable` test -// suite (crates/oxc_linter/src/rules/eslint/no_unreachable.rs). oxc -// asserts these via the rule; here they exercise `cfg.isUnreachable` -// directly. Every case shares a `x = 2` marker statement so the -// assertion is uniform: in oxc's FAIL cases that statement is -// unreachable, in its PASS cases it is reachable. -// -// Cases where our CFG deliberately diverges from oxc are omitted with a -// note: `var`/function-declaration hoisting (a rule policy, not a CFG -// fact). try/catch/finally normal-completion is now modeled via -// Finalize/Join edges and is covered in control-flow-graph.try-finally.test.ts. - -const analyze = (code: string) => { - const parsed = parseFixture(code); - attachParentReferences(parsed.program); - return { ...analyzeControlFlow(parsed.program), program: parsed.program, errors: parsed.errors }; -}; - -// Find the `x = 2` assignment-expression node shared by every fixture. 
-const findMarker = (root: EsTreeNode): EsTreeNode | null => { - let found: EsTreeNode | null = null; - const visit = (node: EsTreeNode): void => { - if (found) return; - if ( - node.type === "AssignmentExpression" && - (node as { left: EsTreeNode }).left.type === "Identifier" && - (node as { left: { name: string } }).left.name === "x" && - (node as { right: EsTreeNode }).right.type === "Literal" && - (node as { right: { value: unknown } }).right.value === 2 - ) { - found = node; - return; - } - const record = node as unknown as Record; - for (const key of Object.keys(record)) { - if (key === "parent") continue; - const child = record[key]; - if (Array.isArray(child)) { - for (const item of child) { - if (item && typeof item === "object" && "type" in item) visit(item as EsTreeNode); - } - } else if (child && typeof child === "object" && "type" in (child as object)) { - visit(child as EsTreeNode); - } - } - }; - visit(root); - return found; -}; - -// oxc FAIL cases: the `x = 2` marker is unreachable. -const UNREACHABLE_FIXTURES: ReadonlyArray = [ - "function foo() { var x = 1; if (x) { return; } else { throw e; } x = 2; }", - "function foo() { var x = 1; if (x) return; else throw -1; x = 2; }", - "function foo() { var x = 1; try { return; } finally {} x = 2; }", - "function foo() { var x = 1; try { } finally { return; } x = 2; }", - "function foo() { var x = 1; do { return; } while (x); x = 2; }", - "function foo() { var x = 1; for (;;) { if (x) continue; } x = 2; }", - // The infinite-loop cases are why we port oxc's loop handling — with - // no `break`, code after the loop is unreachable: - "function foo() { var x = 1; while (true) { } x = 2; }", - "function foo() { var x = 1; do { } while (true); x = 2; }", -]; - -// oxc PASS cases: the `x = 2` marker is reachable. 
-const REACHABLE_FIXTURES: ReadonlyArray = [ - "function foo() { var x = 1; if (x) { return; } x = 2; }", - "function foo() { var x = 1; if (x) { } else { return; } x = 2; }", - "function foo() { var x = 1; switch (x) { case 0: break; default: return; } x = 2; }", - "function foo() { var x = 1; while (x) { return; } x = 2; }", - "function foo() { var x = 1; for (x in {}) { return; } x = 2; }", - // Infinite loop, but an explicit `break` lets control reach the marker. - "function foo() { var x = 1; for (;;) { if (x) break; } x = 2; }", - "function foo() { var x = 1; for (;x == 1;) { if (x) continue; } x = 2; }", -]; - -describe("control-flow-graph: oxc no-unreachable fixtures", () => { - describe("unreachable marker (oxc FAIL cases)", () => { - for (const fixture of UNREACHABLE_FIXTURES) { - it(fixture, () => { - const analysis = analyze(fixture); - expect(analysis.errors).toEqual([]); - const marker = findMarker(analysis.program); - expect(marker).not.toBeNull(); - expect(analysis.isUnreachable(marker!)).toBe(true); - }); - } - }); - - describe("reachable marker (oxc PASS cases)", () => { - for (const fixture of REACHABLE_FIXTURES) { - it(fixture, () => { - const analysis = analyze(fixture); - expect(analysis.errors).toEqual([]); - const marker = findMarker(analysis.program); - expect(marker).not.toBeNull(); - expect(analysis.isUnreachable(marker!)).toBe(false); - }); - } - }); -}); From 0024f271b8e343807cee662bf5672ce26fe3e1fb Mon Sep 17 00:00:00 2001 From: Aiden Bai Date: Fri, 19 Jun 2026 20:37:52 -0700 Subject: [PATCH 03/11] feat(cfg): add formal-verification stack + RDE-validated verifier rules Builds Layers A-D on the CFG/SSA engine, all pure-TS, lazy, run once per scan: - dataflow framework (solveDataflow) + analyzeDefiniteAssignment (Layer A) - typestate protocol engine, verifyTypestate (Layer C) - bounded path-feasibility checker, isPathFeasible (Layer D), wired into typestate + definite-assignment to prune provably-infeasible counterexamples New rules 
consuming them: - no-use-before-define: sound lexical Temporal Dead Zone detection - no-stale-closure-capture: SSA-verified stale render-closure captures - no-unreleased-resource: inline effect-resource leak on some paths - no-dead-assignment, no-unreachable-code, no-set-state-in-render-loop RDE-validated against the OSS corpus and hardened: - no-use-before-define rewritten from definite-assignment to lexical TDZ (162 -> 0 false positives); fixes TS interface-member keys being recorded as references (scope-analysis) and treats class bodies as deferred scopes - no-unreleased-resource narrowed to React effect callbacks (useEffect/useLayoutEffect/useInsertionEffect + React.* member form), dropping class-lifecycle and non-React-framework noise (20 -> 0) - shared node-start util dedups the structural byte-offset read --- .../feat-cfg-formal-verification-stack.md | 17 + .changeset/feat-cfg-native-ssa.md | 9 + .changeset/feat-cfg-structural-parity.md | 7 + .changeset/feat-cfg-verifier-rules.md | 8 + packages/cfg/README.md | 195 ++- packages/cfg/src/analysis/block-edges.ts | 11 + packages/cfg/src/analysis/defs-uses.ts | 167 +++ packages/cfg/src/analysis/dominators.ts | 123 ++ .../src/analysis/eliminate-redundant-phi.ts | 68 ++ packages/cfg/src/analysis/enter-ssa.ts | 160 +++ .../cfg/src/analysis/enumerate-functions.ts | 19 + .../src/analysis/lexical-binding-resolver.ts | 163 +++ packages/cfg/src/analysis/loops.ts | 30 + packages/cfg/src/analysis/node-order.ts | 22 + packages/cfg/src/analysis/places-by-block.ts | 35 + packages/cfg/src/analysis/reachability.ts | 37 + .../cfg/src/analysis/reverse-postorder.ts | 33 + packages/cfg/src/analysis/unconditional.ts | 54 + packages/cfg/src/ast/for-each-child-node.ts | 19 + packages/cfg/src/build/build-expression.ts | 190 +++ packages/cfg/src/build/build-function-cfg.ts | 60 + packages/cfg/src/build/build-statement.ts | 434 +++++++ packages/cfg/src/build/cfg-builder.ts | 88 ++ packages/cfg/src/constant-condition.ts | 57 + 
packages/cfg/src/constants.ts | 16 + packages/cfg/src/control-flow-graph.ts | 1058 +---------------- .../cfg/src/dataflow/definite-assignment.ts | 165 +++ packages/cfg/src/dataflow/lattice.ts | 12 + packages/cfg/src/dataflow/solve.ts | 80 ++ packages/cfg/src/dot.ts | 76 ++ packages/cfg/src/index.ts | 45 +- packages/cfg/src/ir/basic-block.ts | 83 ++ packages/cfg/src/ir/instruction.ts | 20 + packages/cfg/src/ir/place.ts | 48 + packages/cfg/src/ir/terminal.ts | 55 + packages/cfg/src/path/enumerate-paths.ts | 67 ++ packages/cfg/src/path/feasibility.ts | 106 ++ packages/cfg/src/path/literal-facts.ts | 79 ++ packages/cfg/src/path/path-condition.ts | 99 ++ packages/cfg/src/path/prune-infeasible.ts | 27 + packages/cfg/src/path/ssa-value-atom.ts | 15 + packages/cfg/src/ssa.ts | 135 +++ packages/cfg/src/typestate/automaton.ts | 18 + .../cfg/src/typestate/feasibility-refiner.ts | 108 ++ packages/cfg/src/typestate/verify.ts | 250 ++++ ...ontrol-flow-graph.eslint-code-path.test.ts | 56 + .../control-flow-graph.loops-dot.test.ts | 110 ++ ...trol-flow-graph.oxc-no-fallthrough.test.ts | 46 + ...l-flow-graph.oxc-no-unsafe-finally.test.ts | 41 + .../control-flow-graph.react-compiler.test.ts | 73 ++ ...trol-flow-graph.returns-every-path.test.ts | 44 + .../control-flow-graph.terminal-shape.test.ts | 60 + packages/cfg/tests/dataflow.test.ts | 210 ++++ packages/cfg/tests/path-feasibility.test.ts | 171 +++ packages/cfg/tests/run-dataflow.ts | 76 ++ packages/cfg/tests/run-ssa.ts | 170 +++ packages/cfg/tests/run-typestate.ts | 114 ++ packages/cfg/tests/ssa.test.ts | 190 +++ packages/cfg/tests/typestate.test.ts | 176 +++ .../src/plugin/rule-registry.ts | 75 ++ .../correctness/no-dead-assignment.test.ts | 137 +++ .../rules/correctness/no-dead-assignment.ts | 63 + .../correctness/no-unreachable-code.test.ts | 234 ++++ .../rules/correctness/no-unreachable-code.ts | 92 ++ .../correctness/no-use-before-define.test.ts | 169 +++ .../rules/correctness/no-use-before-define.ts | 81 ++ 
.../security-scan/postmessage-origin-risk.ts | 11 +- .../no-set-state-in-render-loop.test.ts | 150 +++ .../no-set-state-in-render-loop.ts | 80 ++ .../no-stale-closure-capture.test.ts | 95 ++ .../no-stale-closure-capture.ts | 62 + .../no-unreleased-resource.test.ts | 181 +++ .../no-unreleased-resource.ts | 261 ++++ .../src/plugin/semantic/scope-analysis.ts | 6 + .../plugin/utils/is-captured-by-closure.ts | 32 + .../src/plugin/utils/node-start.ts | 7 + .../src/plugin/utils/rule-context.ts | 27 +- .../utils/wrap-with-semantic-context.ts | 91 +- .../src/test-utils/run-rule.ts | 20 +- 79 files changed, 6956 insertions(+), 1023 deletions(-) create mode 100644 .changeset/feat-cfg-formal-verification-stack.md create mode 100644 .changeset/feat-cfg-native-ssa.md create mode 100644 .changeset/feat-cfg-structural-parity.md create mode 100644 .changeset/feat-cfg-verifier-rules.md create mode 100644 packages/cfg/src/analysis/block-edges.ts create mode 100644 packages/cfg/src/analysis/defs-uses.ts create mode 100644 packages/cfg/src/analysis/dominators.ts create mode 100644 packages/cfg/src/analysis/eliminate-redundant-phi.ts create mode 100644 packages/cfg/src/analysis/enter-ssa.ts create mode 100644 packages/cfg/src/analysis/enumerate-functions.ts create mode 100644 packages/cfg/src/analysis/lexical-binding-resolver.ts create mode 100644 packages/cfg/src/analysis/loops.ts create mode 100644 packages/cfg/src/analysis/node-order.ts create mode 100644 packages/cfg/src/analysis/places-by-block.ts create mode 100644 packages/cfg/src/analysis/reachability.ts create mode 100644 packages/cfg/src/analysis/reverse-postorder.ts create mode 100644 packages/cfg/src/analysis/unconditional.ts create mode 100644 packages/cfg/src/ast/for-each-child-node.ts create mode 100644 packages/cfg/src/build/build-expression.ts create mode 100644 packages/cfg/src/build/build-function-cfg.ts create mode 100644 packages/cfg/src/build/build-statement.ts create mode 100644 
packages/cfg/src/build/cfg-builder.ts create mode 100644 packages/cfg/src/constant-condition.ts create mode 100644 packages/cfg/src/constants.ts create mode 100644 packages/cfg/src/dataflow/definite-assignment.ts create mode 100644 packages/cfg/src/dataflow/lattice.ts create mode 100644 packages/cfg/src/dataflow/solve.ts create mode 100644 packages/cfg/src/dot.ts create mode 100644 packages/cfg/src/ir/basic-block.ts create mode 100644 packages/cfg/src/ir/instruction.ts create mode 100644 packages/cfg/src/ir/place.ts create mode 100644 packages/cfg/src/ir/terminal.ts create mode 100644 packages/cfg/src/path/enumerate-paths.ts create mode 100644 packages/cfg/src/path/feasibility.ts create mode 100644 packages/cfg/src/path/literal-facts.ts create mode 100644 packages/cfg/src/path/path-condition.ts create mode 100644 packages/cfg/src/path/prune-infeasible.ts create mode 100644 packages/cfg/src/path/ssa-value-atom.ts create mode 100644 packages/cfg/src/ssa.ts create mode 100644 packages/cfg/src/typestate/automaton.ts create mode 100644 packages/cfg/src/typestate/feasibility-refiner.ts create mode 100644 packages/cfg/src/typestate/verify.ts create mode 100644 packages/cfg/tests/control-flow-graph.eslint-code-path.test.ts create mode 100644 packages/cfg/tests/control-flow-graph.loops-dot.test.ts create mode 100644 packages/cfg/tests/control-flow-graph.oxc-no-fallthrough.test.ts create mode 100644 packages/cfg/tests/control-flow-graph.oxc-no-unsafe-finally.test.ts create mode 100644 packages/cfg/tests/control-flow-graph.react-compiler.test.ts create mode 100644 packages/cfg/tests/control-flow-graph.returns-every-path.test.ts create mode 100644 packages/cfg/tests/control-flow-graph.terminal-shape.test.ts create mode 100644 packages/cfg/tests/dataflow.test.ts create mode 100644 packages/cfg/tests/path-feasibility.test.ts create mode 100644 packages/cfg/tests/run-dataflow.ts create mode 100644 packages/cfg/tests/run-ssa.ts create mode 100644 
packages/cfg/tests/run-typestate.ts create mode 100644 packages/cfg/tests/ssa.test.ts create mode 100644 packages/cfg/tests/typestate.test.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/correctness/no-dead-assignment.test.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/correctness/no-dead-assignment.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/correctness/no-unreachable-code.test.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/correctness/no-unreachable-code.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/correctness/no-use-before-define.test.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/correctness/no-use-before-define.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/state-and-effects/no-set-state-in-render-loop.test.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/state-and-effects/no-set-state-in-render-loop.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/state-and-effects/no-stale-closure-capture.test.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/state-and-effects/no-stale-closure-capture.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/state-and-effects/no-unreleased-resource.test.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/rules/state-and-effects/no-unreleased-resource.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/utils/is-captured-by-closure.ts create mode 100644 packages/oxlint-plugin-react-doctor/src/plugin/utils/node-start.ts diff --git a/.changeset/feat-cfg-formal-verification-stack.md b/.changeset/feat-cfg-formal-verification-stack.md new file mode 100644 index 000000000..1b58c0d78 --- /dev/null +++ b/.changeset/feat-cfg-formal-verification-stack.md @@ -0,0 +1,17 @@ +--- +"oxlint-plugin-react-doctor": patch 
+--- + +Add a formal-verification stack to the control-flow graph and three path-sensitive rules. + +`@react-doctor/cfg` gains four layers on top of its CFG/SSA engine, all pure-TS, bundled at build time, lazy (a rule that never reads a layer pays nothing), and run once per scan: + +- **Dataflow framework** — `solveDataflow`, a generic monotone worklist fixpoint over a `Lattice` (one solver subsumes many analyses), and `analyzeDefiniteAssignment` built on it: a forward must-analysis over the SSA occurrence stream answering _is this read reached unassigned on some path?_ (a `declare` like `let x;` is neither read nor write, so a bare declaration never counts as an assignment). +- **Typestate engine** — `verifyTypestate(cfg, { automaton, classifier })` generalizes resource-protocol checking into a reusable automaton verified over the CFG, reporting error transitions (an illegal event) and leaked resources (a resource left non-accepting on a normal-completion path). Events are attributed to their real block and deduplicated, so the whole-body implicit-return never double-counts a call. +- **Path feasibility** — a bounded, dependency-free checker (`isPathFeasible` + `lowerGuard` / `pathConditionFacts`) that lowers a path's branch guards into facts over SSA values and refutes correlated-branch counterexamples via union-find congruence closure. It only ever _suppresses_ a diagnostic when the path search is complete and every counterexample is provably infeasible (e.g. `if (x) open(); … if (x) close();`), so it strictly removes false positives and is never unsound for bug-finding. + +Three new rules consume them: + +- `correctness/no-use-before-define` — a block-scoped binding (`let` / `const` / `class` / `using`) used lexically before its declaration runs, in the same synchronous execution, which always throws a `ReferenceError` from the Temporal Dead Zone. 
Sound by construction: quiet for hoisted `var` / function declarations, params, globals, and any access nested in a closure or class body that may run after the declaration. A declared-but-unassigned `let` read (`let x; if (c) x = 1; use(x)`) is `undefined`, not a TDZ crash, so it is deliberately not reported. +- `state-and-effects/no-stale-closure-capture` — a render-phase closure (a hook callback or handler) that captures a `let` binding reassigned later in the same render, so the closure sees a stale value. Quiet for `const` and bindings never reassigned after capture. +- `state-and-effects/no-unreleased-resource` — a resource opened inside a React effect callback (timer, subscription, event listener, `AbortController`) and released INLINE on some paths but leaked on an early return. Scoped to `useEffect` / `useLayoutEffect` / `useInsertionEffect` (including the namespaced `React.useEffect` form): the returned-cleanup contract stays owned by `effect-cleanup-not-on-every-path`, a `finally`-based release counts as run-on-every-path, and non-effect functions (class lifecycle methods, non-React frameworks like Solid's `createEffect`/`onCleanup`) are left alone. diff --git a/.changeset/feat-cfg-native-ssa.md b/.changeset/feat-cfg-native-ssa.md new file mode 100644 index 000000000..556816308 --- /dev/null +++ b/.changeset/feat-cfg-native-ssa.md @@ -0,0 +1,9 @@ +--- +"oxlint-plugin-react-doctor": patch +--- + +Add native SSA to the control-flow graph and a path-sensitive dead-assignment rule. + +`@react-doctor/cfg` now builds variable-level **static single assignment** form over its oxc-native CFG via the Braun, Buchwald, Hack et al. (2013) on-the-fly sealed-block algorithm — the same algorithm the React Compiler's `EnterSSA` implements — followed by their redundant-φ elimination pass. 
It is a clean-room port (no Babel, MIT attribution): a minimal value model (`SsaIdentifier` / `Place` / `Phi`), per-instruction read/write extraction, a self-contained lexical binding resolver with an injectable seam (the plugin feeds in its own scope analyzer's binding identities), and an `analyzeSsa` query API (`versionAt`, `reachingDefinition`, `isLiveValue`, `isRedefinedBetween`, `bindingOf`, per-function φ + def blocks). The parity suite asserts the Braun φ placement equals the iterated dominance frontier of each binding's definitions (Cytron et al.), and `toDot` renders φ-functions. + +New `no-dead-assignment` rule uses it: it flags a write to a reassignable local whose value is never read because every path overwrites it first (`let total = expensive(); total = cheap(); return total;`). This is a value-flow question pure control flow can't answer — it complements `no-unused-vars` (which only sees wholly-unused bindings) and stays quiet for `const`, compound assignments, closure-captured bindings, and any write whose value is read on some path. diff --git a/.changeset/feat-cfg-structural-parity.md b/.changeset/feat-cfg-structural-parity.md new file mode 100644 index 000000000..5d7a9dbd7 --- /dev/null +++ b/.changeset/feat-cfg-structural-parity.md @@ -0,0 +1,7 @@ +--- +"oxlint-plugin-react-doctor": patch +--- + +Upgrade `@react-doctor/cfg` to a full structural control-flow graph. + +Each basic block is now a typed instruction list ending in a first-class `Terminal` modeled on the React Compiler HIR taxonomy (`goto` / `if` / `switch` / loops / `logical` / `ternary` / `optional` / `try` / `return` / `throw`), with `fallthrough` join blocks and explicit `goto` lowering of `break` / `continue`. Dominance now uses the Cooper–Harvey–Kennedy immediate-dominator tree over reverse-postorder (plus the Cytron dominance frontier as the SSA seam). New analysis surface: `dominanceFrontier`, `isInfiniteLoopStart` (oxc-parity constant folding), and a Graphviz `toDot` export. 
The builder is split into `ir/` + `build/` + `analysis/` modules, and curated parity corpora from oxc (`no-fallthrough`, `no-unsafe-finally`, `getter-return`), ESLint code-path analysis, and React Compiler `BuildHIR` are ported as tests. The published plugin behavior is unchanged (all rule tests pass); this is an internal engine upgrade bundled at build time. diff --git a/.changeset/feat-cfg-verifier-rules.md b/.changeset/feat-cfg-verifier-rules.md new file mode 100644 index 000000000..73bf429c5 --- /dev/null +++ b/.changeset/feat-cfg-verifier-rules.md @@ -0,0 +1,8 @@ +--- +"oxlint-plugin-react-doctor": patch +--- + +Add 2 new rules that use the structural control-flow graph as a verifier: + +- `no-unreachable-code` (Bugs): flags code that never runs because every path above it returns, throws, breaks, continues, or loops forever (via the CFG's `isUnreachable`). Hoisted function declarations, type-only TS declarations, and a bare `var x;` are left alone, matching ESLint's `no-unreachable` carve-outs. Global rule (runs on all JS/TS), so the defensive trailing `throw` after a switch whose every case returns is reported as dead code, consistent with `no-unreachable`. +- `no-set-state-in-render-loop` (Bugs): flags a `useState` setter called inside a render-phase loop (via the CFG's `isInsideLoop`), which fires every iteration and restarts rendering ("Too many re-renders"). Complements `no-set-state-in-render`, which only catches setters that run unconditionally; the two partition cleanly on `isUnconditionalFromEntry`, so an unconditional `for (;;)` / `while (true)` setter is owned by `no-set-state-in-render` and never double-reported. Setters in `.map()` / event-handler / effect callbacks (separate functions) stay quiet. 
diff --git a/packages/cfg/README.md b/packages/cfg/README.md index 8b5442271..496cde4ee 100644 --- a/packages/cfg/README.md +++ b/packages/cfg/README.md @@ -22,20 +22,167 @@ const cfg = analyzeControlFlow(programRoot); // ControlFlowAnalysis `analyzeControlFlow(program)` lazily builds one graph per function it encounters and returns a `ControlFlowAnalysis`: -| Method | Question it answers | -| ------------------------------------- | ------------------------------------------------------------------------------- | -| `isUnconditionalFromEntry(node)` | Does `node` run on **every** path from its function's entry to its exit? | -| `isReachable(fromNode, toNode)` | Can control flow from `fromNode` to `toNode` within the same function? | -| `dominates(aNode, bNode)` | Does `aNode` run on every path that reaches `bNode` (a guard before a sink)? | -| `postDominates(bNode, aNode)` | Does `bNode` run on every path from `aNode` to exit (cleanup after a resource)? | -| `isInsideLoop(node)` | Is `node`'s block part of a cycle in its own function's CFG? | -| `isUnreachable(node)` | Is `node`'s block dead code (after an unconditional return / throw / break)? | -| `cfgFor(fn)` / `enclosingFunction(n)` | The raw `FunctionCfg` (blocks + edges) / the function a node belongs to. | +| Method | Question it answers | +| ------------------------------------- | --------------------------------------------------------------------------------- | +| `isUnconditionalFromEntry(node)` | Does `node` run on **every** path from its function's entry to its exit? | +| `isReachable(fromNode, toNode)` | Can control flow from `fromNode` to `toNode` within the same function? | +| `dominates(aNode, bNode)` | Does `aNode` run on every path that reaches `bNode` (a guard before a sink)? | +| `postDominates(bNode, aNode)` | Does `bNode` run on every path from `aNode` to exit (cleanup after a resource)? | +| `isInsideLoop(node)` | Is `node`'s block part of a cycle in its own function's CFG? 
| +| `isUnreachable(node)` | Is `node`'s block dead code (after an unconditional return / throw / break)? | +| `dominanceFrontier(node)` | The dominance frontier of `node`'s block (Cytron et al.) — the SSA seam. | +| `isInfiniteLoopStart(node)` | Is the loop's test a compile-time truthy constant (oxc `is_infinite_loop_start`)? | +| `toDot(fn)` | Graphviz DOT of the function's CFG (debugging / parity snapshots). | +| `cfgFor(fn)` / `enclosingFunction(n)` | The raw `FunctionCfg` (blocks + edges) / the function a node belongs to. | Each function boundary (`function` declaration / expression, arrow) gets its own acyclic-except-for-loops graph; a callback that escapes a loop is **not** inside that loop because it is a separate function. +## Internal model + +Each `BasicBlock` is a typed `Instruction[]` (oxc's `InstructionKind`: +`statement` / `condition` / `iteration` / `return` / `implicit-return` / +`throw` / `break` / `continue`) ending in a first-class `Terminal`. The +`Terminal` union mirrors the React Compiler HIR taxonomy (`HIR/HIR.ts`): +`goto` / `if` / `switch` / `while` / `do-while` / `for` / `for-in` / `for-of` / +`logical` / `ternary` / `optional` / `try` / `return` / `throw` / +`unreachable`. Branching terminals carry a `fallthrough` join block (the +compiler's `TerminalWithFallthrough`), and `break` / `continue` are lowered to +explicit `goto` terminals at their resolved targets. + +Dominators and post-dominators use the **Cooper–Harvey–Kennedy** "A Simple, +Fast Dominance Algorithm" immediate-dominator tree over reverse-postorder (the +same algorithm the React Compiler uses), and we also compute the dominance +frontier (Cytron et al.) — both the public `dominanceFrontier` seam and the +verification oracle for the SSA layer below. 
+ +## SSA (`analyzeSsa`) + +Variable-level **static single assignment** form over the same CFG, so a rule +can ask value-flow questions — _which definition reaches this use?_, _is this +write dead?_, _is this binding reassigned between two points?_ — that pure +control flow can't answer. + +```ts +import { analyzeSsa } from "@react-doctor/cfg"; + +// Self-contained: a built-in lexical resolver assigns binding identities. +const ssa = analyzeSsa(programRoot); + +// Or inject a host scope analyzer's binding ids (the oxlint plugin does this): +const ssa = analyzeSsa(programRoot, (idNode) => scopes.symbolFor(idNode)?.id ?? null); +``` + +| Method | Question it answers | +| --------------------------------------- | ---------------------------------------------------------------- | +| `versionAt(node)` | The SSA value read or written at an identifier node. | +| `reachingDefinition(useNode)` | The SSA value that flows into a use (its reaching def). | +| `isLiveValue(identifier)` | Is this value ever read (directly or through a live φ)? | +| `isRedefinedBetween(from, to, binding)` | Is `binding` written on a path between two nodes? | +| `bindingOf(node)` / `ssaFor(fn)` | The binding an identifier denotes / per-function φ + def blocks. | + +Construction is the **Braun, Buchwald, Hack et al. (2013)** on-the-fly +sealed-block algorithm — the same algorithm the React Compiler's `EnterSSA` +implements — followed by their `EliminateRedundantPhi` fixpoint pass. It needs +only `BasicBlock.predecessors`, per-block read/write occurrences, and a version +counter; no dominator tree. The dominance frontier is used purely as the test +oracle: minimal-SSA φ placement equals the iterated dominance frontier of each +binding's definitions (Cytron et al.), and the parity suite asserts exactly +that. 
Scope is variable-level (no field-level / `ObjectShape` SSA, no type +inference, no out-of-SSA `LeaveSSA`); a binding read inside a nested function is +a closure capture the per-function form leaves opaque. + +The algorithm is a clean-room port of the **MIT-licensed** React Compiler +SSA (`babel-plugin-react-compiler/src/SSA`), carrying no Babel dependency — +attribution only. + +### Source layout + +- `src/ir/` — the data model (`instruction.ts`, `terminal.ts`, `basic-block.ts`). +- `src/build/` — lowering (`cfg-builder.ts`, `build-expression.ts`, + `build-statement.ts`, `build-function-cfg.ts`). +- `src/analysis/` — `reverse-postorder.ts`, `dominators.ts` (forward + + post-dominator trees + frontier), `reachability.ts`, `unconditional.ts`, + `loops.ts`, `node-order.ts`, `block-edges.ts`. SSA: `defs-uses.ts` + (occurrence extraction), `lexical-binding-resolver.ts` (built-in resolver), + `enter-ssa.ts` (Braun construction), `eliminate-redundant-phi.ts`. +- `src/ir/place.ts` — the SSA value model (`SsaIdentifier` / `Place` / `Phi`). +- `src/dot.ts` — Graphviz export (renders φ-functions). `src/constant-condition.ts` + — the infinite-loop constant folder. `src/control-flow-graph.ts` — assembles + `analyzeControlFlow`; `src/ssa.ts` — assembles `analyzeSsa`. + +## Formal-verification stack + +Four layers build on the CFG/SSA above to answer _bug-finding_ questions +soundly. Everything is pure-TS, bundled at build time, lazy (a rule that never +reads a layer pays nothing), and runs once per scan — never in a hot loop. + +### Dataflow framework (`solveDataflow`, Layer A) + +A generic monotone worklist fixpoint over the CFG. Give it a `Lattice` +(`bottom` / `join` / `equals`), a `direction`, a `boundary` fact, and a +`transfer(block, inFact)`; it iterates reverse-postorder (forward) or its +reverse (backward) to a fixpoint and returns per-block entry/exit facts. One +solver subsumes many analyses. 
+ +```ts +import { solveDataflow, analyzeDefiniteAssignment } from "@react-doctor/cfg"; +``` + +`analyzeDefiniteAssignment(program, resolveBinding?, { resolveValue? })` is the +first analysis built on it: a forward _must_ analysis (set-intersection at +joins) over the SSA occurrence stream. `isMaybeUnassignedAt(node)` answers +_does some entry→read path reach this read with no prior write?_ — the signal a +TDZ / read-before-write rule keys off. A `declare` occurrence (`let x;`) is +neither a read nor a write, so a bare declaration never counts as an assignment. + +### Typestate protocol engine (`verifyTypestate`, Layer C) + +Generalizes resource-protocol checking (e.g. the hand-rolled +effect-cleanup leak rule) into a reusable automaton verified over the CFG. + +```ts +import { verifyTypestate } from "@react-doctor/cfg"; + +verifyTypestate(cfg, { automaton, classifier, resolveValue? }); +``` + +A `TypestateAutomaton` is `{ initial, transition(state, event), errorStates, +acceptingStates }`; the `classifier` maps each instruction node to the protocol +events (`{ resource, event, node }`) in its subtree. Built on `solveDataflow` +(fact = each resource's set of possible states, joined by union), it reports two +failure modes: an **error transition** (an illegal event drove a resource into +an error state) and a **leaked resource** (a resource resting in a non-accepting +state on a normal-completion path — the exit joined over non-`throw` +predecessors). Events are attributed to the block they actually execute in +(`cfg.blockOf`) and deduplicated by node, so the whole-body `implicit-return` +instruction never double-counts a call. + +### Path feasibility (`isPathFeasible`, Layer D) + +A bounded, dependency-free consistency checker that refines B/C — the `analyzeDefiniteAssignment` / `verifyTypestate` analyses above — by **pruning +infeasible counterexample paths**.
`lowerGuard` / `pathConditionFacts` lower a +path's branch guards (`if` / `&&` / `||` / `!` / equality) into facts over SSA +values (keyed by `versionAt`, so the _same_ value at two branches is one atom). +`isPathFeasible(facts)` runs a union-find congruence closure plus truthiness / +disequality constraints and returns `feasible` / `infeasible` / `unknown` +(`unknown` past the caps in `constants.ts`). + +The integration is deliberately one-directional: a diagnostic is suppressed +**only** when the path search is complete and _every_ counterexample is provably +`infeasible`. Any `feasible` / `unknown` counterexample, or an incomplete search, +leaves the diagnostic standing — so Layer D only ever removes false positives +(e.g. `if (x) open(); … if (x) close();` — the open-without-close path needs `x` +truthy and falsy at once) and is never unsound for bug-finding. Opt in by +passing `resolveValue` to `analyzeDefiniteAssignment` / `verifyTypestate`. + +Source: `src/dataflow/` (`lattice.ts`, `solve.ts`, `definite-assignment.ts`), +`src/typestate/` (`automaton.ts`, `verify.ts`), `src/path/` +(`literal-facts.ts`, `path-condition.ts`, `feasibility.ts`, +`enumerate-paths.ts`, `prune-infeasible.ts`). Tested by `tests/dataflow.test.ts`, +`tests/typestate.test.ts`, `tests/path-feasibility.test.ts`. + ## What it models Statement-level terminals: `if` / `switch` / `for` / `for-in` / `for-of` / @@ -59,16 +206,32 @@ expressions. Deliberately **not** modeled: per-instruction "maybe-throw" edges (every call can throw); `var` / function-declaration hoisting as a reachability fact (that is a rule policy, not a CFG fact). Both are documented at the top of -`src/control-flow-graph.ts`. +`src/ir/basic-block.ts`. 
## Tests & fixture provenance -- `tests/control-flow-graph.oxc-no-unreachable.test.ts` — a port of oxc's +Parity is the deliverable: curated slices of three upstream suites, asserted +through the primitives above (and terminal-shape snapshots), so we can claim we +replicate oxc / ESLint / React Compiler CFG semantics. + +- `tests/control-flow-graph.oxc-no-unreachable.test.ts` — full port of oxc's `eslint/no-unreachable` `pass` / `fail` corpus - (`crates/oxc_linter/src/rules/eslint/no_unreachable.rs`), asserted directly - against `isUnreachable`. Each upstream case is rewritten so the statement oxc - flags becomes a `dead()` marker (must be unreachable) or a `live()` marker - (must be reachable). + (`crates/oxc_linter/src/rules/eslint/no_unreachable.rs`), via `isUnreachable` + (`dead()` / `live()` markers). +- `tests/control-flow-graph.oxc-no-fallthrough.test.ts` — oxc's + `eslint/no_fallthrough.rs`, as switch-case `isReachable` facts. +- `tests/control-flow-graph.oxc-no-unsafe-finally.test.ts` — oxc's + `eslint/no_unsafe_finally.rs`: an abrupt `finally` swallows normal completion. +- `tests/control-flow-graph.returns-every-path.test.ts` — oxc's + `eslint/getter_return.rs` / `consistent-return` post-dominance shapes. +- `tests/control-flow-graph.eslint-code-path.test.ts` — representative ESLint + code-path-analysis segment reachability (`no-unreachable`, `consistent-return`). +- `tests/control-flow-graph.react-compiler.test.ts` — React Compiler `BuildHIR` + control-flow shapes (if / switch / loops / try / logical / ternary / optional). +- `tests/control-flow-graph.terminal-shape.test.ts` — each construct lowers to + its React Compiler HIR `Terminal` kind. +- `tests/control-flow-graph.loops-dot.test.ts` — `isInfiniteLoopStart` const + folding + a DOT export snapshot. - `tests/control-flow-graph.try-finally.test.ts` — `try` / `catch` / `finally` normal-completion edges. 
- `tests/control-flow-graph.expression-flow.test.ts` — the expression-level @@ -76,5 +239,7 @@ is a rule policy, not a CFG fact). Both are documented at the top of - `tests/control-flow-graph.regression.test.ts` — React-shaped regressions (conditional hooks, `setState` in a branch). - `tests/control-flow-graph.test.ts` — core graph construction. +- `tests/ssa.test.ts` — SSA φ-placement parity vs. the iterated dominance + frontier oracle, value queries, and the φ DOT rendering. Run `pnpm --filter @react-doctor/cfg test`. diff --git a/packages/cfg/src/analysis/block-edges.ts b/packages/cfg/src/analysis/block-edges.ts new file mode 100644 index 000000000..4a8a825bb --- /dev/null +++ b/packages/cfg/src/analysis/block-edges.ts @@ -0,0 +1,11 @@ +import type { BasicBlock } from "../ir/basic-block.js"; + +// The successor blocks of `block`, dropping edge kinds. The forward +// relation for RPO / dominator walks. +export const successorBlocks = (block: BasicBlock): BasicBlock[] => + block.successors.map((edge) => edge.to); + +// The predecessor blocks of `block`. The reverse relation, and the input +// to SSA φ construction. 
+export const predecessorBlocks = (block: BasicBlock): BasicBlock[] => + block.predecessors.map((edge) => edge.from); diff --git a/packages/cfg/src/analysis/defs-uses.ts b/packages/cfg/src/analysis/defs-uses.ts new file mode 100644 index 000000000..0d8ad817e --- /dev/null +++ b/packages/cfg/src/analysis/defs-uses.ts @@ -0,0 +1,167 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import type { Place, ResolveBinding } from "../ir/place.js"; + +// Lower an ESTree subtree into the ordered list of binding reads/writes +// (`Place`s) the SSA builder consumes — the variable-level analogue of the +// React Compiler's `BuildHIR` operand/lvalue extraction, minus field-level +// granularity. We walk in evaluation order so a block's occurrence list +// matches the order the Braun renamer must see (`x = x + 1` reads the old +// `x` before writing the new one). Nested functions are skipped: each owns +// its own CFG and SSA. + +interface PlaceEmitter { + (place: Place): void; +} + +const emitIdentifier = ( + node: EsTreeNode, + kind: "read" | "write" | "declare", + resolveBinding: ResolveBinding, + emit: PlaceEmitter, +): void => { + if (!isNodeOfType(node, "Identifier")) return; + const binding = resolveBinding(node); + if (binding === null) return; + emit({ binding, name: node.name, kind, node }); +}; + +// A binding *target* (assignment lhs, declarator id, for-in/of left): an +// Identifier is a write; a member access writes a field we don't model, so +// its object is merely read; destructuring patterns recurse. 
+const walkWriteTarget = ( + node: EsTreeNode, + resolveBinding: ResolveBinding, + emit: PlaceEmitter, +): void => { + if (isNodeOfType(node, "Identifier")) { + emitIdentifier(node, "write", resolveBinding, emit); + return; + } + if (isNodeOfType(node, "ObjectPattern")) { + for (const property of node.properties) { + if (isNodeOfType(property, "RestElement")) { + walkWriteTarget(property.argument as EsTreeNode, resolveBinding, emit); + continue; + } + if (property.computed) walkReads(property.key as EsTreeNode, resolveBinding, emit); + walkWriteTarget(property.value as EsTreeNode, resolveBinding, emit); + } + return; + } + if (isNodeOfType(node, "ArrayPattern")) { + for (const element of node.elements) { + if (element) walkWriteTarget(element as EsTreeNode, resolveBinding, emit); + } + return; + } + if (isNodeOfType(node, "AssignmentPattern")) { + walkReads(node.right as EsTreeNode, resolveBinding, emit); + walkWriteTarget(node.left as EsTreeNode, resolveBinding, emit); + return; + } + if (isNodeOfType(node, "RestElement")) { + walkWriteTarget(node.argument as EsTreeNode, resolveBinding, emit); + return; + } + // `obj.x = …` / anything else: the target itself is read, not a binding write. + walkReads(node, resolveBinding, emit); +}; + +// Read occurrences in evaluation order. The default branch threads children +// left-to-right (source order ≈ evaluation order for the constructs SSA +// reasons about); the explicit cases fix the spots where they diverge. +const walkReads = (node: EsTreeNode, resolveBinding: ResolveBinding, emit: PlaceEmitter): void => { + if (isFunctionLike(node)) { + // A function declaration binds its own name; its body has its own SSA. 
+ if (isNodeOfType(node, "FunctionDeclaration") && node.id) { + emitIdentifier(node.id as EsTreeNode, "write", resolveBinding, emit); + } + return; + } + + if (isNodeOfType(node, "Identifier")) { + emitIdentifier(node, "read", resolveBinding, emit); + return; + } + + if (isNodeOfType(node, "VariableDeclaration")) { + for (const declarator of node.declarations) { + if (declarator.init) { + walkReads(declarator.init as EsTreeNode, resolveBinding, emit); + walkWriteTarget(declarator.id as EsTreeNode, resolveBinding, emit); + continue; + } + // `let x;` / `var x;` — a binding declared without a value. Only a + // bare Identifier can lack an initializer (init-less patterns are a + // syntax error), so this is the binding's declaration, not a store. NOTE(review): a for-in/of head such as `for (const [a, b] of xs)` is an init-less pattern declarator, and `emitIdentifier` emits nothing for a non-Identifier id — confirm loop heads are lowered elsewhere. + emitIdentifier(declarator.id as EsTreeNode, "declare", resolveBinding, emit); + } + return; + } + + if (isNodeOfType(node, "AssignmentExpression")) { + // Compound assignment (`+=`) reads the lhs before the rhs; plain `=` + // evaluates the rhs first, then stores.
+ if (node.operator !== "=") walkReadTarget(node.left as EsTreeNode, resolveBinding, emit); + walkReads(node.right as EsTreeNode, resolveBinding, emit); + walkWriteTarget(node.left as EsTreeNode, resolveBinding, emit); + return; + } + + if (isNodeOfType(node, "UpdateExpression")) { + walkReadTarget(node.argument as EsTreeNode, resolveBinding, emit); + walkWriteTarget(node.argument as EsTreeNode, resolveBinding, emit); + return; + } + + if (isNodeOfType(node, "MemberExpression")) { + walkReads(node.object as EsTreeNode, resolveBinding, emit); + if (node.computed) walkReads(node.property as EsTreeNode, resolveBinding, emit); + return; + } + + if (isNodeOfType(node, "Property")) { + if (node.computed) walkReads(node.key as EsTreeNode, resolveBinding, emit); + walkReads(node.value as EsTreeNode, resolveBinding, emit); + return; + } + + forEachChildNode(node, (child) => walkReads(child, resolveBinding, emit)); +}; + +// A binding target evaluated as a read (compound-assignment / update lhs). +const walkReadTarget = ( + node: EsTreeNode, + resolveBinding: ResolveBinding, + emit: PlaceEmitter, +): void => { + if (isNodeOfType(node, "Identifier")) { + emitIdentifier(node, "read", resolveBinding, emit); + return; + } + walkReads(node, resolveBinding, emit); +}; + +// Ordered read/write occurrences of resolvable bindings inside `node`, +// stopping at nested function boundaries. +export const collectPlaces = (node: EsTreeNode, resolveBinding: ResolveBinding): Place[] => { + const places: Place[] = []; + walkReads(node, resolveBinding, (place) => places.push(place)); + return places; +}; + +// Parameter bindings are written once, at the function entry, before any +// body instruction runs. Default values are reads evaluated at entry too. 
+export const collectParameterPlaces = ( + parameters: ReadonlyArray<EsTreeNode>, + resolveBinding: ResolveBinding, +): Place[] => { + const places: Place[] = []; + for (const parameter of parameters) { + walkWriteTarget(parameter, resolveBinding, (place) => places.push(place)); + } + return places; +}; diff --git a/packages/cfg/src/analysis/dominators.ts b/packages/cfg/src/analysis/dominators.ts new file mode 100644 index 000000000..0ca3c8e17 --- /dev/null +++ b/packages/cfg/src/analysis/dominators.ts @@ -0,0 +1,123 @@ +import type { BasicBlock } from "../ir/basic-block.js"; +import { predecessorBlocks, successorBlocks } from "./block-edges.js"; +import { reversePostorder } from "./reverse-postorder.js"; + +export interface DominatorTree { + // Blocks reachable from the tree's root (entry for dominators, exit for + // post-dominators). Queries outside this set return false. + readonly reachable: ReadonlySet<BasicBlock>; + // The immediate dominator of `block` (the root maps to itself); null if + // `block` is unreachable from the root. + readonly immediateDominatorOf: (block: BasicBlock) => BasicBlock | null; + // `ancestor` dominates `node`: it lies on `node`'s idom chain (a node + // dominates itself). For a post-dominator tree this reads as "`ancestor` + // post-dominates `node`". + readonly dominates: (ancestor: BasicBlock, node: BasicBlock) => boolean; + // The dominance frontier of `block` (Cytron et al.): the blocks where + // `block`'s dominance stops. The SSA-construction seam: surfaced as the + // public `dominanceFrontier` query and used as the φ-placement oracle in the SSA parity tests. + readonly dominanceFrontierOf: (block: BasicBlock) => ReadonlySet<BasicBlock>; +} + +// Cooper–Harvey–Kennedy "A Simple, Fast Dominance Algorithm": iterate the +// idom array over reverse-postorder until it stabilizes. `successorsOf` +// drives the RPO walk from `root`; `predecessorsOf` feeds the intersection +// step.
For a post-dominator tree, callers pass the reversed relations +// (root = exit, successors = CFG predecessors, predecessors = CFG +// successors). Same algorithm the React Compiler uses (`Dominator.ts`). +export const buildDominatorTree = ( + root: BasicBlock, + successorsOf: (block: BasicBlock) => ReadonlyArray<BasicBlock>, + predecessorsOf: (block: BasicBlock) => ReadonlyArray<BasicBlock>, +): DominatorTree => { + const order = reversePostorder(root, successorsOf); + const rpoNumber = new Map<BasicBlock, number>(); + order.forEach((block, index) => rpoNumber.set(block, index)); + + const idom = new Map<BasicBlock, BasicBlock | null>(); + for (const block of order) idom.set(block, null); + idom.set(root, root); + + // Walk both fingers up the partially-built tree until they meet — the + // nearest common dominator of two already-processed blocks. + const intersect = (left: BasicBlock, right: BasicBlock): BasicBlock => { + let finger1 = left; + let finger2 = right; + while (finger1 !== finger2) { + while (rpoNumber.get(finger1)! > rpoNumber.get(finger2)!) finger1 = idom.get(finger1)!; + while (rpoNumber.get(finger2)! > rpoNumber.get(finger1)!) finger2 = idom.get(finger2)!; + } + return finger1; + }; + + let changed = true; + while (changed) { + changed = false; + for (const block of order) { + if (block === root) continue; + let newIdom: BasicBlock | null = null; + for (const predecessor of predecessorsOf(block)) { + // Skip predecessors not yet processed (or unreachable from root — + // dead code can't influence runtime dominance). + if (idom.get(predecessor) == null) continue; + newIdom = newIdom === null ?
predecessor : intersect(predecessor, newIdom); + } + if (newIdom !== null && idom.get(block) !== newIdom) { + idom.set(block, newIdom); + changed = true; + } + } + } + + const reachable = new Set(order); + + const dominates = (ancestor: BasicBlock, node: BasicBlock): boolean => { + if (!reachable.has(ancestor) || !reachable.has(node)) return false; + let current: BasicBlock | null = node; + while (current !== null) { + if (current === ancestor) return true; + const next: BasicBlock | null = idom.get(current) ?? null; + if (next === current) return false; // reached the root + current = next; + } + return false; + }; + + // Cytron et al.: for every join block (≥2 reachable predecessors), each + // predecessor `runner` adds the join to its dominance frontier until it + // hits the join's immediate dominator. + const dominanceFrontier = new Map<BasicBlock, Set<BasicBlock>>(); + for (const block of order) dominanceFrontier.set(block, new Set<BasicBlock>()); + for (const block of order) { + const predecessors = predecessorsOf(block).filter((predecessor) => reachable.has(predecessor)); + if (predecessors.length < 2) continue; + const blockIdom = idom.get(block); + for (const predecessor of predecessors) { + let runner: BasicBlock | null = predecessor; + while (runner !== null && runner !== blockIdom) { + dominanceFrontier.get(runner)!.add(block); + const next: BasicBlock | null = idom.get(runner) ?? null; + if (next === runner) break; + runner = next; + } + } + } + + const emptyFrontier: ReadonlySet<BasicBlock> = new Set<BasicBlock>(); + + return { + reachable, + immediateDominatorOf: (block) => idom.get(block) ?? null, + dominates, + dominanceFrontierOf: (block) => dominanceFrontier.get(block) ?? emptyFrontier, + }; +}; + +// Forward dominator tree rooted at the function entry. 
+export const computeDominatorTree = (entry: BasicBlock): DominatorTree => + buildDominatorTree(entry, successorBlocks, predecessorBlocks); + +// Post-dominator tree: the dominator tree of the reversed graph rooted at +// the function exit.
`tree.dominates(a, b)` then means "a post-dominates b". +export const computePostDominatorTree = (exit: BasicBlock): DominatorTree => + buildDominatorTree(exit, predecessorBlocks, successorBlocks); diff --git a/packages/cfg/src/analysis/eliminate-redundant-phi.ts b/packages/cfg/src/analysis/eliminate-redundant-phi.ts new file mode 100644 index 000000000..93cf90467 --- /dev/null +++ b/packages/cfg/src/analysis/eliminate-redundant-phi.ts @@ -0,0 +1,68 @@ +import type { FunctionCfg } from "../ir/basic-block.js"; +import type { Phi, SsaIdentifier } from "../ir/place.js"; +import type { SsaConstruction } from "./enter-ssa.js"; +import { successorBlocks } from "./block-edges.js"; +import { reversePostorder } from "./reverse-postorder.js"; + +// Strip the trivial φs the on-the-fly builder leaves behind — a φ whose +// operands are all the same value `v` (ignoring self-references) is just +// `v`. Ports the React Compiler's `EliminateRedundantPhi`: a reverse- +// postorder rewrite pass repeated to a fixpoint, since collapsing one φ can +// expose another. Mutates the CFG's `block.phis` and rewrites every +// recorded read/write occurrence through the resulting substitution. 
+export const eliminateRedundantPhis = (cfg: FunctionCfg, construction: SsaConstruction): void => { + const order = reversePostorder(cfg.entry, successorBlocks); + const rewrite = new Map<SsaIdentifier, SsaIdentifier>(); + const removed = new Set<Phi>(); + + const resolve = (identifier: SsaIdentifier): SsaIdentifier => { + let current = identifier; + while (rewrite.has(current)) current = rewrite.get(current)!; + return current; + }; + + let changed = true; + while (changed) { + changed = false; + for (const block of order) { + for (const phi of block.phis) { + if (removed.has(phi)) continue; + let unique: SsaIdentifier | null = null; + let redundant = true; + for (const operand of phi.operands.values()) { + const resolved = resolve(operand); + if (resolved === phi.identifier) continue; // self-reference: ignore + if (unique === null) { + unique = resolved; + } else if (unique !== resolved) { + redundant = false; + break; + } + } + if (redundant && unique !== null) { + rewrite.set(phi.identifier, unique); + removed.add(phi); + changed = true; + } + } + } + } + + for (const block of order) { + const kept = block.phis.filter((phi) => !removed.has(phi)); + block.phis.length = 0; + for (const phi of kept) { + for (const [predecessor, operand] of phi.operands) { + phi.operands.set(predecessor, resolve(operand)); + } + block.phis.push(phi); + } + } + + for (const [node, identifier] of construction.readIdentifierAt) { + construction.readIdentifierAt.set(node, resolve(identifier)); + } + for (const [node, identifier] of construction.writeIdentifierAt) { + construction.writeIdentifierAt.set(node, resolve(identifier)); + } +}; diff --git a/packages/cfg/src/analysis/enter-ssa.ts b/packages/cfg/src/analysis/enter-ssa.ts new file mode 100644 index 000000000..13803f05e --- /dev/null +++ b/packages/cfg/src/analysis/enter-ssa.ts @@ -0,0 +1,160 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import type { BasicBlock, FunctionCfg } from "../ir/basic-block.js"; +import type { BindingId, Phi, Place,
SsaIdentifier } from "../ir/place.js"; +import { successorBlocks } from "./block-edges.js"; +import { reversePostorder } from "./reverse-postorder.js"; + +export interface SsaConstruction { + // The SSA value flowing INTO each read occurrence (its reaching def). + readonly readIdentifierAt: Map<EsTreeNode, SsaIdentifier>; + // The SSA value DEFINED at each write occurrence. + readonly writeIdentifierAt: Map<EsTreeNode, SsaIdentifier>; + // Reachable blocks that write each binding — the φ-placement oracle input. + readonly defBlocks: Map<BindingId, Set<BasicBlock>>; +} + +// On-the-fly SSA construction via Braun, Buchwald, Hack et al. (2013), +// "Simple and Efficient Construction of Static Single Assignment Form" +// (the algorithm the React Compiler's `EnterSSA` also implements). It needs +// only `BasicBlock.predecessors`, the per-block read/write occurrences, and +// a version counter — no dominator tree. Loop headers are read before their +// back-edge predecessor is filled, so they receive *incomplete* φs that are +// completed when the header is sealed (all predecessors filled). +export const enterSsa = ( + cfg: FunctionCfg, + placesByBlock: ReadonlyMap<BasicBlock, ReadonlyArray<Place>>, +): SsaConstruction => { + const readIdentifierAt = new Map<EsTreeNode, SsaIdentifier>(); + const writeIdentifierAt = new Map<EsTreeNode, SsaIdentifier>(); + const defBlocks = new Map<BindingId, Set<BasicBlock>>(); + + // currentDef[binding][block] — the SSA value of `binding` at the end of + // `block` (or the in-progress phi result, to break read cycles). + const currentDef = new Map<BindingId, Map<BasicBlock, SsaIdentifier>>(); + const sealed = new Set<BasicBlock>(); + const filled = new Set<BasicBlock>(); + const incompletePhis = new Map<BasicBlock, Map<BindingId, Phi>>(); + const versionCounter = new Map<BindingId, number>(); + + const newVersion = (binding: BindingId, name: string): SsaIdentifier => { + const version = versionCounter.get(binding) ??
0; + versionCounter.set(binding, version + 1); + return { binding, version, name }; + }; + + const writeVariable = (binding: BindingId, block: BasicBlock, value: SsaIdentifier): void => { + let perBlock = currentDef.get(binding); + if (!perBlock) { + perBlock = new Map(); + currentDef.set(binding, perBlock); + } + perBlock.set(block, value); + }; + + const recordPhi = (block: BasicBlock, phi: Phi): void => { + block.phis.push(phi); + }; + + const addPhiOperands = (binding: BindingId, block: BasicBlock, phi: Phi, name: string): void => { + for (const edge of block.predecessors) { + phi.operands.set(edge.from, readVariable(binding, edge.from, name)); + } + }; + + const readVariableRecursive = ( + binding: BindingId, + block: BasicBlock, + name: string, + ): SsaIdentifier => { + if (!sealed.has(block)) { + const identifier = newVersion(binding, name); + const phi: Phi = { identifier, operands: new Map() }; + let perBlock = incompletePhis.get(block); + if (!perBlock) { + perBlock = new Map(); + incompletePhis.set(block, perBlock); + } + perBlock.set(binding, phi); + writeVariable(binding, block, identifier); + return identifier; + } + if (block.predecessors.length === 1) { + const value = readVariable(binding, block.predecessors[0]!.from, name); + writeVariable(binding, block, value); + return value; + } + if (block.predecessors.length === 0) { + // Use of an unwritten binding (globals, use-before-def): a fresh, + // operand-less version standing in for the undefined value. 
+ const identifier = newVersion(binding, name); + writeVariable(binding, block, identifier); + return identifier; + } + const identifier = newVersion(binding, name); + const phi: Phi = { identifier, operands: new Map() }; + writeVariable(binding, block, identifier); // break cycles first + addPhiOperands(binding, block, phi, name); + recordPhi(block, phi); + return identifier; + }; + + const readVariable = (binding: BindingId, block: BasicBlock, name: string): SsaIdentifier => { + const local = currentDef.get(binding)?.get(block); + if (local) return local; + return readVariableRecursive(binding, block, name); + }; + + const sealBlock = (block: BasicBlock): void => { + const incomplete = incompletePhis.get(block); + if (incomplete) { + for (const [binding, phi] of incomplete) { + addPhiOperands(binding, block, phi, phi.identifier.name); + recordPhi(block, phi); + } + } + sealed.add(block); + }; + + const fillBlock = (block: BasicBlock): void => { + for (const place of placesByBlock.get(block) ?? []) { + if (place.kind === "read") { + readIdentifierAt.set(place.node, readVariable(place.binding, block, place.name)); + continue; + } + const identifier = newVersion(place.binding, place.name); + writeVariable(place.binding, block, identifier); + writeIdentifierAt.set(place.node, identifier); + let blocks = defBlocks.get(place.binding); + if (!blocks) { + blocks = new Set(); + defBlocks.set(place.binding, blocks); + } + blocks.add(block); + } + filled.add(block); + }; + + const order = reversePostorder(cfg.entry, successorBlocks); + const allPredecessorsFilled = (block: BasicBlock): boolean => + block.predecessors.every((edge) => filled.has(edge.from)); + + for (const block of order) { + if (allPredecessorsFilled(block)) sealBlock(block); + fillBlock(block); + } + // Seal the blocks left unsealed (loop headers, whose back-edge + // predecessor was filled only after the header). Repeat to a fixpoint: + // sealing a header can spawn incomplete φs on other unsealed blocks. 
+ let progressed = true; + while (progressed) { + progressed = false; + for (const block of order) { + if (!sealed.has(block)) { + sealBlock(block); + progressed = true; + } + } + } + + return { readIdentifierAt, writeIdentifierAt, defBlocks }; +}; diff --git a/packages/cfg/src/analysis/enumerate-functions.ts b/packages/cfg/src/analysis/enumerate-functions.ts new file mode 100644 index 000000000..e005b0772 --- /dev/null +++ b/packages/cfg/src/analysis/enumerate-functions.ts @@ -0,0 +1,19 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; + +// Every CFG owner in a program: the `Program` itself (its top-level code is +// a graph) plus every nested function-like. The shared driver for the +// per-function passes (SSA, definite-assignment) so they agree on exactly +// which scopes get a CFG. +export const enumerateFunctions = (program: EsTreeNode): EsTreeNode[] => { + const functionNodes: EsTreeNode[] = []; + if (isNodeOfType(program, "Program")) functionNodes.push(program); + const collect = (node: EsTreeNode): void => { + if (isFunctionLike(node)) functionNodes.push(node); + forEachChildNode(node, collect); + }; + collect(program); + return functionNodes; +}; diff --git a/packages/cfg/src/analysis/lexical-binding-resolver.ts b/packages/cfg/src/analysis/lexical-binding-resolver.ts new file mode 100644 index 000000000..de3f284ef --- /dev/null +++ b/packages/cfg/src/analysis/lexical-binding-resolver.ts @@ -0,0 +1,163 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import type { BindingId, ResolveBinding } from "../ir/place.js"; + +interface LexicalScope { + readonly parent: 
LexicalScope | null; + // A function or the program — the target `var`/`function` declarations + // hoist to, regardless of the block they appear in. Block scopes are not. + readonly isHoistBoundary: boolean; + readonly bindings: Map; +} + +// The nearest enclosing hoist boundary (function or program). +const hoistScopeOf = (scope: LexicalScope): LexicalScope => { + let current = scope; + while (!current.isHoistBoundary && current.parent !== null) current = current.parent; + return current; +}; + +// A block-like node opens a fresh lexical scope for its `let`/`const`/class +// declarations. Function bodies open a scope through the function itself +// (which also holds the parameters), so the body BlockStatement nesting is +// harmless. +const opensBlockScope = (node: EsTreeNode): boolean => + isNodeOfType(node, "BlockStatement") || + isNodeOfType(node, "ForStatement") || + isNodeOfType(node, "ForInStatement") || + isNodeOfType(node, "ForOfStatement") || + isNodeOfType(node, "SwitchStatement") || + isNodeOfType(node, "CatchClause"); + +// A lightweight lexical scope/shadowing/hoisting resolver, self-contained +// so SSA is testable without a host. Declarations are registered while +// walking; resolution is deferred until the whole tree is seen, so forward +// references and hoisting (`x; var x;`) resolve by scope-chain lookup. The +// oxlint plugin can inject its richer `scope-analysis` resolver instead. +export const createLexicalBindingResolver = (program: EsTreeNode): ResolveBinding => { + let nextBindingId = 0; + const scopeOfIdentifier = new Map(); + + const declare = (scope: LexicalScope, name: string): void => { + if (!scope.bindings.has(name)) scope.bindings.set(name, nextBindingId++); + }; + + // Register the binding names a destructuring/parameter pattern introduces + // (default-value and computed-key expressions are handled by the generic + // child walk, so they are skipped here). 
+ const declarePattern = (pattern: EsTreeNode, scope: LexicalScope): void => { + if (isNodeOfType(pattern, "Identifier")) { + declare(scope, pattern.name); + return; + } + if (isNodeOfType(pattern, "ObjectPattern")) { + for (const property of pattern.properties) { + if (isNodeOfType(property, "RestElement")) { + declarePattern(property.argument as EsTreeNode, scope); + } else { + declarePattern(property.value as EsTreeNode, scope); + } + } + return; + } + if (isNodeOfType(pattern, "ArrayPattern")) { + for (const element of pattern.elements) { + if (element) declarePattern(element as EsTreeNode, scope); + } + return; + } + if (isNodeOfType(pattern, "AssignmentPattern")) { + declarePattern(pattern.left as EsTreeNode, scope); + return; + } + if (isNodeOfType(pattern, "RestElement")) { + declarePattern(pattern.argument as EsTreeNode, scope); + } + }; + + const registerDeclarations = (node: EsTreeNode, scope: LexicalScope): void => { + if (isNodeOfType(node, "VariableDeclaration")) { + const target = node.kind === "var" ? hoistScopeOf(scope) : scope; + for (const declarator of node.declarations) { + declarePattern(declarator.id as EsTreeNode, target); + } + return; + } + if (isNodeOfType(node, "FunctionDeclaration") && node.id) { + declare(hoistScopeOf(scope), node.id.name); + return; + } + if (isNodeOfType(node, "ClassDeclaration") && node.id) { + declare(scope, node.id.name); + return; + } + if (isNodeOfType(node, "ImportDeclaration")) { + for (const specifier of node.specifiers) { + declare(hoistScopeOf(scope), specifier.local.name); + } + } + }; + + const walk = (node: EsTreeNode, scope: LexicalScope): void => { + if (isNodeOfType(node, "Identifier")) { + scopeOfIdentifier.set(node, scope); + return; + } + + // A function declaration's own name belongs to the enclosing scope, not + // the function's; register it before descending into the new scope. 
+ registerDeclarations(node, scope); + + if (isFunctionLike(node)) { + const functionScope: LexicalScope = { + parent: scope, + isHoistBoundary: true, + bindings: new Map(), + }; + // A named function expression's name is visible only inside itself. + if (isNodeOfType(node, "FunctionExpression") && node.id) { + declare(functionScope, node.id.name); + } + for (const parameter of node.params) declarePattern(parameter as EsTreeNode, functionScope); + forEachChildNode(node, (child) => walk(child, functionScope)); + return; + } + + if (opensBlockScope(node)) { + const blockScope: LexicalScope = { + parent: scope, + isHoistBoundary: false, + bindings: new Map(), + }; + if (isNodeOfType(node, "CatchClause") && node.param) { + declarePattern(node.param as EsTreeNode, blockScope); + } + forEachChildNode(node, (child) => walk(child, blockScope)); + return; + } + + forEachChildNode(node, (child) => walk(child, scope)); + }; + + const rootScope: LexicalScope = { + parent: null, + isHoistBoundary: true, + bindings: new Map(), + }; + // Resolution is deferred, so a single declaration-registering walk + // suffices — top-level forward references and hoisting resolve by chain. + forEachChildNode(program, (child) => walk(child, rootScope)); + + return (identifier: EsTreeNode): BindingId | null => { + if (!isNodeOfType(identifier, "Identifier")) return null; + let scope: LexicalScope | null = scopeOfIdentifier.get(identifier) ?? 
rootScope; + while (scope !== null) { + const binding = scope.bindings.get(identifier.name); + if (binding !== undefined) return binding; + scope = scope.parent; + } + return null; + }; +}; diff --git a/packages/cfg/src/analysis/loops.ts b/packages/cfg/src/analysis/loops.ts new file mode 100644 index 000000000..dfe110008 --- /dev/null +++ b/packages/cfg/src/analysis/loops.ts @@ -0,0 +1,30 @@ +import type { BasicBlock, FunctionCfg } from "../ir/basic-block.js"; + +// A block is on a cycle iff it can reach itself by following non-throw +// successor edges (loop back-edges are normal "uncond" edges; a +// throw→catch edge is not a loop). +export const computeCyclicBlocks = (cfg: FunctionCfg): Set => { + const cyclicBlocks = new Set(); + for (const startBlock of cfg.blocks) { + const visited = new Set(); + const queue: BasicBlock[] = []; + for (const edge of startBlock.successors) { + if (edge.kind !== "throw") queue.push(edge.to); + } + let isOnCycle = false; + while (queue.length > 0) { + const block = queue.shift()!; + if (block === startBlock) { + isOnCycle = true; + break; + } + if (visited.has(block)) continue; + visited.add(block); + for (const edge of block.successors) { + if (edge.kind !== "throw") queue.push(edge.to); + } + } + if (isOnCycle) cyclicBlocks.add(startBlock); + } + return cyclicBlocks; +}; diff --git a/packages/cfg/src/analysis/node-order.ts b/packages/cfg/src/analysis/node-order.ts new file mode 100644 index 000000000..b285ce1b0 --- /dev/null +++ b/packages/cfg/src/analysis/node-order.ts @@ -0,0 +1,22 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; + +// Source-order index for every node owned by this function (not +// descending into nested functions). Used to break ties for two nodes +// that share a basic block: within a straight-line block the earlier +// node dominates the later one. 
+export const computeNodeOrder = ( + functionNode: EsTreeNode, + body: EsTreeNode, +): Map => { + const nodeOrder = new Map(); + let nextOrder = 0; + const walk = (node: EsTreeNode): void => { + if (!nodeOrder.has(node)) nodeOrder.set(node, nextOrder++); + if (node !== functionNode && isFunctionLike(node)) return; + forEachChildNode(node, walk); + }; + walk(body); + return nodeOrder; +}; diff --git a/packages/cfg/src/analysis/places-by-block.ts b/packages/cfg/src/analysis/places-by-block.ts new file mode 100644 index 000000000..268fdcb66 --- /dev/null +++ b/packages/cfg/src/analysis/places-by-block.ts @@ -0,0 +1,35 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import type { BasicBlock, FunctionCfg } from "../ir/basic-block.js"; +import type { Place, ResolveBinding } from "../ir/place.js"; +import { collectParameterPlaces, collectPlaces } from "./defs-uses.js"; + +// The ordered binding reads/writes of one function's CFG, bucketed into the +// block each occurrence executes in (`cfg.blockOf`). Parameters are written +// at the entry, before any body occurrence. This is the single source of +// truth feeding both SSA construction (`ssa.ts`) and the SSA-keyed dataflow +// analyses (`dataflow/definite-assignment.ts`) — both need the same +// per-block, evaluation-ordered occurrence stream. +export const collectPlacesByBlock = ( + cfg: FunctionCfg, + owner: EsTreeNode, + resolveBinding: ResolveBinding, +): Map => { + const parameters = isFunctionLike(owner) ? (owner.params as EsTreeNode[]) : []; + const body = isFunctionLike(owner) ? 
(owner.body as EsTreeNode) : owner; + const parameterPlaces = collectParameterPlaces(parameters, resolveBinding); + const bodyPlaces = collectPlaces(body, resolveBinding); + + const placesByBlock = new Map(); + const append = (block: BasicBlock, place: Place): void => { + const existing = placesByBlock.get(block); + if (existing) existing.push(place); + else placesByBlock.set(block, [place]); + }; + for (const place of parameterPlaces) append(cfg.entry, place); + for (const place of bodyPlaces) { + const block = cfg.blockOf(place.node); + if (block) append(block, place); + } + return placesByBlock; +}; diff --git a/packages/cfg/src/analysis/reachability.ts b/packages/cfg/src/analysis/reachability.ts new file mode 100644 index 000000000..77f25e1d6 --- /dev/null +++ b/packages/cfg/src/analysis/reachability.ts @@ -0,0 +1,37 @@ +import type { BasicBlock, CfgEdge, FunctionCfg } from "../ir/basic-block.js"; + +export const isBlockReachableFromBlock = ( + fromBlock: BasicBlock, + toBlock: BasicBlock, + includeEdge: (edge: CfgEdge) => boolean = () => true, +): boolean => { + const visited = new Set(); + const queue: BasicBlock[] = [fromBlock]; + while (queue.length > 0) { + const block = queue.shift()!; + for (const edge of block.successors) { + if (!includeEdge(edge)) continue; + if (edge.to === toBlock) return true; + if (!visited.has(edge.to)) { + visited.add(edge.to); + queue.push(edge.to); + } + } + } + return false; +}; + +// Blocks reachable from entry over EVERY edge kind (including catch +// edges). Used to answer `isUnreachable` — a block with no path from +// entry is dead code. 
+export const computeReachableFromEntry = (cfg: FunctionCfg): Set => { + const visited = new Set(); + const queue: BasicBlock[] = [cfg.entry]; + while (queue.length > 0) { + const block = queue.shift()!; + if (visited.has(block)) continue; + visited.add(block); + for (const edge of block.successors) queue.push(edge.to); + } + return visited; +}; diff --git a/packages/cfg/src/analysis/reverse-postorder.ts b/packages/cfg/src/analysis/reverse-postorder.ts new file mode 100644 index 000000000..636746462 --- /dev/null +++ b/packages/cfg/src/analysis/reverse-postorder.ts @@ -0,0 +1,33 @@ +import type { BasicBlock } from "../ir/basic-block.js"; + +// Reverse-postorder of every block reachable from `root` over the given +// successor relation. RPO guarantees a block appears before all blocks it +// strictly dominates, which is the ordering the Cooper-Harvey-Kennedy +// dominance algorithm and forward dataflow both require. Iterative DFS so +// deep CFGs can't blow the call stack. +export const reversePostorder = ( + root: BasicBlock, + successorsOf: (block: BasicBlock) => ReadonlyArray, +): BasicBlock[] => { + const postorder: BasicBlock[] = []; + const visited = new Set([root]); + const stack: Array<{ block: BasicBlock; nextSuccessor: number }> = [ + { block: root, nextSuccessor: 0 }, + ]; + while (stack.length > 0) { + const frame = stack[stack.length - 1]!; + const successors = successorsOf(frame.block); + if (frame.nextSuccessor < successors.length) { + const next = successors[frame.nextSuccessor++]!; + if (!visited.has(next)) { + visited.add(next); + stack.push({ block: next, nextSuccessor: 0 }); + } + } else { + postorder.push(frame.block); + stack.pop(); + } + } + postorder.reverse(); + return postorder; +}; diff --git a/packages/cfg/src/analysis/unconditional.ts b/packages/cfg/src/analysis/unconditional.ts new file mode 100644 index 000000000..0883cc307 --- /dev/null +++ b/packages/cfg/src/analysis/unconditional.ts @@ -0,0 +1,54 @@ +import type { BasicBlock, 
FunctionCfg } from "../ir/basic-block.js"; + +// A block B is "unconditional from entry" iff every execution path +// from entry to exit passes through B. We compute this by, for each +// block B, asking: if we removed B from the graph, is exit still +// reachable from entry? If NO, B is on every path → unconditional. +// +// Cost: O(|blocks|^2) — fine for function-sized CFGs (typically <100 +// blocks). Avoids needing a full dominator tree. +export const computeUnconditionalSet = (cfg: FunctionCfg): Set => { + // Skip "throw" edges when computing reachability — uncaught throws + // don't represent a normal completion path. This makes + // `if (x) throw; useHook();` evaluate as unconditional (the + // `useHook` block is the only normal path to exit). + const reachableFromEntry = (excluded: BasicBlock | null): Set => { + const visited = new Set(); + const queue: BasicBlock[] = []; + if (cfg.entry !== excluded) queue.push(cfg.entry); + while (queue.length > 0) { + const block = queue.shift()!; + if (visited.has(block)) continue; + visited.add(block); + for (const edge of block.successors) { + if (edge.kind === "throw") continue; + if (edge.to === excluded) continue; + queue.push(edge.to); + } + } + return visited; + }; + + // Whole-graph reachability: any block NOT in this set is dead code + // (e.g. statements after an unconditional `return;` / `throw;`). + // Dead-code blocks vacuously satisfy "unconditional from entry" + // because the call site is never reached at runtime — there's + // nothing to constrain. + const reachableFromEntryFull = reachableFromEntry(null); + + const unconditional = new Set(); + // Entry is trivially on every path. + unconditional.add(cfg.entry); + // Exit is on every (terminating) path. 
+ unconditional.add(cfg.exit); + for (const block of cfg.blocks) { + if (unconditional.has(block)) continue; + if (!reachableFromEntryFull.has(block)) { + unconditional.add(block); + continue; + } + const stillReaches = reachableFromEntry(block).has(cfg.exit); + if (!stillReaches) unconditional.add(block); + } + return unconditional; +}; diff --git a/packages/cfg/src/ast/for-each-child-node.ts b/packages/cfg/src/ast/for-each-child-node.ts new file mode 100644 index 000000000..2e9870008 --- /dev/null +++ b/packages/cfg/src/ast/for-each-child-node.ts @@ -0,0 +1,19 @@ +import type { EsTreeNode } from "./es-tree-node.js"; +import { isAstNode } from "./is-ast-node.js"; + +// Visit every direct child AST node of `node` (array entries and single +// nodes alike), skipping the `parent` back-reference. The shared traversal +// mechanics behind the recursive walkers in build/ and analysis/; callers +// own the recursion and any function-boundary stop. +export const forEachChildNode = (node: EsTreeNode, visit: (child: EsTreeNode) => void): void => { + const record = node as unknown as Record; + for (const key of Object.keys(record)) { + if (key === "parent") continue; + const child = record[key]; + if (Array.isArray(child)) { + for (const item of child) if (isAstNode(item)) visit(item); + } else if (isAstNode(child)) { + visit(child); + } + } +}; diff --git a/packages/cfg/src/build/build-expression.ts b/packages/cfg/src/build/build-expression.ts new file mode 100644 index 000000000..784b35c21 --- /dev/null +++ b/packages/cfg/src/build/build-expression.ts @@ -0,0 +1,190 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isAstNode } from "../ast/is-ast-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import type { BasicBlock } from "../ir/basic-block.js"; +import { addEdge, createBlock, mapDescendantsToBlock, 
setTerminal } from "./cfg-builder.js"; +import type { CfgBuilder } from "./cfg-builder.js"; + +const LOGICAL_ASSIGNMENT_OPERATORS = new Set(["&&=", "||=", "??="]); + +export const isLogicalAssignment = (node: EsTreeNode): boolean => + isNodeOfType(node, "AssignmentExpression") && + LOGICAL_ASSIGNMENT_OPERATORS.has((node as { operator: string }).operator); + +// True when an expression subtree contains short-circuiting control flow +// we model as branches: a ternary, a `&&` / `||` / `??`, or a logical +// assignment (`&&=` / `||=` / `??=`). Stops at nested function boundaries — +// those get their own CFG. Lets `buildStatement` keep the cheap +// `mapDescendantsToBlock` path for straight-line code and only pay the +// block-splitting cost when an expression actually branches. +export const containsExpressionControlFlow = (node: EsTreeNode): boolean => { + let found = false; + const visit = (current: EsTreeNode): void => { + if (found) return; + if ( + isNodeOfType(current, "ConditionalExpression") || + isNodeOfType(current, "LogicalExpression") || + isNodeOfType(current, "ChainExpression") || + isLogicalAssignment(current) + ) { + found = true; + return; + } + if (isFunctionLike(current)) return; + forEachChildNode(current, visit); + }; + visit(node); + return found; +}; + +// Lower an expression's embedded control flow into the CFG — mirroring how +// the React Compiler's HIR (and oxc_cfg) give a ternary's arms, a logical +// operator's right operand, and a logical assignment's right operand their +// own basic blocks. A hook / setState / effect nested in any of those is +// then correctly seen as CONDITIONAL (short-circuited on some path), which +// statement-level lowering alone cannot see. Returns — and maps the node to +// — the block where its value becomes available (its join): a node's effect +// happens AFTER its operands, so a `wrap(cond ? a : b)` call lands in the +// post-arms block, not the pre-test one. 
Never descends into nested +// functions (they get their own CFG). +export const buildExpression = ( + builder: CfgBuilder, + node: EsTreeNode | null | undefined, + current: BasicBlock, +): BasicBlock => { + if (!node) return current; + if (isFunctionLike(node)) { + builder.nodeBlock.set(node, current); + return current; + } + + if (isNodeOfType(node, "ConditionalExpression")) { + const afterTest = buildExpression(builder, node.test as EsTreeNode, current); + const consequentBlock = createBlock(builder); + const alternateBlock = createBlock(builder); + const merge = createBlock(builder); + addEdge(afterTest, consequentBlock, "cond"); + addEdge(afterTest, alternateBlock, "cond"); + setTerminal(afterTest, { kind: "ternary", fallthrough: merge }); + const consequentEnd = buildExpression(builder, node.consequent as EsTreeNode, consequentBlock); + const alternateEnd = buildExpression(builder, node.alternate as EsTreeNode, alternateBlock); + addEdge(consequentEnd, merge, "uncond"); + addEdge(alternateEnd, merge, "uncond"); + builder.nodeBlock.set(node, merge); + return merge; + } + + if (isNodeOfType(node, "LogicalExpression") || isLogicalAssignment(node)) { + // The left/target operand is always evaluated; the right operand is + // conditional (short-circuited). From the post-left block one successor + // evaluates the RHS and one skips straight to the join. + const afterLeft = buildExpression(builder, (node as { left: EsTreeNode }).left, current); + const rightBlock = createBlock(builder); + const merge = createBlock(builder); + addEdge(afterLeft, rightBlock, "cond"); + addEdge(afterLeft, merge, "cond"); + setTerminal(afterLeft, { kind: "logical", fallthrough: merge }); + const rightEnd = buildExpression(builder, (node as { right: EsTreeNode }).right, rightBlock); + addEdge(rightEnd, merge, "uncond"); + builder.nodeBlock.set(node, merge); + return merge; + } + + if (isNodeOfType(node, "ChainExpression")) { + // Optional chain (`a?.b.c?.()`). 
The React Compiler models this with a + // SINGLE shared short-circuit target: any nullish optional link jumps to + // the same continuation (value = undefined). Everything to the right of + // a `?.` is conditional; the chain value is available at `merge`, where + // the short-circuit and the fully-evaluated paths rejoin. + const merge = createBlock(builder); + const chainEnd = buildOptionalChainLink( + builder, + (node as { expression: EsTreeNode }).expression, + current, + merge, + ); + addEdge(chainEnd, merge, "uncond"); + builder.nodeBlock.set(node, merge); + return merge; + } + + // Generic expression: evaluate children left-to-right, threading the + // block so a control-flow child splits the siblings that follow it. The + // node itself completes in the final cursor block. + let cursor = current; + forEachChildNode(node, (child) => { + cursor = buildExpression(builder, child, cursor); + }); + builder.nodeBlock.set(node, cursor); + return cursor; +}; + +// Lower one link of an optional chain in evaluation order (innermost +// object/callee first), branching to the shared `merge` (short-circuit) at +// each optional `?.`. Mirrors the compiler's `lowerOptional*Expression`: +// the base is evaluated unconditionally, then anything to the right of the +// `?.` — a computed property, a deeper access, or a call's arguments — +// evaluates in the conditional continuation. Returns the block where this +// link's value is available on the non-short-circuit path. 
+const buildOptionalChainLink = ( + builder: CfgBuilder, + node: EsTreeNode, + current: BasicBlock, + merge: BasicBlock, +): BasicBlock => { + if (isNodeOfType(node, "MemberExpression")) { + const afterObject = buildOptionalChainLink(builder, node.object as EsTreeNode, current, merge); + let cursor = afterObject; + if ((node as { optional?: boolean }).optional) { + const continuation = createBlock(builder); + addEdge(afterObject, continuation, "cond"); // base non-nullish → continue + addEdge(afterObject, merge, "cond"); // base nullish → short-circuit + setTerminal(afterObject, { kind: "optional", fallthrough: merge }); + cursor = continuation; + } + // A computed key (`a?.[expr]`) is only evaluated once the base is known + // non-nullish, so it belongs in the post-branch continuation. + if ((node as { computed?: boolean }).computed) { + cursor = buildExpression(builder, node.property as EsTreeNode, cursor); + } + builder.nodeBlock.set(node, cursor); + return cursor; + } + + if (isNodeOfType(node, "CallExpression")) { + const afterCallee = buildOptionalChainLink(builder, node.callee as EsTreeNode, current, merge); + let cursor = afterCallee; + if ((node as { optional?: boolean }).optional) { + const continuation = createBlock(builder); + addEdge(afterCallee, continuation, "cond"); + addEdge(afterCallee, merge, "cond"); + setTerminal(afterCallee, { kind: "optional", fallthrough: merge }); + cursor = continuation; + } + for (const argument of (node as { arguments: ReadonlyArray }).arguments) { + if (isAstNode(argument)) cursor = buildExpression(builder, argument, cursor); + } + builder.nodeBlock.set(node, cursor); + return cursor; + } + + // Chain base (an identifier, a parenthesized expression, a non-optional + // sub-expression): evaluate it normally. + return buildExpression(builder, node, current); +}; + +// Evaluate a sub-expression in `current`, returning the block where its +// value is available. 
Falls back to the cheap whole-subtree mapping when +// the expression has no embedded control flow. +export const buildSubExpression = ( + builder: CfgBuilder, + node: EsTreeNode | null | undefined, + current: BasicBlock, +): BasicBlock => { + if (!node) return current; + if (containsExpressionControlFlow(node)) return buildExpression(builder, node, current); + mapDescendantsToBlock(builder, node, current); + return current; +}; diff --git a/packages/cfg/src/build/build-function-cfg.ts b/packages/cfg/src/build/build-function-cfg.ts new file mode 100644 index 000000000..d778e7859 --- /dev/null +++ b/packages/cfg/src/build/build-function-cfg.ts @@ -0,0 +1,60 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import type { BasicBlock, FunctionCfg } from "../ir/basic-block.js"; +import { + addEdge, + appendInstruction, + createBlock, + createBuilder, + setTerminal, +} from "./cfg-builder.js"; +import { buildSubExpression } from "./build-expression.js"; +import { buildStatements } from "./build-statement.js"; + +// Back-fill the terminal of every block that merely falls through: a +// single successor becomes a `goto` (normal variant); a block with no +// successor keeps the `unreachable` sentinel (a genuine orphan or the +// function exit). Branching blocks already carry an explicit terminal. 
+const finalizeTerminals = (blocks: ReadonlyArray, exit: BasicBlock): void => { + for (const block of blocks) { + if (block === exit) continue; + if (block.terminal.kind !== "unreachable") continue; + if (block.successors.length === 1) { + setTerminal(block, { kind: "goto", block: block.successors[0]!.to, variant: "normal" }); + } + } +}; + +export const buildFunctionCfg = (functionNode: EsTreeNode, body: EsTreeNode): FunctionCfg => { + const builder = createBuilder(); + const entry = createBlock(builder); + const exit = createBlock(builder); + builder.entry = entry; + builder.exit = exit; + + let bodyEnd: BasicBlock; + if (isNodeOfType(body, "BlockStatement") || isNodeOfType(body, "Program")) { + bodyEnd = buildStatements(builder, body.body as EsTreeNode[], entry); + } else { + // Arrow expression body: a single Expression. Lower its control flow so + // `() => cond ? useA() : useB()` sees the hooks as conditional. + bodyEnd = buildSubExpression(builder, body, entry); + } + // Implicit return / fall-off the end of the function body. + addEdge(bodyEnd, exit, "uncond"); + if (bodyEnd.terminal.kind === "unreachable") { + appendInstruction(bodyEnd, body, "implicit-return"); + setTerminal(bodyEnd, { kind: "return", argument: null }); + } + finalizeTerminals(builder.blocks, exit); + + const blockOf = (node: EsTreeNode): BasicBlock | null => builder.nodeBlock.get(node) ?? 
null; + + return { + owner: functionNode, + entry, + exit, + blocks: builder.blocks, + blockOf, + }; +}; diff --git a/packages/cfg/src/build/build-statement.ts b/packages/cfg/src/build/build-statement.ts new file mode 100644 index 000000000..3c1a7a36e --- /dev/null +++ b/packages/cfg/src/build/build-statement.ts @@ -0,0 +1,434 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import { isBlockReachableFromBlock } from "../analysis/reachability.js"; +import { isConstantTruthyTest } from "../constant-condition.js"; +import type { BasicBlock } from "../ir/basic-block.js"; +import type { TerminalCase } from "../ir/terminal.js"; +import { + addEdge, + appendInstruction, + appendNode, + createBlock, + mapDescendantsToBlock, + setTerminal, +} from "./cfg-builder.js"; +import type { CfgBuilder } from "./cfg-builder.js"; +import { + buildExpression, + buildSubExpression, + containsExpressionControlFlow, +} from "./build-expression.js"; + +// Returns true if the node introduces internal control flow we want to +// expand into the CFG (rather than treat as a single statement). +const hasInternalControlFlow = (node: EsTreeNode): boolean => { + switch (node.type) { + case "IfStatement": + case "WhileStatement": + case "DoWhileStatement": + case "ForStatement": + case "ForInStatement": + case "ForOfStatement": + case "SwitchStatement": + case "TryStatement": + case "ReturnStatement": + case "ThrowStatement": + case "BreakStatement": + case "ContinueStatement": + case "BlockStatement": + case "LabeledStatement": + return true; + default: + return false; + } +}; + +const findLabel = ( + builder: CfgBuilder, + name: string | null, +): { merge: BasicBlock; header: BasicBlock | null } | null => { + if (name === null) { + // Unlabeled break/continue → innermost loop or switch. 
+ if (builder.loopStack.length > 0) { + const top = builder.loopStack[builder.loopStack.length - 1]!; + return { merge: top.merge, header: top.header }; + } + if (builder.switchStack.length > 0) { + const top = builder.switchStack[builder.switchStack.length - 1]!; + return { merge: top.merge, header: null }; + } + return null; + } + for (let index = builder.labelStack.length - 1; index >= 0; index--) { + const entry = builder.labelStack[index]!; + if (entry.label === name) return { merge: entry.merge, header: entry.header }; + } + return null; +}; + +// A protected region (the `try` body or a `catch` body) "completes +// normally" iff its end block is reachable from its entry without +// leaving via an exception (`throw`) or diverting into the `finally` +// (`finalize`). `return` / `throw` / `break` route elsewhere and strand +// the region end as an orphan, so it stays unreachable here. +const completesNormally = (regionEntry: BasicBlock, regionEnd: BasicBlock): boolean => + regionEntry === regionEnd || + isBlockReachableFromBlock( + regionEntry, + regionEnd, + (edge) => edge.kind !== "throw" && edge.kind !== "finalize", + ); + +// Process a list of statements inside a block. Returns the block where +// fall-through control flow ends up. Caller is responsible for +// connecting that to the next block (e.g. exit, merge). +export const buildStatements = ( + builder: CfgBuilder, + statements: ReadonlyArray, + current: BasicBlock, +): BasicBlock => { + let cursor = current; + for (const statement of statements) { + cursor = buildStatement(builder, statement, cursor); + } + return cursor; +}; + +// Process a single statement. Returns the block where control flow +// ends up after the statement (possibly an orphan if the statement is +// terminating). 
+export const buildStatement = ( + builder: CfgBuilder, + statement: EsTreeNode, + current: BasicBlock, +): BasicBlock => { + // Tag the statement node itself with the current block before + // descending — even for control-flow statements, the syntactic + // statement itself is "in" the current block. + builder.nodeBlock.set(statement, current); + + if (!hasInternalControlFlow(statement)) { + appendNode(builder, current, statement); + // A plain statement can still carry expression-level control flow + // (`const x = cond ? useA() : useB()`, `cond && setState()`): lower it + // so the branched sub-expressions land in their own blocks. Otherwise + // every descendant maps to the current block (cheap path). + if (containsExpressionControlFlow(statement)) { + return buildExpression(builder, statement, current); + } + mapDescendantsToBlock(builder, statement, current); + return current; + } + + if (isNodeOfType(statement, "BlockStatement")) { + return buildStatements(builder, statement.body as EsTreeNode[], current); + } + + if (isNodeOfType(statement, "LabeledStatement")) { + // Push the label onto the stack with a placeholder; the body will + // create the merge block for `break