diff --git a/.changeset/feat-cfg-formal-verification-stack.md b/.changeset/feat-cfg-formal-verification-stack.md new file mode 100644 index 000000000..ee651e51e --- /dev/null +++ b/.changeset/feat-cfg-formal-verification-stack.md @@ -0,0 +1,17 @@ +--- +"oxlint-plugin-react-doctor": patch +--- + +Add a formal-verification stack to the control-flow graph and three path-sensitive rules. + +`@react-doctor/cfg` gains four layers on top of its CFG/SSA engine, all pure-TS, bundled at build time, lazy (a rule that never reads a layer pays nothing), and run once per scan: + +- **Dataflow framework** — `solveDataflow`, a generic monotone worklist fixpoint over a `Lattice` (one solver subsumes many analyses), and `analyzeDefiniteAssignment` built on it: a forward must-analysis over the SSA occurrence stream answering _is this read reached unassigned on some path?_ (a `declare` like `let x;` is neither read nor write, so a bare declaration never counts as an assignment). +- **Typestate engine** — `verifyTypestate(cfg, { automaton, classifier })` generalizes resource-protocol checking into a reusable automaton verified over the CFG, reporting error transitions (an illegal event) and leaked resources (a resource left non-accepting on a normal-completion path). Events are attributed to their real block and deduplicated, so the whole-body implicit-return never double-counts a call. +- **Path feasibility** — a bounded, dependency-free checker (`isPathFeasible` + `lowerGuard` / `pathConditionFacts`) that lowers a path's branch guards into facts over SSA values and refutes correlated-branch counterexamples via union-find congruence closure. It only ever _suppresses_ a diagnostic when the path search is complete and every counterexample is provably infeasible (e.g. `if (x) open(); … if (x) close();`), so it strictly removes false positives and is never unsound for bug-finding. 
+ +Three new rules consume them: + +- `correctness/no-use-before-define` — a block-scoped binding (`let` / `const` / `class` / `using`) used lexically before its declaration runs, in the same synchronous execution, which always throws a `ReferenceError` from the Temporal Dead Zone. Sound by construction: quiet for hoisted `var` / function declarations, params, globals, and any access nested in a closure or class body that may run after the declaration. A declared-but-unassigned `let` read (`let x; if (c) x = 1; use(x)`) is `undefined`, not a TDZ crash, so it is deliberately not reported. +- `state-and-effects/no-stale-closure-capture` — a `useMemo` / `useCallback` closure that captures a `let` binding reassigned later in the same render, so the memoised value/function sees a stale value (the deps array signals the author intended the value at creation time). Quiet for `const` and bindings never reassigned after capture, and for the deferred effect hooks (`useEffect` / `useLayoutEffect`), whose callbacks run after render where reading the binding's final value is the intended pattern, not a stale capture. +- `state-and-effects/no-unreleased-resource` — a resource opened inside a React effect callback (timer, subscription, event listener, `AbortController`) and released INLINE on some paths but leaked on an early return. Scoped to `useEffect` / `useLayoutEffect` / `useInsertionEffect` (including the namespaced `React.useEffect` form): the returned-cleanup contract stays owned by `effect-cleanup-not-on-every-path`, a `finally`-based release counts as run-on-every-path, and non-effect functions (class lifecycle methods, non-React frameworks like Solid's `createEffect`/`onCleanup`) are left alone. 
diff --git a/.changeset/feat-cfg-native-ssa.md b/.changeset/feat-cfg-native-ssa.md new file mode 100644 index 000000000..556816308 --- /dev/null +++ b/.changeset/feat-cfg-native-ssa.md @@ -0,0 +1,9 @@ +--- +"oxlint-plugin-react-doctor": patch +--- + +Add native SSA to the control-flow graph and a path-sensitive dead-assignment rule. + +`@react-doctor/cfg` now builds variable-level **static single assignment** form over its oxc-native CFG via the Braun, Buchwald, Hack et al. (2013) on-the-fly sealed-block algorithm — the same algorithm the React Compiler's `EnterSSA` implements — followed by their redundant-φ elimination pass. It is a clean-room port (no Babel, MIT attribution): a minimal value model (`SsaIdentifier` / `Place` / `Phi`), per-instruction read/write extraction, a self-contained lexical binding resolver with an injectable seam (the plugin feeds in its own scope analyzer's binding identities), and an `analyzeSsa` query API (`versionAt`, `reachingDefinition`, `isLiveValue`, `isRedefinedBetween`, `bindingOf`, per-function φ + def blocks). The parity suite asserts the Braun φ placement equals the iterated dominance frontier of each binding's definitions (Cytron et al.), and `toDot` renders φ-functions. + +New `no-dead-assignment` rule uses it: it flags a write to a reassignable local whose value is never read because every path overwrites it first (`let total = expensive(); total = cheap(); return total;`). This is a value-flow question pure control flow can't answer — it complements `no-unused-vars` (which only sees wholly-unused bindings) and stays quiet for `const`, compound assignments, closure-captured bindings, and any write whose value is read on some path. 
diff --git a/.changeset/feat-cfg-structural-parity.md b/.changeset/feat-cfg-structural-parity.md new file mode 100644 index 000000000..5d7a9dbd7 --- /dev/null +++ b/.changeset/feat-cfg-structural-parity.md @@ -0,0 +1,7 @@ +--- +"oxlint-plugin-react-doctor": patch +--- + +Upgrade `@react-doctor/cfg` to a full structural control-flow graph. + +Each basic block is now a typed instruction list ending in a first-class `Terminal` modeled on the React Compiler HIR taxonomy (`goto` / `if` / `switch` / loops / `logical` / `ternary` / `optional` / `try` / `return` / `throw`), with `fallthrough` join blocks and explicit `goto` lowering of `break` / `continue`. Dominance now uses the Cooper–Harvey–Kennedy immediate-dominator tree over reverse-postorder (plus the Cytron dominance frontier as the SSA seam). New analysis surface: `dominanceFrontier`, `isInfiniteLoopStart` (oxc-parity constant folding), and a Graphviz `toDot` export. The builder is split into `ir/` + `build/` + `analysis/` modules, and curated parity corpora from oxc (`no-fallthrough`, `no-unsafe-finally`, `getter-return`), ESLint code-path analysis, and React Compiler `BuildHIR` are ported as tests. The published plugin behavior is unchanged (all rule tests pass); this is an internal engine upgrade bundled at build time. diff --git a/.changeset/feat-cfg-verifier-rules.md b/.changeset/feat-cfg-verifier-rules.md new file mode 100644 index 000000000..73bf429c5 --- /dev/null +++ b/.changeset/feat-cfg-verifier-rules.md @@ -0,0 +1,8 @@ +--- +"oxlint-plugin-react-doctor": patch +--- + +Add 2 new rules that use the structural control-flow graph as a verifier: + +- `no-unreachable-code` (Bugs): flags code that never runs because every path above it returns, throws, breaks, continues, or loops forever (via the CFG's `isUnreachable`). Hoisted function declarations, type-only TS declarations, and a bare `var x;` are left alone, matching ESLint's `no-unreachable` carve-outs. 
Global rule (runs on all JS/TS), so the defensive trailing `throw` after a switch whose every case returns is reported as dead code, consistent with `no-unreachable`. +- `no-set-state-in-render-loop` (Bugs): flags a `useState` setter called inside a render-phase loop (via the CFG's `isInsideLoop`), which fires every iteration and restarts rendering ("Too many re-renders"). Complements `no-set-state-in-render`, which only catches setters that run unconditionally; the two partition cleanly on `isUnconditionalFromEntry`, so an unconditional `for (;;)` / `while (true)` setter is owned by `no-set-state-in-render` and never double-reported. Setters in `.map()` / event-handler / effect callbacks (separate functions) stay quiet. diff --git a/.changeset/refactor-extract-cfg-package.md b/.changeset/refactor-extract-cfg-package.md new file mode 100644 index 000000000..bfb877adf --- /dev/null +++ b/.changeset/refactor-extract-cfg-package.md @@ -0,0 +1,7 @@ +--- +"oxlint-plugin-react-doctor": patch +--- + +Extract the control-flow graph into a dedicated internal `@react-doctor/cfg` package. + +The per-function CFG builder and its dominance / reachability analyses now live in their own self-contained package (bundled into the plugin at build time, so the published surface is unchanged). The package ships a typed `analyzeControlFlow` API, a README documenting the modeled terminal taxonomy, and a full port of oxc's `eslint/no-unreachable` `pass` / `fail` corpus asserted directly against the graph's `isUnreachable`. diff --git a/packages/cfg/README.md b/packages/cfg/README.md new file mode 100644 index 000000000..496cde4ee --- /dev/null +++ b/packages/cfg/README.md @@ -0,0 +1,245 @@ +# @react-doctor/cfg + +Internal (unpublished) per-function **control-flow graph** for an ESTree AST, +plus the dominance / reachability analyses React Doctor's CFG-backed rules run +on. 
The `oxlint-plugin-react-doctor` package bundles it at build time, so it is +not a runtime dependency of anything published. + +It exists so a rule can ask precise control-flow questions — _does this node run +on every path?_, _is this node reachable from that one?_, _is it inside a +loop?_ — instead of pattern-matching the AST and hoping the shape generalizes. +That is the same class of question the React Compiler answers over its HIR and +oxc answers over `oxc_cfg`. + +## API + +```ts +import { analyzeControlFlow } from "@react-doctor/cfg"; + +const cfg = analyzeControlFlow(programRoot); // ControlFlowAnalysis +``` + +`analyzeControlFlow(program)` lazily builds one graph per function it +encounters and returns a `ControlFlowAnalysis`: + +| Method | Question it answers | +| ------------------------------------- | --------------------------------------------------------------------------------- | +| `isUnconditionalFromEntry(node)` | Does `node` run on **every** path from its function's entry to its exit? | +| `isReachable(fromNode, toNode)` | Can control flow from `fromNode` to `toNode` within the same function? | +| `dominates(aNode, bNode)` | Does `aNode` run on every path that reaches `bNode` (a guard before a sink)? | +| `postDominates(bNode, aNode)` | Does `bNode` run on every path from `aNode` to exit (cleanup after a resource)? | +| `isInsideLoop(node)` | Is `node`'s block part of a cycle in its own function's CFG? | +| `isUnreachable(node)` | Is `node`'s block dead code (after an unconditional return / throw / break)? | +| `dominanceFrontier(node)` | The dominance frontier of `node`'s block (Cytron et al.) — the SSA seam. | +| `isInfiniteLoopStart(node)` | Is the loop's test a compile-time truthy constant (oxc `is_infinite_loop_start`)? | +| `toDot(fn)` | Graphviz DOT of the function's CFG (debugging / parity snapshots). | +| `cfgFor(fn)` / `enclosingFunction(n)` | The raw `FunctionCfg` (blocks + edges) / the function a node belongs to. 
| + +Each function boundary (`function` declaration / expression, arrow) gets its own +acyclic-except-for-loops graph; a callback that escapes a loop is **not** inside +that loop because it is a separate function. + +## Internal model + +Each `BasicBlock` is a typed `Instruction[]` (oxc's `InstructionKind`: +`statement` / `condition` / `iteration` / `return` / `implicit-return` / +`throw` / `break` / `continue`) ending in a first-class `Terminal`. The +`Terminal` union mirrors the React Compiler HIR taxonomy (`HIR/HIR.ts`): +`goto` / `if` / `switch` / `while` / `do-while` / `for` / `for-in` / `for-of` / +`logical` / `ternary` / `optional` / `try` / `return` / `throw` / +`unreachable`. Branching terminals carry a `fallthrough` join block (the +compiler's `TerminalWithFallthrough`), and `break` / `continue` are lowered to +explicit `goto` terminals at their resolved targets. + +Dominators and post-dominators use the **Cooper–Harvey–Kennedy** "A Simple, +Fast Dominance Algorithm" immediate-dominator tree over reverse-postorder (the +same algorithm the React Compiler uses), and we also compute the dominance +frontier (Cytron et al.) — both the public `dominanceFrontier` seam and the +verification oracle for the SSA layer below. + +## SSA (`analyzeSsa`) + +Variable-level **static single assignment** form over the same CFG, so a rule +can ask value-flow questions — _which definition reaches this use?_, _is this +write dead?_, _is this binding reassigned between two points?_ — that pure +control flow can't answer. + +```ts +import { analyzeSsa } from "@react-doctor/cfg"; + +// Self-contained: a built-in lexical resolver assigns binding identities. +const ssa = analyzeSsa(programRoot); + +// Or inject a host scope analyzer's binding ids (the oxlint plugin does this): +const ssa = analyzeSsa(programRoot, (idNode) => scopes.symbolFor(idNode)?.id ?? 
null); +``` + +| Method | Question it answers | +| --------------------------------------- | ---------------------------------------------------------------- | +| `versionAt(node)` | The SSA value read or written at an identifier node. | +| `reachingDefinition(useNode)` | The SSA value that flows into a use (its reaching def). | +| `isLiveValue(identifier)` | Is this value ever read (directly or through a live φ)? | +| `isRedefinedBetween(from, to, binding)` | Is `binding` written on a path between two nodes? | +| `bindingOf(node)` / `ssaFor(fn)` | The binding an identifier denotes / per-function φ + def blocks. | + +Construction is the **Braun, Buchwald, Hack et al. (2013)** on-the-fly +sealed-block algorithm — the same algorithm the React Compiler's `EnterSSA` +implements — followed by their `EliminateRedundantPhi` fixpoint pass. It needs +only `BasicBlock.predecessors`, per-block read/write occurrences, and a version +counter; no dominator tree. The dominance frontier is used purely as the test +oracle: minimal-SSA φ placement equals the iterated dominance frontier of each +binding's definitions (Cytron et al.), and the parity suite asserts exactly +that. Scope is variable-level (no field-level / `ObjectShape` SSA, no type +inference, no out-of-SSA `LeaveSSA`); a binding read inside a nested function is +a closure capture the per-function form leaves opaque. + +The algorithm is a clean-room port of the **MIT-licensed** React Compiler +SSA (`babel-plugin-react-compiler/src/SSA`), carrying no Babel dependency — +attribution only. + +### Source layout + +- `src/ir/` — the data model (`instruction.ts`, `terminal.ts`, `basic-block.ts`). +- `src/build/` — lowering (`cfg-builder.ts`, `build-expression.ts`, + `build-statement.ts`, `build-function-cfg.ts`). +- `src/analysis/` — `reverse-postorder.ts`, `dominators.ts` (forward + + post-dominator trees + frontier), `reachability.ts`, `unconditional.ts`, + `loops.ts`, `node-order.ts`, `block-edges.ts`. 
SSA: `defs-uses.ts` + (occurrence extraction), `lexical-binding-resolver.ts` (built-in resolver), + `enter-ssa.ts` (Braun construction), `eliminate-redundant-phi.ts`. +- `src/ir/place.ts` — the SSA value model (`SsaIdentifier` / `Place` / `Phi`). +- `src/dot.ts` — Graphviz export (renders φ-functions). `src/constant-condition.ts` + — the infinite-loop constant folder. `src/control-flow-graph.ts` — assembles + `analyzeControlFlow`; `src/ssa.ts` — assembles `analyzeSsa`. + +## Formal-verification stack + +Four layers build on the CFG/SSA above to answer _bug-finding_ questions +soundly. Everything is pure-TS, bundled at build time, lazy (a rule that never +reads a layer pays nothing), and runs once per scan — never in a hot loop. + +### Dataflow framework (`solveDataflow`, Layer A) + +A generic monotone worklist fixpoint over the CFG. Give it a `Lattice` +(`bottom` / `join` / `equals`), a `direction`, a `boundary` fact, and a +`transfer(block, inFact)`; it iterates reverse-postorder (forward) or its +reverse (backward) to a fixpoint and returns per-block entry/exit facts. One +solver subsumes many analyses. + +```ts +import { solveDataflow, analyzeDefiniteAssignment } from "@react-doctor/cfg"; +``` + +`analyzeDefiniteAssignment(program, resolveBinding?, { resolveValue? })` is the +first analysis built on it: a forward _must_ analysis (set-intersection at +joins) over the SSA occurrence stream. `isMaybeUnassignedAt(node)` answers +_does some entry→read path reach this read with no prior write?_ — the signal a +TDZ / read-before-write rule keys off. A `declare` occurrence (`let x;`) is +neither a read nor a write, so a bare declaration never counts as an assignment. + +### Typestate protocol engine (`verifyTypestate`, Layer C) + +Generalizes resource-protocol checking (e.g. the hand-rolled +effect-cleanup leak rule) into a reusable automaton verified over the CFG. 
+ +```ts +import { verifyTypestate } from "@react-doctor/cfg"; + +verifyTypestate(cfg, { automaton, classifier, resolveValue? }); +``` + +A `TypestateAutomaton` is `{ initial, transition(state, event), errorStates, +acceptingStates }`; the `classifier` maps each instruction node to the protocol +events (`{ resource, event, node }`) in its subtree. Built on `solveDataflow` +(fact = each resource's set of possible states, joined by union), it reports two +failure modes: an **error transition** (an illegal event drove a resource into +an error state) and a **leaked resource** (a resource resting in a non-accepting +state on a normal-completion path — the exit joined over non-`throw` +predecessors). Events are attributed to the block they actually execute in +(`cfg.blockOf`) and deduplicated by node, so the whole-body `implicit-return` +instruction never double-counts a call. + +### Path feasibility (`isPathFeasible`, Layer D) + +A bounded, dependency-free consistency checker that refines Layers B/C (the definite-assignment and typestate analyses) by **pruning +infeasible counterexample paths**. `lowerGuard` / `pathConditionFacts` lower a +path's branch guards (`if` / `&&` / `||` / `!` / equality) into facts over SSA +values (keyed by `versionAt`, so the _same_ value at two branches is one atom). +`isPathFeasible(facts)` runs a union-find congruence closure plus truthiness / +disequality constraints and returns `feasible` / `infeasible` / `unknown` +(`unknown` past the caps in `constants.ts`). + +The integration is deliberately one-directional: a diagnostic is suppressed +**only** when the path search is complete and _every_ counterexample is provably +`infeasible`. Any `feasible` / `unknown` counterexample, or an incomplete search, +leaves the diagnostic standing — so Layer D only ever removes false positives +(e.g. `if (x) open(); … if (x) close();` — the open-without-close path needs `x` +truthy and falsy at once) and is never unsound for bug-finding.
Opt in by +passing `resolveValue` to `analyzeDefiniteAssignment` / `verifyTypestate`. + +Source: `src/dataflow/` (`lattice.ts`, `solve.ts`, `definite-assignment.ts`), +`src/typestate/` (`automaton.ts`, `verify.ts`), `src/path/` +(`literal-facts.ts`, `path-condition.ts`, `feasibility.ts`, +`enumerate-paths.ts`, `prune-infeasible.ts`). Tested by `tests/dataflow.test.ts`, +`tests/typestate.test.ts`, `tests/path-feasibility.test.ts`. + +## What it models + +Statement-level terminals: `if` / `switch` / `for` / `for-in` / `for-of` / +`while` / `do-while` / labeled `break` & `continue` / `return` / `throw` / +`try` / `catch` / `finally` (normal completion is routed through `finalize` / +`join` edges so reachability after a `try` is correct). + +Expression-level terminals, lowered into basic blocks the way the React +Compiler lowers its HIR — so a hook or `setState` buried in a branch is seen as +conditional: + +- ternary `a ? b : c` +- logical `&&` / `||` / `??` (and logical-assignment `&&=` / `||=` / `??=`) +- optional chaining `a?.b?.()` (each `?.` branches to a shared short-circuit + target) + +Every node maps to the block where its evaluation **completes** (its join +point), which keeps dominance / reachability accurate through nested +expressions. + +Deliberately **not** modeled: per-instruction "maybe-throw" edges (every call +can throw); `var` / function-declaration hoisting as a reachability fact (that +is a rule policy, not a CFG fact). Both are documented at the top of +`src/ir/basic-block.ts`. + +## Tests & fixture provenance + +Parity is the deliverable: curated slices of three upstream suites, asserted +through the primitives above (and terminal-shape snapshots), so we can claim we +replicate oxc / ESLint / React Compiler CFG semantics. 
+ +- `tests/control-flow-graph.oxc-no-unreachable.test.ts` — full port of oxc's + `eslint/no-unreachable` `pass` / `fail` corpus + (`crates/oxc_linter/src/rules/eslint/no_unreachable.rs`), via `isUnreachable` + (`dead()` / `live()` markers). +- `tests/control-flow-graph.oxc-no-fallthrough.test.ts` — oxc's + `eslint/no_fallthrough.rs`, as switch-case `isReachable` facts. +- `tests/control-flow-graph.oxc-no-unsafe-finally.test.ts` — oxc's + `eslint/no_unsafe_finally.rs`: an abrupt `finally` swallows normal completion. +- `tests/control-flow-graph.returns-every-path.test.ts` — oxc's + `eslint/getter_return.rs` / `consistent-return` post-dominance shapes. +- `tests/control-flow-graph.eslint-code-path.test.ts` — representative ESLint + code-path-analysis segment reachability (`no-unreachable`, `consistent-return`). +- `tests/control-flow-graph.react-compiler.test.ts` — React Compiler `BuildHIR` + control-flow shapes (if / switch / loops / try / logical / ternary / optional). +- `tests/control-flow-graph.terminal-shape.test.ts` — each construct lowers to + its React Compiler HIR `Terminal` kind. +- `tests/control-flow-graph.loops-dot.test.ts` — `isInfiniteLoopStart` const + folding + a DOT export snapshot. +- `tests/control-flow-graph.try-finally.test.ts` — `try` / `catch` / `finally` + normal-completion edges. +- `tests/control-flow-graph.expression-flow.test.ts` — the expression-level + terminals above. +- `tests/control-flow-graph.regression.test.ts` — React-shaped regressions + (conditional hooks, `setState` in a branch). +- `tests/control-flow-graph.test.ts` — core graph construction. +- `tests/ssa.test.ts` — SSA φ-placement parity vs. the iterated dominance + frontier oracle, value queries, and the φ DOT rendering. + +Run `pnpm --filter @react-doctor/cfg test`. 
diff --git a/packages/cfg/package.json b/packages/cfg/package.json new file mode 100644 index 000000000..09dc17212 --- /dev/null +++ b/packages/cfg/package.json @@ -0,0 +1,30 @@ +{ + "name": "@react-doctor/cfg", + "version": "0.5.6", + "private": true, + "description": "Internal: per-function control-flow graph + dominance/reachability analyses over an ESTree AST. Models the React Compiler HIR / oxc_cfg terminal taxonomy so rules can ask whether a node runs unconditionally, is reachable, dominates, or sits inside a loop. Not published.", + "license": "MIT", + "type": "module", + "sideEffects": false, + "exports": { + ".": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + }, + "scripts": { + "build": "node -e \"require('node:fs').rmSync('dist', { recursive: true, force: true })\" && cross-env NODE_ENV=production vp pack", + "test": "vp test run", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@typescript-eslint/types": "^8.59.3" + }, + "devDependencies": { + "@types/node": "^25.6.0", + "oxc-parser": "^0.135.0" + }, + "engines": { + "node": "^20.19.0 || >=22.13.0" + } +} diff --git a/packages/cfg/src/analysis/block-edges.ts b/packages/cfg/src/analysis/block-edges.ts new file mode 100644 index 000000000..4a8a825bb --- /dev/null +++ b/packages/cfg/src/analysis/block-edges.ts @@ -0,0 +1,11 @@ +import type { BasicBlock } from "../ir/basic-block.js"; + +// The successor blocks of `block`, dropping edge kinds. The forward +// relation for RPO / dominator walks. +export const successorBlocks = (block: BasicBlock): BasicBlock[] => + block.successors.map((edge) => edge.to); + +// The predecessor blocks of `block`. The reverse relation, and the input +// to SSA φ construction. 
+export const predecessorBlocks = (block: BasicBlock): BasicBlock[] => + block.predecessors.map((edge) => edge.from); diff --git a/packages/cfg/src/analysis/defs-uses.ts b/packages/cfg/src/analysis/defs-uses.ts new file mode 100644 index 000000000..0d8ad817e --- /dev/null +++ b/packages/cfg/src/analysis/defs-uses.ts @@ -0,0 +1,167 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import type { Place, ResolveBinding } from "../ir/place.js"; + +// Lower an ESTree subtree into the ordered list of binding reads/writes +// (`Place`s) the SSA builder consumes — the variable-level analogue of the +// React Compiler's `BuildHIR` operand/lvalue extraction, minus field-level +// granularity. We walk in evaluation order so a block's occurrence list +// matches the order the Braun renamer must see (`x = x + 1` reads the old +// `x` before writing the new one). Nested functions are skipped: each owns +// its own CFG and SSA. + +interface PlaceEmitter { + (place: Place): void; +} + +const emitIdentifier = ( + node: EsTreeNode, + kind: "read" | "write" | "declare", + resolveBinding: ResolveBinding, + emit: PlaceEmitter, +): void => { + if (!isNodeOfType(node, "Identifier")) return; + const binding = resolveBinding(node); + if (binding === null) return; + emit({ binding, name: node.name, kind, node }); +}; + +// A binding *target* (assignment lhs, declarator id, for-in/of left): an +// Identifier is a write; a member access writes a field we don't model, so +// its object is merely read; destructuring patterns recurse.
+const walkWriteTarget = ( + node: EsTreeNode, + resolveBinding: ResolveBinding, + emit: PlaceEmitter, +): void => { + if (isNodeOfType(node, "Identifier")) { + emitIdentifier(node, "write", resolveBinding, emit); + return; + } + if (isNodeOfType(node, "ObjectPattern")) { + for (const property of node.properties) { + if (isNodeOfType(property, "RestElement")) { + walkWriteTarget(property.argument as EsTreeNode, resolveBinding, emit); + continue; + } + if (property.computed) walkReads(property.key as EsTreeNode, resolveBinding, emit); + walkWriteTarget(property.value as EsTreeNode, resolveBinding, emit); + } + return; + } + if (isNodeOfType(node, "ArrayPattern")) { + for (const element of node.elements) { + if (element) walkWriteTarget(element as EsTreeNode, resolveBinding, emit); + } + return; + } + if (isNodeOfType(node, "AssignmentPattern")) { + walkReads(node.right as EsTreeNode, resolveBinding, emit); + walkWriteTarget(node.left as EsTreeNode, resolveBinding, emit); + return; + } + if (isNodeOfType(node, "RestElement")) { + walkWriteTarget(node.argument as EsTreeNode, resolveBinding, emit); + return; + } + // `obj.x = …` / anything else: the target itself is read, not a binding write. + walkReads(node, resolveBinding, emit); +}; + +// Read occurrences in evaluation order. The default branch threads children +// left-to-right (source order ≈ evaluation order for the constructs SSA +// reasons about); the explicit cases fix the spots where they diverge. +const walkReads = (node: EsTreeNode, resolveBinding: ResolveBinding, emit: PlaceEmitter): void => { + if (isFunctionLike(node)) { + // A function declaration binds its own name; its body has its own SSA.
+ if (isNodeOfType(node, "FunctionDeclaration") && node.id) { + emitIdentifier(node.id as EsTreeNode, "write", resolveBinding, emit); + } + return; + } + + if (isNodeOfType(node, "Identifier")) { + emitIdentifier(node, "read", resolveBinding, emit); + return; + } + + if (isNodeOfType(node, "VariableDeclaration")) { + for (const declarator of node.declarations) { + if (declarator.init) { + walkReads(declarator.init as EsTreeNode, resolveBinding, emit); + walkWriteTarget(declarator.id as EsTreeNode, resolveBinding, emit); + continue; + } + // `let x;` / `var x;` — a binding declared without a value, so this is + // the binding's declaration, not a store. NOTE(review): a for-in/of head + // declarator is init-less too, and a pattern id emits nothing here — confirm. + emitIdentifier(declarator.id as EsTreeNode, "declare", resolveBinding, emit); + } + return; + } + + if (isNodeOfType(node, "AssignmentExpression")) { + // Compound assignment (`+=`) reads the lhs before the rhs; plain `=` + // evaluates the rhs first, then stores.
+ if (node.operator !== "=") walkReadTarget(node.left as EsTreeNode, resolveBinding, emit); + walkReads(node.right as EsTreeNode, resolveBinding, emit); + walkWriteTarget(node.left as EsTreeNode, resolveBinding, emit); + return; + } + + if (isNodeOfType(node, "UpdateExpression")) { + walkReadTarget(node.argument as EsTreeNode, resolveBinding, emit); + walkWriteTarget(node.argument as EsTreeNode, resolveBinding, emit); + return; + } + + if (isNodeOfType(node, "MemberExpression")) { + walkReads(node.object as EsTreeNode, resolveBinding, emit); + if (node.computed) walkReads(node.property as EsTreeNode, resolveBinding, emit); + return; + } + + if (isNodeOfType(node, "Property")) { + if (node.computed) walkReads(node.key as EsTreeNode, resolveBinding, emit); + walkReads(node.value as EsTreeNode, resolveBinding, emit); + return; + } + + forEachChildNode(node, (child) => walkReads(child, resolveBinding, emit)); +}; + +// A binding target evaluated as a read (compound-assignment / update lhs). +const walkReadTarget = ( + node: EsTreeNode, + resolveBinding: ResolveBinding, + emit: PlaceEmitter, +): void => { + if (isNodeOfType(node, "Identifier")) { + emitIdentifier(node, "read", resolveBinding, emit); + return; + } + walkReads(node, resolveBinding, emit); +}; + +// Ordered read/write occurrences of resolvable bindings inside `node`, +// stopping at nested function boundaries. +export const collectPlaces = (node: EsTreeNode, resolveBinding: ResolveBinding): Place[] => { + const places: Place[] = []; + walkReads(node, resolveBinding, (place) => places.push(place)); + return places; +}; + +// Parameter bindings are written once, at the function entry, before any +// body instruction runs. Default values are reads evaluated at entry too.
+export const collectParameterPlaces = ( + parameters: ReadonlyArray<EsTreeNode>, + resolveBinding: ResolveBinding, +): Place[] => { + const places: Place[] = []; + for (const parameter of parameters) { + walkWriteTarget(parameter, resolveBinding, (place) => places.push(place)); + } + return places; +}; diff --git a/packages/cfg/src/analysis/dominators.ts b/packages/cfg/src/analysis/dominators.ts new file mode 100644 index 000000000..0ca3c8e17 --- /dev/null +++ b/packages/cfg/src/analysis/dominators.ts @@ -0,0 +1,123 @@ +import type { BasicBlock } from "../ir/basic-block.js"; +import { predecessorBlocks, successorBlocks } from "./block-edges.js"; +import { reversePostorder } from "./reverse-postorder.js"; + +export interface DominatorTree { + // Blocks reachable from the tree's root (entry for dominators, exit for + // post-dominators). Queries outside this set return false. + readonly reachable: ReadonlySet<BasicBlock>; + // The immediate dominator of `block` (the root maps to itself); null if + // `block` is unreachable from the root. + readonly immediateDominatorOf: (block: BasicBlock) => BasicBlock | null; + // `ancestor` dominates `node`: it lies on `node`'s idom chain (a node + // dominates itself). For a post-dominator tree this reads as "`ancestor` + // post-dominates `node`". + readonly dominates: (ancestor: BasicBlock, node: BasicBlock) => boolean; + // The dominance frontier of `block` (Cytron et al.): the blocks where + // `block`'s dominance stops. The SSA seam: Braun φ construction does not + // need it; the SSA parity tests use it as their φ-placement oracle. + readonly dominanceFrontierOf: (block: BasicBlock) => ReadonlySet<BasicBlock>; +} + +// Cooper–Harvey–Kennedy "A Simple, Fast Dominance Algorithm": iterate the +// idom array over reverse-postorder until it stabilizes. `successorsOf` +// drives the RPO walk from `root`; `predecessorsOf` feeds the intersection +// step.
For a post-dominator tree, callers pass the reversed relations +// (root = exit, successors = CFG predecessors, predecessors = CFG +// successors). Same algorithm the React Compiler uses (`Dominator.ts`). +export const buildDominatorTree = ( + root: BasicBlock, + successorsOf: (block: BasicBlock) => ReadonlyArray, + predecessorsOf: (block: BasicBlock) => ReadonlyArray, +): DominatorTree => { + const order = reversePostorder(root, successorsOf); + const rpoNumber = new Map(); + order.forEach((block, index) => rpoNumber.set(block, index)); + + const idom = new Map(); + for (const block of order) idom.set(block, null); + idom.set(root, root); + + // Walk both fingers up the partially-built tree until they meet — the + // nearest common dominator of two already-processed blocks. + const intersect = (left: BasicBlock, right: BasicBlock): BasicBlock => { + let finger1 = left; + let finger2 = right; + while (finger1 !== finger2) { + while (rpoNumber.get(finger1)! > rpoNumber.get(finger2)!) finger1 = idom.get(finger1)!; + while (rpoNumber.get(finger2)! > rpoNumber.get(finger1)!) finger2 = idom.get(finger2)!; + } + return finger1; + }; + + let changed = true; + while (changed) { + changed = false; + for (const block of order) { + if (block === root) continue; + let newIdom: BasicBlock | null = null; + for (const predecessor of predecessorsOf(block)) { + // Skip predecessors not yet processed (or unreachable from root — + // dead code can't influence runtime dominance). + if (idom.get(predecessor) == null) continue; + newIdom = newIdom === null ? 
predecessor : intersect(predecessor, newIdom); + } + if (newIdom !== null && idom.get(block) !== newIdom) { + idom.set(block, newIdom); + changed = true; + } + } + } + + const reachable = new Set(order); + + const dominates = (ancestor: BasicBlock, node: BasicBlock): boolean => { + if (!reachable.has(ancestor) || !reachable.has(node)) return false; + let current: BasicBlock | null = node; + while (current !== null) { + if (current === ancestor) return true; + const next: BasicBlock | null = idom.get(current) ?? null; + if (next === current) return false; // reached the root + current = next; + } + return false; + }; + + // Cytron et al.: for every join block (≥2 reachable predecessors), each + // predecessor `runner` adds the join to its dominance frontier until it + // hits the join's immediate dominator. + const dominanceFrontier = new Map>(); + for (const block of order) dominanceFrontier.set(block, new Set()); + for (const block of order) { + const predecessors = predecessorsOf(block).filter((predecessor) => reachable.has(predecessor)); + if (predecessors.length < 2) continue; + const blockIdom = idom.get(block); + for (const predecessor of predecessors) { + let runner: BasicBlock | null = predecessor; + while (runner !== null && runner !== blockIdom) { + dominanceFrontier.get(runner)!.add(block); + const next: BasicBlock | null = idom.get(runner) ?? null; + if (next === runner) break; + runner = next; + } + } + } + + const emptyFrontier: ReadonlySet = new Set(); + + return { + reachable, + immediateDominatorOf: (block) => idom.get(block) ?? null, + dominates, + dominanceFrontierOf: (block) => dominanceFrontier.get(block) ?? emptyFrontier, + }; +}; + +// Forward dominator tree rooted at the function entry. +export const computeDominatorTree = (entry: BasicBlock): DominatorTree => + buildDominatorTree(entry, successorBlocks, predecessorBlocks); + +// Post-dominator tree: the dominator tree of the reversed graph rooted at +// the function exit. 
`tree.dominates(a, b)` then means "a post-dominates b". +export const computePostDominatorTree = (exit: BasicBlock): DominatorTree => + buildDominatorTree(exit, predecessorBlocks, successorBlocks); diff --git a/packages/cfg/src/analysis/eliminate-redundant-phi.ts b/packages/cfg/src/analysis/eliminate-redundant-phi.ts new file mode 100644 index 000000000..93cf90467 --- /dev/null +++ b/packages/cfg/src/analysis/eliminate-redundant-phi.ts @@ -0,0 +1,68 @@ +import type { FunctionCfg } from "../ir/basic-block.js"; +import type { Phi, SsaIdentifier } from "../ir/place.js"; +import type { SsaConstruction } from "./enter-ssa.js"; +import { successorBlocks } from "./block-edges.js"; +import { reversePostorder } from "./reverse-postorder.js"; + +// Strip the trivial φs the on-the-fly builder leaves behind — a φ whose +// operands are all the same value `v` (ignoring self-references) is just +// `v`. Ports the React Compiler's `EliminateRedundantPhi`: a reverse- +// postorder rewrite pass repeated to a fixpoint, since collapsing one φ can +// expose another. Mutates the CFG's `block.phis` and rewrites every +// recorded read/write occurrence through the resulting substitution. 
+export const eliminateRedundantPhis = (cfg: FunctionCfg, construction: SsaConstruction): void => { + const order = reversePostorder(cfg.entry, successorBlocks); + const rewrite = new Map(); + const removed = new Set(); + + const resolve = (identifier: SsaIdentifier): SsaIdentifier => { + let current = identifier; + while (rewrite.has(current)) current = rewrite.get(current)!; + return current; + }; + + let changed = true; + while (changed) { + changed = false; + for (const block of order) { + for (const phi of block.phis) { + if (removed.has(phi)) continue; + let unique: SsaIdentifier | null = null; + let redundant = true; + for (const operand of phi.operands.values()) { + const resolved = resolve(operand); + if (resolved === phi.identifier) continue; // self-reference: ignore + if (unique === null) { + unique = resolved; + } else if (unique !== resolved) { + redundant = false; + break; + } + } + if (redundant && unique !== null) { + rewrite.set(phi.identifier, unique); + removed.add(phi); + changed = true; + } + } + } + } + + for (const block of order) { + const kept = block.phis.filter((phi) => !removed.has(phi)); + block.phis.length = 0; + for (const phi of kept) { + for (const [predecessor, operand] of phi.operands) { + phi.operands.set(predecessor, resolve(operand)); + } + block.phis.push(phi); + } + } + + for (const [node, identifier] of construction.readIdentifierAt) { + construction.readIdentifierAt.set(node, resolve(identifier)); + } + for (const [node, identifier] of construction.writeIdentifierAt) { + construction.writeIdentifierAt.set(node, resolve(identifier)); + } +}; diff --git a/packages/cfg/src/analysis/enter-ssa.ts b/packages/cfg/src/analysis/enter-ssa.ts new file mode 100644 index 000000000..13803f05e --- /dev/null +++ b/packages/cfg/src/analysis/enter-ssa.ts @@ -0,0 +1,160 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import type { BasicBlock, FunctionCfg } from "../ir/basic-block.js"; +import type { BindingId, Phi, Place, 
SsaIdentifier } from "../ir/place.js"; +import { successorBlocks } from "./block-edges.js"; +import { reversePostorder } from "./reverse-postorder.js"; + +export interface SsaConstruction { + // The SSA value flowing INTO each read occurrence (its reaching def). + readonly readIdentifierAt: Map; + // The SSA value DEFINED at each write occurrence. + readonly writeIdentifierAt: Map; + // Reachable blocks that write each binding — the φ-placement oracle input. + readonly defBlocks: Map>; +} + +// On-the-fly SSA construction via Braun, Buchwald, Hack et al. (2013), +// "Simple and Efficient Construction of Static Single Assignment Form" +// (the algorithm the React Compiler's `EnterSSA` also implements). It needs +// only `BasicBlock.predecessors`, the per-block read/write occurrences, and +// a version counter — no dominator tree. Loop headers are read before their +// back-edge predecessor is filled, so they receive *incomplete* φs that are +// completed when the header is sealed (all predecessors filled). +export const enterSsa = ( + cfg: FunctionCfg, + placesByBlock: ReadonlyMap>, +): SsaConstruction => { + const readIdentifierAt = new Map(); + const writeIdentifierAt = new Map(); + const defBlocks = new Map>(); + + // currentDef[binding][block] — the SSA value of `binding` at the end of + // `block` (or the in-progress phi result, to break read cycles). + const currentDef = new Map>(); + const sealed = new Set(); + const filled = new Set(); + const incompletePhis = new Map>(); + const versionCounter = new Map(); + + const newVersion = (binding: BindingId, name: string): SsaIdentifier => { + const version = versionCounter.get(binding) ?? 
0; + versionCounter.set(binding, version + 1); + return { binding, version, name }; + }; + + const writeVariable = (binding: BindingId, block: BasicBlock, value: SsaIdentifier): void => { + let perBlock = currentDef.get(binding); + if (!perBlock) { + perBlock = new Map(); + currentDef.set(binding, perBlock); + } + perBlock.set(block, value); + }; + + const recordPhi = (block: BasicBlock, phi: Phi): void => { + block.phis.push(phi); + }; + + const addPhiOperands = (binding: BindingId, block: BasicBlock, phi: Phi, name: string): void => { + for (const edge of block.predecessors) { + phi.operands.set(edge.from, readVariable(binding, edge.from, name)); + } + }; + + const readVariableRecursive = ( + binding: BindingId, + block: BasicBlock, + name: string, + ): SsaIdentifier => { + if (!sealed.has(block)) { + const identifier = newVersion(binding, name); + const phi: Phi = { identifier, operands: new Map() }; + let perBlock = incompletePhis.get(block); + if (!perBlock) { + perBlock = new Map(); + incompletePhis.set(block, perBlock); + } + perBlock.set(binding, phi); + writeVariable(binding, block, identifier); + return identifier; + } + if (block.predecessors.length === 1) { + const value = readVariable(binding, block.predecessors[0]!.from, name); + writeVariable(binding, block, value); + return value; + } + if (block.predecessors.length === 0) { + // Use of an unwritten binding (globals, use-before-def): a fresh, + // operand-less version standing in for the undefined value. 
+ const identifier = newVersion(binding, name); + writeVariable(binding, block, identifier); + return identifier; + } + const identifier = newVersion(binding, name); + const phi: Phi = { identifier, operands: new Map() }; + writeVariable(binding, block, identifier); // break cycles first + addPhiOperands(binding, block, phi, name); + recordPhi(block, phi); + return identifier; + }; + + const readVariable = (binding: BindingId, block: BasicBlock, name: string): SsaIdentifier => { + const local = currentDef.get(binding)?.get(block); + if (local) return local; + return readVariableRecursive(binding, block, name); + }; + + const sealBlock = (block: BasicBlock): void => { + const incomplete = incompletePhis.get(block); + if (incomplete) { + for (const [binding, phi] of incomplete) { + addPhiOperands(binding, block, phi, phi.identifier.name); + recordPhi(block, phi); + } + } + sealed.add(block); + }; + + const fillBlock = (block: BasicBlock): void => { + for (const place of placesByBlock.get(block) ?? []) { + if (place.kind === "read") { + readIdentifierAt.set(place.node, readVariable(place.binding, block, place.name)); + continue; + } + const identifier = newVersion(place.binding, place.name); + writeVariable(place.binding, block, identifier); + writeIdentifierAt.set(place.node, identifier); + let blocks = defBlocks.get(place.binding); + if (!blocks) { + blocks = new Set(); + defBlocks.set(place.binding, blocks); + } + blocks.add(block); + } + filled.add(block); + }; + + const order = reversePostorder(cfg.entry, successorBlocks); + const allPredecessorsFilled = (block: BasicBlock): boolean => + block.predecessors.every((edge) => filled.has(edge.from)); + + for (const block of order) { + if (allPredecessorsFilled(block)) sealBlock(block); + fillBlock(block); + } + // Seal the blocks left unsealed (loop headers, whose back-edge + // predecessor was filled only after the header). Repeat to a fixpoint: + // sealing a header can spawn incomplete φs on other unsealed blocks. 
+ let progressed = true; + while (progressed) { + progressed = false; + for (const block of order) { + if (!sealed.has(block)) { + sealBlock(block); + progressed = true; + } + } + } + + return { readIdentifierAt, writeIdentifierAt, defBlocks }; +}; diff --git a/packages/cfg/src/analysis/enumerate-functions.ts b/packages/cfg/src/analysis/enumerate-functions.ts new file mode 100644 index 000000000..e005b0772 --- /dev/null +++ b/packages/cfg/src/analysis/enumerate-functions.ts @@ -0,0 +1,19 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; + +// Every CFG owner in a program: the `Program` itself (its top-level code is +// a graph) plus every nested function-like. The shared driver for the +// per-function passes (SSA, definite-assignment) so they agree on exactly +// which scopes get a CFG. +export const enumerateFunctions = (program: EsTreeNode): EsTreeNode[] => { + const functionNodes: EsTreeNode[] = []; + if (isNodeOfType(program, "Program")) functionNodes.push(program); + const collect = (node: EsTreeNode): void => { + if (isFunctionLike(node)) functionNodes.push(node); + forEachChildNode(node, collect); + }; + collect(program); + return functionNodes; +}; diff --git a/packages/cfg/src/analysis/lexical-binding-resolver.ts b/packages/cfg/src/analysis/lexical-binding-resolver.ts new file mode 100644 index 000000000..de3f284ef --- /dev/null +++ b/packages/cfg/src/analysis/lexical-binding-resolver.ts @@ -0,0 +1,163 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import type { BindingId, ResolveBinding } from "../ir/place.js"; + +interface LexicalScope { + readonly parent: 
LexicalScope | null; + // A function or the program — the target `var`/`function` declarations + // hoist to, regardless of the block they appear in. Block scopes are not. + readonly isHoistBoundary: boolean; + readonly bindings: Map; +} + +// The nearest enclosing hoist boundary (function or program). +const hoistScopeOf = (scope: LexicalScope): LexicalScope => { + let current = scope; + while (!current.isHoistBoundary && current.parent !== null) current = current.parent; + return current; +}; + +// A block-like node opens a fresh lexical scope for its `let`/`const`/class +// declarations. Function bodies open a scope through the function itself +// (which also holds the parameters), so the body BlockStatement nesting is +// harmless. +const opensBlockScope = (node: EsTreeNode): boolean => + isNodeOfType(node, "BlockStatement") || + isNodeOfType(node, "ForStatement") || + isNodeOfType(node, "ForInStatement") || + isNodeOfType(node, "ForOfStatement") || + isNodeOfType(node, "SwitchStatement") || + isNodeOfType(node, "CatchClause"); + +// A lightweight lexical scope/shadowing/hoisting resolver, self-contained +// so SSA is testable without a host. Declarations are registered while +// walking; resolution is deferred until the whole tree is seen, so forward +// references and hoisting (`x; var x;`) resolve by scope-chain lookup. The +// oxlint plugin can inject its richer `scope-analysis` resolver instead. +export const createLexicalBindingResolver = (program: EsTreeNode): ResolveBinding => { + let nextBindingId = 0; + const scopeOfIdentifier = new Map(); + + const declare = (scope: LexicalScope, name: string): void => { + if (!scope.bindings.has(name)) scope.bindings.set(name, nextBindingId++); + }; + + // Register the binding names a destructuring/parameter pattern introduces + // (default-value and computed-key expressions are handled by the generic + // child walk, so they are skipped here). 
+ const declarePattern = (pattern: EsTreeNode, scope: LexicalScope): void => { + if (isNodeOfType(pattern, "Identifier")) { + declare(scope, pattern.name); + return; + } + if (isNodeOfType(pattern, "ObjectPattern")) { + for (const property of pattern.properties) { + if (isNodeOfType(property, "RestElement")) { + declarePattern(property.argument as EsTreeNode, scope); + } else { + declarePattern(property.value as EsTreeNode, scope); + } + } + return; + } + if (isNodeOfType(pattern, "ArrayPattern")) { + for (const element of pattern.elements) { + if (element) declarePattern(element as EsTreeNode, scope); + } + return; + } + if (isNodeOfType(pattern, "AssignmentPattern")) { + declarePattern(pattern.left as EsTreeNode, scope); + return; + } + if (isNodeOfType(pattern, "RestElement")) { + declarePattern(pattern.argument as EsTreeNode, scope); + } + }; + + const registerDeclarations = (node: EsTreeNode, scope: LexicalScope): void => { + if (isNodeOfType(node, "VariableDeclaration")) { + const target = node.kind === "var" ? hoistScopeOf(scope) : scope; + for (const declarator of node.declarations) { + declarePattern(declarator.id as EsTreeNode, target); + } + return; + } + if (isNodeOfType(node, "FunctionDeclaration") && node.id) { + declare(hoistScopeOf(scope), node.id.name); + return; + } + if (isNodeOfType(node, "ClassDeclaration") && node.id) { + declare(scope, node.id.name); + return; + } + if (isNodeOfType(node, "ImportDeclaration")) { + for (const specifier of node.specifiers) { + declare(hoistScopeOf(scope), specifier.local.name); + } + } + }; + + const walk = (node: EsTreeNode, scope: LexicalScope): void => { + if (isNodeOfType(node, "Identifier")) { + scopeOfIdentifier.set(node, scope); + return; + } + + // A function declaration's own name belongs to the enclosing scope, not + // the function's; register it before descending into the new scope. 
+ registerDeclarations(node, scope); + + if (isFunctionLike(node)) { + const functionScope: LexicalScope = { + parent: scope, + isHoistBoundary: true, + bindings: new Map(), + }; + // A named function expression's name is visible only inside itself. + if (isNodeOfType(node, "FunctionExpression") && node.id) { + declare(functionScope, node.id.name); + } + for (const parameter of node.params) declarePattern(parameter as EsTreeNode, functionScope); + forEachChildNode(node, (child) => walk(child, functionScope)); + return; + } + + if (opensBlockScope(node)) { + const blockScope: LexicalScope = { + parent: scope, + isHoistBoundary: false, + bindings: new Map(), + }; + if (isNodeOfType(node, "CatchClause") && node.param) { + declarePattern(node.param as EsTreeNode, blockScope); + } + forEachChildNode(node, (child) => walk(child, blockScope)); + return; + } + + forEachChildNode(node, (child) => walk(child, scope)); + }; + + const rootScope: LexicalScope = { + parent: null, + isHoistBoundary: true, + bindings: new Map(), + }; + // Resolution is deferred, so a single declaration-registering walk + // suffices — top-level forward references and hoisting resolve by chain. + forEachChildNode(program, (child) => walk(child, rootScope)); + + return (identifier: EsTreeNode): BindingId | null => { + if (!isNodeOfType(identifier, "Identifier")) return null; + let scope: LexicalScope | null = scopeOfIdentifier.get(identifier) ?? 
rootScope; + while (scope !== null) { + const binding = scope.bindings.get(identifier.name); + if (binding !== undefined) return binding; + scope = scope.parent; + } + return null; + }; +}; diff --git a/packages/cfg/src/analysis/loops.ts b/packages/cfg/src/analysis/loops.ts new file mode 100644 index 000000000..dfe110008 --- /dev/null +++ b/packages/cfg/src/analysis/loops.ts @@ -0,0 +1,30 @@ +import type { BasicBlock, FunctionCfg } from "../ir/basic-block.js"; + +// A block is on a cycle iff it can reach itself by following non-throw +// successor edges (loop back-edges are normal "uncond" edges; a +// throw→catch edge is not a loop). +export const computeCyclicBlocks = (cfg: FunctionCfg): Set => { + const cyclicBlocks = new Set(); + for (const startBlock of cfg.blocks) { + const visited = new Set(); + const queue: BasicBlock[] = []; + for (const edge of startBlock.successors) { + if (edge.kind !== "throw") queue.push(edge.to); + } + let isOnCycle = false; + while (queue.length > 0) { + const block = queue.shift()!; + if (block === startBlock) { + isOnCycle = true; + break; + } + if (visited.has(block)) continue; + visited.add(block); + for (const edge of block.successors) { + if (edge.kind !== "throw") queue.push(edge.to); + } + } + if (isOnCycle) cyclicBlocks.add(startBlock); + } + return cyclicBlocks; +}; diff --git a/packages/cfg/src/analysis/node-order.ts b/packages/cfg/src/analysis/node-order.ts new file mode 100644 index 000000000..b285ce1b0 --- /dev/null +++ b/packages/cfg/src/analysis/node-order.ts @@ -0,0 +1,22 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; + +// Source-order index for every node owned by this function (not +// descending into nested functions). Used to break ties for two nodes +// that share a basic block: within a straight-line block the earlier +// node dominates the later one. 
+export const computeNodeOrder = ( + functionNode: EsTreeNode, + body: EsTreeNode, +): Map => { + const nodeOrder = new Map(); + let nextOrder = 0; + const walk = (node: EsTreeNode): void => { + if (!nodeOrder.has(node)) nodeOrder.set(node, nextOrder++); + if (node !== functionNode && isFunctionLike(node)) return; + forEachChildNode(node, walk); + }; + walk(body); + return nodeOrder; +}; diff --git a/packages/cfg/src/analysis/places-by-block.ts b/packages/cfg/src/analysis/places-by-block.ts new file mode 100644 index 000000000..268fdcb66 --- /dev/null +++ b/packages/cfg/src/analysis/places-by-block.ts @@ -0,0 +1,35 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import type { BasicBlock, FunctionCfg } from "../ir/basic-block.js"; +import type { Place, ResolveBinding } from "../ir/place.js"; +import { collectParameterPlaces, collectPlaces } from "./defs-uses.js"; + +// The ordered binding reads/writes of one function's CFG, bucketed into the +// block each occurrence executes in (`cfg.blockOf`). Parameters are written +// at the entry, before any body occurrence. This is the single source of +// truth feeding both SSA construction (`ssa.ts`) and the SSA-keyed dataflow +// analyses (`dataflow/definite-assignment.ts`) — both need the same +// per-block, evaluation-ordered occurrence stream. +export const collectPlacesByBlock = ( + cfg: FunctionCfg, + owner: EsTreeNode, + resolveBinding: ResolveBinding, +): Map => { + const parameters = isFunctionLike(owner) ? (owner.params as EsTreeNode[]) : []; + const body = isFunctionLike(owner) ? 
(owner.body as EsTreeNode) : owner; + const parameterPlaces = collectParameterPlaces(parameters, resolveBinding); + const bodyPlaces = collectPlaces(body, resolveBinding); + + const placesByBlock = new Map(); + const append = (block: BasicBlock, place: Place): void => { + const existing = placesByBlock.get(block); + if (existing) existing.push(place); + else placesByBlock.set(block, [place]); + }; + for (const place of parameterPlaces) append(cfg.entry, place); + for (const place of bodyPlaces) { + const block = cfg.blockOf(place.node); + if (block) append(block, place); + } + return placesByBlock; +}; diff --git a/packages/cfg/src/analysis/reachability.ts b/packages/cfg/src/analysis/reachability.ts new file mode 100644 index 000000000..77f25e1d6 --- /dev/null +++ b/packages/cfg/src/analysis/reachability.ts @@ -0,0 +1,37 @@ +import type { BasicBlock, CfgEdge, FunctionCfg } from "../ir/basic-block.js"; + +export const isBlockReachableFromBlock = ( + fromBlock: BasicBlock, + toBlock: BasicBlock, + includeEdge: (edge: CfgEdge) => boolean = () => true, +): boolean => { + const visited = new Set(); + const queue: BasicBlock[] = [fromBlock]; + while (queue.length > 0) { + const block = queue.shift()!; + for (const edge of block.successors) { + if (!includeEdge(edge)) continue; + if (edge.to === toBlock) return true; + if (!visited.has(edge.to)) { + visited.add(edge.to); + queue.push(edge.to); + } + } + } + return false; +}; + +// Blocks reachable from entry over EVERY edge kind (including catch +// edges). Used to answer `isUnreachable` — a block with no path from +// entry is dead code. 
+export const computeReachableFromEntry = (cfg: FunctionCfg): Set => { + const visited = new Set(); + const queue: BasicBlock[] = [cfg.entry]; + while (queue.length > 0) { + const block = queue.shift()!; + if (visited.has(block)) continue; + visited.add(block); + for (const edge of block.successors) queue.push(edge.to); + } + return visited; +}; diff --git a/packages/cfg/src/analysis/reverse-postorder.ts b/packages/cfg/src/analysis/reverse-postorder.ts new file mode 100644 index 000000000..636746462 --- /dev/null +++ b/packages/cfg/src/analysis/reverse-postorder.ts @@ -0,0 +1,33 @@ +import type { BasicBlock } from "../ir/basic-block.js"; + +// Reverse-postorder of every block reachable from `root` over the given +// successor relation. RPO guarantees a block appears before all blocks it +// strictly dominates, which is the ordering the Cooper-Harvey-Kennedy +// dominance algorithm and forward dataflow both require. Iterative DFS so +// deep CFGs can't blow the call stack. +export const reversePostorder = ( + root: BasicBlock, + successorsOf: (block: BasicBlock) => ReadonlyArray, +): BasicBlock[] => { + const postorder: BasicBlock[] = []; + const visited = new Set([root]); + const stack: Array<{ block: BasicBlock; nextSuccessor: number }> = [ + { block: root, nextSuccessor: 0 }, + ]; + while (stack.length > 0) { + const frame = stack[stack.length - 1]!; + const successors = successorsOf(frame.block); + if (frame.nextSuccessor < successors.length) { + const next = successors[frame.nextSuccessor++]!; + if (!visited.has(next)) { + visited.add(next); + stack.push({ block: next, nextSuccessor: 0 }); + } + } else { + postorder.push(frame.block); + stack.pop(); + } + } + postorder.reverse(); + return postorder; +}; diff --git a/packages/cfg/src/analysis/unconditional.ts b/packages/cfg/src/analysis/unconditional.ts new file mode 100644 index 000000000..0883cc307 --- /dev/null +++ b/packages/cfg/src/analysis/unconditional.ts @@ -0,0 +1,54 @@ +import type { BasicBlock, 
FunctionCfg } from "../ir/basic-block.js"; + +// A block B is "unconditional from entry" iff every execution path +// from entry to exit passes through B. We compute this by, for each +// block B, asking: if we removed B from the graph, is exit still +// reachable from entry? If NO, B is on every path → unconditional. +// +// Cost: O(|blocks|^2) — fine for function-sized CFGs (typically <100 +// blocks). Avoids needing a full dominator tree. +export const computeUnconditionalSet = (cfg: FunctionCfg): Set => { + // Skip "throw" edges when computing reachability — uncaught throws + // don't represent a normal completion path. This makes + // `if (x) throw; useHook();` evaluate as unconditional (the + // `useHook` block is the only normal path to exit). + const reachableFromEntry = (excluded: BasicBlock | null): Set => { + const visited = new Set(); + const queue: BasicBlock[] = []; + if (cfg.entry !== excluded) queue.push(cfg.entry); + while (queue.length > 0) { + const block = queue.shift()!; + if (visited.has(block)) continue; + visited.add(block); + for (const edge of block.successors) { + if (edge.kind === "throw") continue; + if (edge.to === excluded) continue; + queue.push(edge.to); + } + } + return visited; + }; + + // Whole-graph reachability: any block NOT in this set is dead code + // (e.g. statements after an unconditional `return;` / `throw;`). + // Dead-code blocks vacuously satisfy "unconditional from entry" + // because the call site is never reached at runtime — there's + // nothing to constrain. + const reachableFromEntryFull = reachableFromEntry(null); + + const unconditional = new Set(); + // Entry is trivially on every path. + unconditional.add(cfg.entry); + // Exit is on every (terminating) path. 
+ unconditional.add(cfg.exit); + for (const block of cfg.blocks) { + if (unconditional.has(block)) continue; + if (!reachableFromEntryFull.has(block)) { + unconditional.add(block); + continue; + } + const stillReaches = reachableFromEntry(block).has(cfg.exit); + if (!stillReaches) unconditional.add(block); + } + return unconditional; +}; diff --git a/packages/cfg/src/ast/es-tree-node-of-type.ts b/packages/cfg/src/ast/es-tree-node-of-type.ts new file mode 100644 index 000000000..277c3dea4 --- /dev/null +++ b/packages/cfg/src/ast/es-tree-node-of-type.ts @@ -0,0 +1,19 @@ +import type { TSESTree } from "@typescript-eslint/types"; +import type { EsTreeNode } from "./es-tree-node.js"; +import type { EsTreeNodeType } from "./es-tree-node-type.js"; + +// Distributes over the union so we can override `parent` without collapsing +// discriminants. TSESTree pins each node's parent to a specific node kind +// (e.g. JSXAttribute.parent: JSXOpeningElement), but a walker assigns parent +// freely as it descends, so we relax it to `EsTreeNode | null` here. +type WithLooseParent = NodeType extends NodeType + ? Omit & { parent?: EsTreeNode | null } + : never; + +// Resolves a string `type` discriminant to the real TSESTree shape when known, +// falling back to a loose `EsTreeNode & { type }` for nodes that TSESTree +// doesn't model (none today, but keeps the helper safe under future TS evolution). +export type EsTreeNodeOfType = + Extract extends never + ? EsTreeNode & { type: NodeType } + : WithLooseParent>; diff --git a/packages/cfg/src/ast/es-tree-node-type.ts b/packages/cfg/src/ast/es-tree-node-type.ts new file mode 100644 index 000000000..9c876f215 --- /dev/null +++ b/packages/cfg/src/ast/es-tree-node-type.ts @@ -0,0 +1,7 @@ +import type { TSESTree } from "@typescript-eslint/types"; + +// TSESTree models `type` as `AST_NODE_TYPES` enum members. 
Wrapping the union +// in a template literal widens it to the underlying string-literal values, so +// callers can pass plain strings like `"FunctionDeclaration"` to `isNodeOfType` +// without importing the enum. +export type EsTreeNodeType = `${TSESTree.Node["type"]}`; diff --git a/packages/cfg/src/ast/es-tree-node.ts b/packages/cfg/src/ast/es-tree-node.ts new file mode 100644 index 000000000..2f451088d --- /dev/null +++ b/packages/cfg/src/ast/es-tree-node.ts @@ -0,0 +1,15 @@ +import type { TSESTree } from "@typescript-eslint/types"; + +// Distributes over the TSESTree.Node union so each member gets its `parent` +// relaxed independently — TSESTree pins each node's parent to a specific +// kind (e.g. JSXAttribute.parent: JSXOpeningElement) but a walker assigns +// parent freely as it descends, so we re-broaden to `EsTreeNode | null` here. +type WithLooseParent = NodeType extends NodeType + ? Omit & { parent?: EsTreeNode | null } + : never; + +// THE AST node type the CFG builder walks. It's the full TSESTree +// discriminated union (every concrete node kind) with the `parent` field +// relaxed. Structurally identical to the consuming plugin's own +// `EsTreeNode`, so nodes cross the package boundary without a cast. +export type EsTreeNode = WithLooseParent; diff --git a/packages/cfg/src/ast/for-each-child-node.ts b/packages/cfg/src/ast/for-each-child-node.ts new file mode 100644 index 000000000..2e9870008 --- /dev/null +++ b/packages/cfg/src/ast/for-each-child-node.ts @@ -0,0 +1,19 @@ +import type { EsTreeNode } from "./es-tree-node.js"; +import { isAstNode } from "./is-ast-node.js"; + +// Visit every direct child AST node of `node` (array entries and single +// nodes alike), skipping the `parent` back-reference. The shared traversal +// mechanics behind the recursive walkers in build/ and analysis/; callers +// own the recursion and any function-boundary stop. 
+export const forEachChildNode = (node: EsTreeNode, visit: (child: EsTreeNode) => void): void => { + const record = node as unknown as Record; + for (const key of Object.keys(record)) { + if (key === "parent") continue; + const child = record[key]; + if (Array.isArray(child)) { + for (const item of child) if (isAstNode(item)) visit(item); + } else if (isAstNode(child)) { + visit(child); + } + } +}; diff --git a/packages/cfg/src/ast/has-type-property.ts b/packages/cfg/src/ast/has-type-property.ts new file mode 100644 index 000000000..544ffdbda --- /dev/null +++ b/packages/cfg/src/ast/has-type-property.ts @@ -0,0 +1,4 @@ +import type { ValueWithType } from "./value-with-type.js"; + +export const hasTypeProperty = (value: unknown): value is ValueWithType => + Boolean(value && typeof value === "object" && "type" in value); diff --git a/packages/cfg/src/ast/is-ast-node.ts b/packages/cfg/src/ast/is-ast-node.ts new file mode 100644 index 000000000..839a3e860 --- /dev/null +++ b/packages/cfg/src/ast/is-ast-node.ts @@ -0,0 +1,7 @@ +import type { EsTreeNode } from "./es-tree-node.js"; +import { hasTypeProperty } from "./has-type-property.js"; + +export const isAstNode = (value: unknown): value is EsTreeNode => { + if (!hasTypeProperty(value)) return false; + return typeof value.type === "string"; +}; diff --git a/packages/cfg/src/ast/is-function-like.ts b/packages/cfg/src/ast/is-function-like.ts new file mode 100644 index 000000000..ab75d37af --- /dev/null +++ b/packages/cfg/src/ast/is-function-like.ts @@ -0,0 +1,21 @@ +import type { EsTreeNode } from "./es-tree-node.js"; +import type { EsTreeNodeOfType } from "./es-tree-node-of-type.js"; +import { isNodeOfType } from "./is-node-of-type.js"; + +// Type-guard for the three "function-like" ESTree node shapes: +// `ArrowFunctionExpression`, `FunctionExpression`, `FunctionDeclaration`. +// Accepts `null | undefined` so callers walking parent chains don't need +// their own pre-check. 
A function boundary is where the CFG stops +// descending — every function gets its own graph. +export const isFunctionLike = ( + node: EsTreeNode | null | undefined, +): node is + | EsTreeNodeOfType<"ArrowFunctionExpression"> + | EsTreeNodeOfType<"FunctionExpression"> + | EsTreeNodeOfType<"FunctionDeclaration"> => + Boolean( + node && + (isNodeOfType(node, "ArrowFunctionExpression") || + isNodeOfType(node, "FunctionExpression") || + isNodeOfType(node, "FunctionDeclaration")), + ); diff --git a/packages/cfg/src/ast/is-node-of-type.ts b/packages/cfg/src/ast/is-node-of-type.ts new file mode 100644 index 000000000..04045075b --- /dev/null +++ b/packages/cfg/src/ast/is-node-of-type.ts @@ -0,0 +1,8 @@ +import type { EsTreeNodeOfType } from "./es-tree-node-of-type.js"; +import type { EsTreeNodeType } from "./es-tree-node-type.js"; +import { hasTypeProperty } from "./has-type-property.js"; + +export const isNodeOfType = ( + node: unknown, + type: NodeType, +): node is EsTreeNodeOfType => Boolean(hasTypeProperty(node) && node.type === type); diff --git a/packages/cfg/src/ast/value-with-type.ts b/packages/cfg/src/ast/value-with-type.ts new file mode 100644 index 000000000..e85a2cc5c --- /dev/null +++ b/packages/cfg/src/ast/value-with-type.ts @@ -0,0 +1,3 @@ +export interface ValueWithType { + type?: unknown; +} diff --git a/packages/cfg/src/build/build-expression.ts b/packages/cfg/src/build/build-expression.ts new file mode 100644 index 000000000..784b35c21 --- /dev/null +++ b/packages/cfg/src/build/build-expression.ts @@ -0,0 +1,190 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { forEachChildNode } from "../ast/for-each-child-node.js"; +import { isAstNode } from "../ast/is-ast-node.js"; +import { isFunctionLike } from "../ast/is-function-like.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import type { BasicBlock } from "../ir/basic-block.js"; +import { addEdge, createBlock, mapDescendantsToBlock, setTerminal } from 
"./cfg-builder.js"; +import type { CfgBuilder } from "./cfg-builder.js"; + +const LOGICAL_ASSIGNMENT_OPERATORS = new Set(["&&=", "||=", "??="]); + +export const isLogicalAssignment = (node: EsTreeNode): boolean => + isNodeOfType(node, "AssignmentExpression") && + LOGICAL_ASSIGNMENT_OPERATORS.has((node as { operator: string }).operator); + +// True when an expression subtree contains short-circuiting control flow +// we model as branches: a ternary, a `&&` / `||` / `??`, or a logical +// assignment (`&&=` / `||=` / `??=`). Stops at nested function boundaries — +// those get their own CFG. Lets `buildStatement` keep the cheap +// `mapDescendantsToBlock` path for straight-line code and only pay the +// block-splitting cost when an expression actually branches. +export const containsExpressionControlFlow = (node: EsTreeNode): boolean => { + let found = false; + const visit = (current: EsTreeNode): void => { + if (found) return; + if ( + isNodeOfType(current, "ConditionalExpression") || + isNodeOfType(current, "LogicalExpression") || + isNodeOfType(current, "ChainExpression") || + isLogicalAssignment(current) + ) { + found = true; + return; + } + if (isFunctionLike(current)) return; + forEachChildNode(current, visit); + }; + visit(node); + return found; +}; + +// Lower an expression's embedded control flow into the CFG — mirroring how +// the React Compiler's HIR (and oxc_cfg) give a ternary's arms, a logical +// operator's right operand, and a logical assignment's right operand their +// own basic blocks. A hook / setState / effect nested in any of those is +// then correctly seen as CONDITIONAL (short-circuited on some path), which +// statement-level lowering alone cannot see. Returns — and maps the node to +// — the block where its value becomes available (its join): a node's effect +// happens AFTER its operands, so a `wrap(cond ? a : b)` call lands in the +// post-arms block, not the pre-test one. Never descends into nested +// functions (they get their own CFG). 
+export const buildExpression = ( + builder: CfgBuilder, + node: EsTreeNode | null | undefined, + current: BasicBlock, +): BasicBlock => { + if (!node) return current; + if (isFunctionLike(node)) { + builder.nodeBlock.set(node, current); + return current; + } + + if (isNodeOfType(node, "ConditionalExpression")) { + const afterTest = buildExpression(builder, node.test as EsTreeNode, current); + const consequentBlock = createBlock(builder); + const alternateBlock = createBlock(builder); + const merge = createBlock(builder); + addEdge(afterTest, consequentBlock, "cond"); + addEdge(afterTest, alternateBlock, "cond"); + setTerminal(afterTest, { kind: "ternary", fallthrough: merge }); + const consequentEnd = buildExpression(builder, node.consequent as EsTreeNode, consequentBlock); + const alternateEnd = buildExpression(builder, node.alternate as EsTreeNode, alternateBlock); + addEdge(consequentEnd, merge, "uncond"); + addEdge(alternateEnd, merge, "uncond"); + builder.nodeBlock.set(node, merge); + return merge; + } + + if (isNodeOfType(node, "LogicalExpression") || isLogicalAssignment(node)) { + // The left/target operand is always evaluated; the right operand is + // conditional (short-circuited). From the post-left block one successor + // evaluates the RHS and one skips straight to the join. + const afterLeft = buildExpression(builder, (node as { left: EsTreeNode }).left, current); + const rightBlock = createBlock(builder); + const merge = createBlock(builder); + addEdge(afterLeft, rightBlock, "cond"); + addEdge(afterLeft, merge, "cond"); + setTerminal(afterLeft, { kind: "logical", fallthrough: merge }); + const rightEnd = buildExpression(builder, (node as { right: EsTreeNode }).right, rightBlock); + addEdge(rightEnd, merge, "uncond"); + builder.nodeBlock.set(node, merge); + return merge; + } + + if (isNodeOfType(node, "ChainExpression")) { + // Optional chain (`a?.b.c?.()`). 
The React Compiler models this with a + // SINGLE shared short-circuit target: any nullish optional link jumps to + // the same continuation (value = undefined). Everything to the right of + // a `?.` is conditional; the chain value is available at `merge`, where + // the short-circuit and the fully-evaluated paths rejoin. + const merge = createBlock(builder); + const chainEnd = buildOptionalChainLink( + builder, + (node as { expression: EsTreeNode }).expression, + current, + merge, + ); + addEdge(chainEnd, merge, "uncond"); + builder.nodeBlock.set(node, merge); + return merge; + } + + // Generic expression: evaluate children left-to-right, threading the + // block so a control-flow child splits the siblings that follow it. The + // node itself completes in the final cursor block. + let cursor = current; + forEachChildNode(node, (child) => { + cursor = buildExpression(builder, child, cursor); + }); + builder.nodeBlock.set(node, cursor); + return cursor; +}; + +// Lower one link of an optional chain in evaluation order (innermost +// object/callee first), branching to the shared `merge` (short-circuit) at +// each optional `?.`. Mirrors the compiler's `lowerOptional*Expression`: +// the base is evaluated unconditionally, then anything to the right of the +// `?.` — a computed property, a deeper access, or a call's arguments — +// evaluates in the conditional continuation. Returns the block where this +// link's value is available on the non-short-circuit path. 
+const buildOptionalChainLink = ( + builder: CfgBuilder, + node: EsTreeNode, + current: BasicBlock, + merge: BasicBlock, +): BasicBlock => { + if (isNodeOfType(node, "MemberExpression")) { + const afterObject = buildOptionalChainLink(builder, node.object as EsTreeNode, current, merge); + let cursor = afterObject; + if ((node as { optional?: boolean }).optional) { + const continuation = createBlock(builder); + addEdge(afterObject, continuation, "cond"); // base non-nullish → continue + addEdge(afterObject, merge, "cond"); // base nullish → short-circuit + setTerminal(afterObject, { kind: "optional", fallthrough: merge }); + cursor = continuation; + } + // A computed key (`a?.[expr]`) is only evaluated once the base is known + // non-nullish, so it belongs in the post-branch continuation. + if ((node as { computed?: boolean }).computed) { + cursor = buildExpression(builder, node.property as EsTreeNode, cursor); + } + builder.nodeBlock.set(node, cursor); + return cursor; + } + + if (isNodeOfType(node, "CallExpression")) { + const afterCallee = buildOptionalChainLink(builder, node.callee as EsTreeNode, current, merge); + let cursor = afterCallee; + if ((node as { optional?: boolean }).optional) { + const continuation = createBlock(builder); + addEdge(afterCallee, continuation, "cond"); + addEdge(afterCallee, merge, "cond"); + setTerminal(afterCallee, { kind: "optional", fallthrough: merge }); + cursor = continuation; + } + for (const argument of (node as { arguments: ReadonlyArray }).arguments) { + if (isAstNode(argument)) cursor = buildExpression(builder, argument, cursor); + } + builder.nodeBlock.set(node, cursor); + return cursor; + } + + // Chain base (an identifier, a parenthesized expression, a non-optional + // sub-expression): evaluate it normally. + return buildExpression(builder, node, current); +}; + +// Evaluate a sub-expression in `current`, returning the block where its +// value is available. 
Falls back to the cheap whole-subtree mapping when +// the expression has no embedded control flow. +export const buildSubExpression = ( + builder: CfgBuilder, + node: EsTreeNode | null | undefined, + current: BasicBlock, +): BasicBlock => { + if (!node) return current; + if (containsExpressionControlFlow(node)) return buildExpression(builder, node, current); + mapDescendantsToBlock(builder, node, current); + return current; +}; diff --git a/packages/cfg/src/build/build-function-cfg.ts b/packages/cfg/src/build/build-function-cfg.ts new file mode 100644 index 000000000..d778e7859 --- /dev/null +++ b/packages/cfg/src/build/build-function-cfg.ts @@ -0,0 +1,60 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import type { BasicBlock, FunctionCfg } from "../ir/basic-block.js"; +import { + addEdge, + appendInstruction, + createBlock, + createBuilder, + setTerminal, +} from "./cfg-builder.js"; +import { buildSubExpression } from "./build-expression.js"; +import { buildStatements } from "./build-statement.js"; + +// Back-fill the terminal of every block that merely falls through: a +// single successor becomes a `goto` (normal variant); a block with no +// successor keeps the `unreachable` sentinel (a genuine orphan or the +// function exit). Branching blocks already carry an explicit terminal. 
+const finalizeTerminals = (blocks: ReadonlyArray, exit: BasicBlock): void => { + for (const block of blocks) { + if (block === exit) continue; + if (block.terminal.kind !== "unreachable") continue; + if (block.successors.length === 1) { + setTerminal(block, { kind: "goto", block: block.successors[0]!.to, variant: "normal" }); + } + } +}; + +export const buildFunctionCfg = (functionNode: EsTreeNode, body: EsTreeNode): FunctionCfg => { + const builder = createBuilder(); + const entry = createBlock(builder); + const exit = createBlock(builder); + builder.entry = entry; + builder.exit = exit; + + let bodyEnd: BasicBlock; + if (isNodeOfType(body, "BlockStatement") || isNodeOfType(body, "Program")) { + bodyEnd = buildStatements(builder, body.body as EsTreeNode[], entry); + } else { + // Arrow expression body: a single Expression. Lower its control flow so + // `() => cond ? useA() : useB()` sees the hooks as conditional. + bodyEnd = buildSubExpression(builder, body, entry); + } + // Implicit return / fall-off the end of the function body. + addEdge(bodyEnd, exit, "uncond"); + if (bodyEnd.terminal.kind === "unreachable") { + appendInstruction(bodyEnd, body, "implicit-return"); + setTerminal(bodyEnd, { kind: "return", argument: null }); + } + finalizeTerminals(builder.blocks, exit); + + const blockOf = (node: EsTreeNode): BasicBlock | null => builder.nodeBlock.get(node) ?? 
null; + + return { + owner: functionNode, + entry, + exit, + blocks: builder.blocks, + blockOf, + }; +}; diff --git a/packages/cfg/src/build/build-statement.ts b/packages/cfg/src/build/build-statement.ts new file mode 100644 index 000000000..2bf2373f9 --- /dev/null +++ b/packages/cfg/src/build/build-statement.ts @@ -0,0 +1,476 @@ +import type { EsTreeNode } from "../ast/es-tree-node.js"; +import { isNodeOfType } from "../ast/is-node-of-type.js"; +import { isBlockReachableFromBlock } from "../analysis/reachability.js"; +import { isConstantTruthyTest } from "../constant-condition.js"; +import type { BasicBlock } from "../ir/basic-block.js"; +import type { TerminalCase } from "../ir/terminal.js"; +import { + addEdge, + appendInstruction, + appendNode, + createBlock, + mapDescendantsToBlock, + setTerminal, +} from "./cfg-builder.js"; +import type { CfgBuilder } from "./cfg-builder.js"; +import { + buildExpression, + buildSubExpression, + containsExpressionControlFlow, +} from "./build-expression.js"; + +// Returns true if the node introduces internal control flow we want to +// expand into the CFG (rather than treat as a single statement). +const hasInternalControlFlow = (node: EsTreeNode): boolean => { + switch (node.type) { + case "IfStatement": + case "WhileStatement": + case "DoWhileStatement": + case "ForStatement": + case "ForInStatement": + case "ForOfStatement": + case "SwitchStatement": + case "TryStatement": + case "ReturnStatement": + case "ThrowStatement": + case "BreakStatement": + case "ContinueStatement": + case "BlockStatement": + case "LabeledStatement": + return true; + default: + return false; + } +}; + +const findLabel = ( + builder: CfgBuilder, + name: string | null, +): { merge: BasicBlock; header: BasicBlock | null } | null => { + if (name === null) { + // Unlabeled break/continue → innermost loop or switch. 
+ if (builder.loopStack.length > 0) { + const top = builder.loopStack[builder.loopStack.length - 1]!; + return { merge: top.merge, header: top.header }; + } + if (builder.switchStack.length > 0) { + const top = builder.switchStack[builder.switchStack.length - 1]!; + return { merge: top.merge, header: null }; + } + return null; + } + for (let index = builder.labelStack.length - 1; index >= 0; index--) { + const entry = builder.labelStack[index]!; + if (entry.label === name) return { merge: entry.merge, header: entry.header }; + } + return null; +}; + +// A loop wrapped by one or more `LabeledStatement`s is the `continue