johnsoncodehk
diff --git a/‎src/emit-lexer.ts‎
Lines changed: 1 addition & 1 deletion b/‎src/emit-lexer.ts‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/emit-parser.ts‎
Lines changed: 25 additions & 6 deletions b/‎src/emit-parser.ts‎
Lines changed: 25 additions & 6 deletions
diff --git a/‎src/emit-portable.ts‎
Lines changed: 7 additions & 12 deletions b/‎src/emit-portable.ts‎
Lines changed: 7 additions & 12 deletions
diff --git a/‎src/emit.ts‎
Lines changed: 33 additions & 0 deletions b/‎src/emit.ts‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎src/target-go.ts‎
Lines changed: 11 additions & 5 deletions b/‎src/target-go.ts‎
Lines changed: 11 additions & 5 deletions
diff --git a/‎src/target-rust.ts‎
Lines changed: 11 additions & 5 deletions b/‎src/target-rust.ts‎
Lines changed: 11 additions & 5 deletions
diff --git a/‎src/target-ts.ts‎
Lines changed: 10 additions & 3 deletions b/‎src/target-ts.ts‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎test/cst-match-totality.ts‎
Lines changed: 2 additions & 2 deletions b/‎test/cst-match-totality.ts‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎test/emit-lexer-verify.ts‎
Lines changed: 2 additions & 2 deletions b/‎test/emit-lexer-verify.ts‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎test/emit-parser-bench.ts‎
Lines changed: 2 additions & 2 deletions b/‎test/emit-parser-bench.ts‎
Lines changed: 2 additions & 2 deletions
@@ -45,7 +45,7 @@ const NON_ASCII_WS_FN =
 const nonAsciiWsConsume = (v: string, cont: boolean, indent: string): string =>
   `${indent}if (${v} > 127 && lxNonAsciiWs(${v})) { LX_WS.lastIndex = pos; const m = LX_WS.exec(source); if (m !== null) { if (m[0].includes('\\n')) pendingNl = true; pos += m[0].length;${cont ? ' continue;' : ''} } }`;
 
-export function emitLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
+export function emitSoaLexer(grammar: CstGrammar, st: LexerSymtab): string | null {
   // Out of scope: the markup / indentation / newline state machines.
   if (grammar.markup || grammar.indent || grammar.newline) return null;
   if (grammar.tokens.some(t => tokenBlockPatternSource(t) || t.blockOnly)) return null;
 
@@ -27,7 +27,8 @@
 import type { CstGrammar, RuleExpr, RuleDecl } from './types.ts';
 import { isKeywordLiteral, collectLiterals } from './grammar-utils.ts';
 import { analyzeGrammar, findEntryRule, type Sec } from './grammar-analysis.ts';
-import { emitLexer } from './emit-lexer.ts';
+import { emitSoaLexer } from './emit-lexer.ts';
+import type { Target } from './emit.ts';
 import { withAwaitYield } from './await-yield-fork.ts';
 
 // ── Static analysis ──
@@ -1092,7 +1093,28 @@ class Emitter {
 
 // ── Top-level emit ──
 
-export function emitParser(grammar: CstGrammar): string {
+// The `js` Target: the optimized SoA-int parser/lexer, wrapped behind the same two-method
+// Target contract as the portable ts/go/rust targets (see emit.ts). `emitJsLexer` derives the
+// standalone lexer, or null when the grammar needs the runtime lexer; `emitJsParser` embeds the
+// lexer source it is handed. Splitting the lexer COMPUTATION from its EMBEDDING leaves the emitted
+// bytes identical (both re-derive the same deterministic symtab), so `emit-parser-verify` stays byte-for-byte identical.
+export const jsTarget: Target = {
+  name: 'javascript',
+  ext: 'js',                 // emitted file extension (no dot)
+  emitLexer: emitJsLexer,    // function declarations below are hoisted, so referencing them here is safe
+  emitParser: emitJsParser,  // embeds the lexer source passed in; null selects the createLexer import path
+};
+
+export function emitJsLexer(grammar: CstGrammar): string | null {
+  grammar = withAwaitYield(grammar);   // apply the [Await]/[Yield] fork before analysis (see await-yield-fork.ts)
+  const st = analyze(grammar).symtab;  // symtab is re-derived here, not shared with emitJsParser
+  return emitSoaLexer(grammar, {       // null when emitSoaLexer declines the grammar
+    typeKind: st.typeKind, kwLitKind: st.kwLitKind, puLitKind: st.puLitKind,
+    KIND_PUNCT: st.KIND_PUNCT, KIND_NAMED_FALLBACK: st.KIND_NAMED_FALLBACK,
+  });
+}
+
+export function emitJsParser(grammar: CstGrammar, lexSrc: string | null): string {
   // [Await]/[Yield] context: name-fork the body-reachable rule closure into $A/$Y/$AY
   // families (see await-yield-fork.ts). No-op for a grammar with no ctx markers. Done
   // HERE (not at grammar export) so the forks exist ONLY in the parser's rule identity
@@ -1127,11 +1149,8 @@ export function emitParser(grammar: CstGrammar): string {
   // The lexer: EMITTED (specialized, standalone — see emit-lexer.ts) when the grammar
   // is a plain token stream; the data-driven createLexer runtime otherwise
   // (markup/indent/newline state machines stay interpreter-only).
+  // `lexSrc` is handed in by the Target façade (emitParser reuses emitLexer) — see emit.ts.
   const st = a.symtab;
-  const lexSrc = emitLexer(grammar, {
-    typeKind: st.typeKind, kwLitKind: st.kwLitKind, puLitKind: st.puLitKind,
-    KIND_PUNCT: st.KIND_PUNCT, KIND_NAMED_FALLBACK: st.KIND_NAMED_FALLBACK,
-  });
   e.soa = lexSrc !== null;
   if (!lexSrc) {
     e.emit(`import { createLexer } from ${J(resolveLexerImport())};`);
 
@@ -1,6 +1,6 @@
 // ── emit-portable ──
 //
-// The target-agnostic emitter (issue #6). `emitPortableParser(grammar, target)` derives
+// The target-agnostic emitter (issue #6). `emitParser(grammar, target)` (see emit.ts) derives
 // a COMPLETE, self-contained parser in the target's language from the same CstGrammar the
 // TS engine uses. It is the agnosticism proof: ONE analysis → ONE intermediate form (IR)
 // → N language renderings, all producing the byte-identical CST the interpreter does.
@@ -120,17 +120,12 @@ export type ParserIR = {
   tpl: TplCfg | null;          // null unless the grammar has a template token
 };
 
-export interface Target {
-  name: string;
-  ext: string;                       // emitted file extension (no dot)
-  render(ir: ParserIR): string;      // the complete, compilable source
-}
-
-export function emitPortableParser(grammar: CstGrammar, target: Target): string {
-  // Apply the [Await]/[Yield] context fork exactly as createParser does, so `await`/`yield`
-  // are keywords inside async/generator bodies and identifiers outside — name-forked into
-  // $A/$Y/$AY rule families. Every other consumer (and the portable parser) sees plain rules.
-  return target.render(buildIR(withAwaitYield(grammar)));
+// The target-agnostic parse plan for a grammar. Applies the [Await]/[Yield] context fork
+// exactly as createParser does (so `await`/`yield` are keywords inside async/generator bodies
+// and identifiers outside — name-forked into $A/$Y/$AY rule families), then builds the IR each
+// portable Target (ts/go/rust) renders. The `Target` contract itself lives in emit.ts.
+export function portableIR(grammar: CstGrammar): ParserIR {
+  return buildIR(withAwaitYield(grammar));   // recomputed on every call here; each portable Target calls this once in emitLexer and once in emitParser
 }
 
 // ── buildIR: grammar + analysis → the target-agnostic parse plan ──
 
@@ -0,0 +1,33 @@
+// The emit layer's public surface: exactly two APIs, both parameterized by a `Target`.
+//
+//   emitLexer(grammar, target)  → the lexer source for that target
+//   emitParser(grammar, target) → the parser source for that target, REUSING emitLexer
+//
+// A `Target` owns BOTH halves, so emitParser(grammar, target) reuses the SAME target's lexer —
+// jsTarget's parser embeds jsTarget's SoA-int lexer, goTarget's parser embeds goTarget's
+// Tok-list lexer. No cross-target lexer format is shared, so the optimized JS path keeps its
+// integer-bitmask token dispatch while the portable targets keep their clean byte scanner.
+//
+// Targets: `jsTarget` (the optimized SoA parser, emit-parser.ts) and the portable
+// `tsTarget`/`goTarget`/`rustTarget` (emit-portable.ts + target-*.ts).
+import type { CstGrammar } from './types.ts';
+
+export interface Target {
+  name: string;                                                 // target identifier, e.g. 'javascript', 'go'
+  ext: string;                                                  // emitted file extension (no dot)
+  emitLexer(grammar: CstGrammar): string | null;               // null ⇒ runtime-lexer fallback (jsTarget only, e.g. markup/indent/newline grammars or block-pattern tokens)
+  emitParser(grammar: CstGrammar, lexerSrc: string | null): string;   // the parser, embedding `lexerSrc` (portable targets embed nothing when it is null)
+}
+
+export function emitLexer(grammar: CstGrammar, target: Target): string | null {
+  return target.emitLexer(grammar);   // pure delegation: the target's result (including null) is returned unchanged
+}
+
+export function emitParser(grammar: CstGrammar, target: Target): string {
+  return target.emitParser(grammar, emitLexer(grammar, target));   // ← parser reuses the same target's lexer; a null lexer source is forwarded as-is
+}
+
+export { jsTarget } from './emit-parser.ts';
+export { tsTarget } from './target-ts.ts';
+export { goTarget } from './target-go.ts';
+export { rustTarget } from './target-rust.ts';
@@ -1,16 +1,18 @@
 // The Go Target for emit-portable. Renders the same language-agnostic ParserIR as tsTarget
 // into a self-contained Go program (Go stdlib only — the lexer is regex-free, so it compiles
 // with no module dependencies). Its CST JSON is checked byte-for-byte against the interpreter,
-// so `emitPortableParser(grammar, goTarget)` is a real, verified Go parser derived from the
+// so `emitParser(grammar, goTarget)` is a real, verified Go parser derived from the
 // same grammar definition.
 //
 // ARENA allocation (to minimise GC pressure, as tsgo does): nodes live in a flat `nodes []Node`,
 // their children in a flat `kids []int32`, and in-progress children accumulate on a `scratch`
 // stack. A node is an int32 index, never a heap pointer. Backtracking truncates the three
 // slices to saved lengths; the slices keep their capacity across parses (reset to len 0), so a
 // warmed parser allocates ~nothing per parse.
-import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target, TplCfg } from './emit-portable.ts';
-import type { TokenPattern } from './types.ts';
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, TplCfg } from './emit-portable.ts';
+import { portableIR } from './emit-portable.ts';
+import type { Target } from './emit.ts';
+import type { TokenPattern, CstGrammar } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
@@ -290,7 +292,11 @@ ${r.nudSeqs.map((seq) => `\t{ save := pos; sb := len(scratch); nb := len(nodes);
 export const goTarget: Target = {
   name: 'go',
   ext: 'go',
-  render(ir: ParserIR): string {
+  emitLexer(grammar: CstGrammar): string {
+    return lexer(portableIR(grammar));
+  },
+  emitParser(grammar: CstGrammar, lexerSrc: string | null): string {
+    const ir = portableIR(grammar);
     const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r, ir.tpl) : rdRule(r))).join('\n\n');
     const matchTemplate = ir.tpl ? `func matchTemplate() int32 {
 \tt := peek()
@@ -344,7 +350,7 @@ var nodes []Node
 var kids []int32
 var scratch []int32
 
-${lexer(ir)}
+${lexerSrc ?? ''}
 
 func peek() *Tok {
 \tif pos < len(toks) { return &toks[pos] }
 
@@ -1,7 +1,7 @@
 // The Rust Target for emit-portable. Renders the same language-agnostic ParserIR as
 // tsTarget/goTarget into a self-contained Rust program (no external crates — the lexer is
 // regex-free, so it compiles with rustc alone, no Cargo/network). Its CST JSON is checked
-// byte-for-byte against the interpreter, so `emitPortableParser(grammar, rustTarget)` is a
+// byte-for-byte against the interpreter, so `emitParser(grammar, rustTarget)` is a
 // real, verified Rust parser derived from the same grammar definition.
 //
 // Rust ownership note: a CST node is OWNED (moved), unlike the TS/Go pointer trees. In the
@@ -11,8 +11,10 @@
 // returns it. Sub-sequence combinators (star/opt/sep) take non-capturing fn pointers
 // `fn(&mut Parser, &mut Vec<Cst>) -> bool`, threading the parser + kids as params (so nothing
 // is captured, sidestepping the borrow checker).
-import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target, TplCfg } from './emit-portable.ts';
-import type { TokenPattern } from './types.ts';
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, TplCfg } from './emit-portable.ts';
+import { portableIR } from './emit-portable.ts';
+import type { Target } from './emit.ts';
+import type { TokenPattern, CstGrammar } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
@@ -312,7 +314,11 @@ ${r.nudSeqs.map((seq) => `        { let save = self.pos; let mut kids: Vec<Cst>
 export const rustTarget: Target = {
   name: 'rust',
   ext: 'rs',
-  render(ir: ParserIR): string {
+  emitLexer(grammar: CstGrammar): string {
+    return lexer(portableIR(grammar));
+  },
+  emitParser(grammar: CstGrammar, lexerSrc: string | null): string {
+    const ir = portableIR(grammar);
     const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r, ir.tpl) : rdRule(r))).join('\n\n');
     const matchTemplate = ir.tpl ? `    fn match_template(&mut self) -> Option<Cst> {
         let t = self.peek()?;
@@ -350,7 +356,7 @@ impl Cst {
 // offset/end inferred from first/last child (children non-empty).
 fn node(rule: &'static str, kids: Vec<Cst>) -> Cst { let o = kids[0].offset; let e = kids[kids.len() - 1].end; Cst::node(rule, kids, o, e) }
 
-${lexer(ir)}
+${lexerSrc ?? ''}
 
 struct Parser<'a> { toks: Vec<Tok<'a>>, pos: usize, capped: bool, suppress_next: Vec<&'static str>, src: &'a str }
 impl<'a> Parser<'a> {
 
@@ -4,7 +4,10 @@
 // index LEDs), and a CST→JSON printer over stdin. It is the reference rendering — its CST
 // is checked byte-for-byte against the interpreter (createParser), so a divergence in the
 // portable logic surfaces here before Go/Rust are compiled.
-import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, Target, TplCfg } from './emit-portable.ts';
+import type { ParserIR, RdRule, PrattRule, Step, Bracket, CharRange, LexTok, TplCfg } from './emit-portable.ts';
+import { portableIR } from './emit-portable.ts';
+import type { Target } from './emit.ts';
+import type { CstGrammar } from './types.ts';
 
 const J = (v: unknown) => JSON.stringify(v);
 const rangeCond = (v: string, rs: CharRange[]) =>
@@ -267,7 +270,11 @@ ${r.nudSeqs.map((seq) => `  { const save = pos; const kids: Cst[] = []; if (${se
 export const tsTarget: Target = {
   name: 'typescript',
   ext: 'ts',
-  render(ir: ParserIR): string {
+  emitLexer(grammar: CstGrammar): string {
+    return lexer(portableIR(grammar));
+  },
+  emitParser(grammar: CstGrammar, lexerSrc: string | null): string {
+    const ir = portableIR(grammar);
     const ruleFns = ir.rules.map((r) => (r.kind === 'pratt' ? prattRule(r, ir.tpl) : rdRule(r))).join('\n\n');
     const matchTemplate = ir.tpl ? `function matchTemplate(): Cst | null {
   const t = peek();
@@ -296,7 +303,7 @@ type Leaf = { tokenType: string; offset: number; end: number };
 type Node = { rule: string; children: Cst[]; offset: number; end: number };
 type Cst = Node | Leaf;
 
-${lexer(ir)}
+${lexerSrc ?? ''}
 
 let toks: Tok[] = [];
 let pos = 0;
 
@@ -13,7 +13,7 @@
 //   node test/cst-match-totality.ts
 import { existsSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { generateInputs } from './grammar-gen.ts';
 
 const GRAMMARS = ['typescript', 'javascript', 'typescriptreact', 'javascriptreact', 'yaml', 'html'];
@@ -52,7 +52,7 @@ for (const name of GRAMMARS) {
   const grammar = (await import(`../${name}.ts`)).default;
   const matchers = (await import(`../${name}.cst-match.ts`)).MATCHERS;
   const emPath = `/tmp/emitted-totality-${name}.mts`;
-  writeFileSync(emPath, emitParser(grammar));
+  writeFileSync(emPath, emitParser(grammar, jsTarget));
   const em = (await import(emPath + '?v=' + process.pid)) as Emitted;
   let parsed = 0;
   for (const input of generateInputs(grammar, { depth: 5, nestDepth: 5, cap: 7, fuzzRounds: 250, maxInputs: 1500, seed: 5 })) {
 
@@ -9,14 +9,14 @@
 //   node test/emit-lexer-verify.ts            # in-repo corpus (+ /tmp/ts-repo if present)
 import { readFileSync, writeFileSync } from 'node:fs';
 import { createLexer } from '../src/gen-lexer.ts';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { inRepoCorpus, externalTsFiles } from './emit-corpus.ts';
 
 const grammar = (await import('../typescript.ts')).default;
 
 // The reference: createLexer with the SAME intern config the emitted parser bakes.
 const EMITTED = '/tmp/emit-lexer-verify-parser.mts';
-writeFileSync(EMITTED, emitParser(grammar));
+writeFileSync(EMITTED, emitParser(grammar, jsTarget));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 const src = readFileSync(EMITTED, 'utf-8');
 if (src.includes('createLexer(')) {
 
@@ -9,14 +9,14 @@
 //   node test/emit-parser-bench.ts            # the 4 bench files, N=20
 //   node test/emit-parser-bench.ts <N>        # custom timed-run count
 import { createParser } from '../src/gen-parser.ts';
-import { emitParser } from '../src/emit-parser.ts';
+import { emitParser, jsTarget } from '../src/emit.ts';
 import { readFileSync, writeFileSync } from 'fs';
 
 const grammar = (await import('../typescript.ts')).default;
 const oracle = createParser(grammar);
 
 const EMITTED = '/tmp/emitted-parser.mts';
-writeFileSync(EMITTED, emitParser(grammar));
+writeFileSync(EMITTED, emitParser(grammar, jsTarget));
 const emitted = await import(EMITTED + '?v=' + Date.now());
 
 const N = Number(process.argv[2]) || 20;