Issue #6: target-agnostic emitter — emitParser/emitLexer for JS/TS/Go/Rust #351
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: CI

on:
  push:
    branches:
      - master
  pull_request:
  # Nightly cron + manual dispatch both force a FULL run. The tree-sitter job only runs
  # `generate` when tree-sitter/** changed (the materialized grammar is its sole input), so
  # these two triggers cover the one input that diff cannot show — a tree-sitter-cli bump in
  # the lockfile — and re-verify the "beats official" claim.
  schedule:
    - cron: "0 9 * * *"
  workflow_dispatch:

permissions:
  contents: read
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
        with:
          submodules: true
      # Node 24+ runs the .ts sources directly (native type stripping) — no build, no tsx.
      - uses: actions/setup-node@v4
        with:
          node-version: 24
      - run: npm ci
      # Regenerate every grammar's artifacts FIRST: the uncommitted one
      # (*.cst-match.ts, gitignored) must exist before Typecheck
      # and the gates, which import it. Then fail if any COMMITTED artifact
      # drifts from the regenerated output (someone edited a grammar but forgot
      # to regenerate). Covers all grammars (sources at the repo root) + the
      # tree-sitter packages.
      - name: Generate editor artifacts (committed ones must be in sync)
        run: |
          npm run gen
          git diff --exit-code -- '*.tmLanguage.json' '*.language-configuration.json' '*.monarch.json' '*.contributes.json' tree-sitter
      - name: Typecheck
        run: npx tsc --noEmit
      # Every correctness GATE through ONE runner — sanity / agnostic / conformance /
      # highlighter / vue / yaml / the generative scope≡role check / the gap-ledger selftest
      # + --check (stale KNOWN-GAPS.md fails). One ✓/✗ summary, one exit code. The comparative
      # METRICS (scope-gap / src-coverage) and BENCH tools need the external TS corpus / VS Code
      # grammars and run in the readme-bench workflow, not here. See TESTING.md for the taxonomy.
      - name: Test
        run: npm run check

  # Engine-parity BREADTH guard. The `test` job already runs the three parity gates
  # (emit-parser-verify / emit-reject-messages / emit-lexer-verify) on the corpus-free
  # in-repo corpus — that is the standing mechanism that forces a gen-parser change to
  # propagate to emit-parser. This job adds the full external TS corpus for breadth, so a
  # divergence on some construct the in-repo corpus does not exercise still gets caught.
  # Gated on parser/grammar changes (like the treesitter job) so it doesn't clone the
  # corpus on doc-only pushes; schedule / workflow_dispatch force the full run.
  emit-parity:
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): the `test` job checks out with `submodules: true` and runs the same
      # parity gates; this checkout does not — confirm the gates need nothing from a submodule.
      - uses: actions/checkout@v5
        with:
          fetch-depth: 0 # need history to diff against the base for the path gate below
      # Sets the step output `value` to 'true' (run the breadth gate) or 'false' (skip it).
      # Anything uncertain — a non-push/PR event, a missing base commit, a failing diff —
      # resolves to 'true', so the gate is only ever skipped on a positive "nothing changed".
      - name: Did the parser/grammar inputs change?
        id: changed
        # Event context is handed over as environment variables instead of being spliced
        # into the script text, so the shell only ever sees these values as data.
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PUSH_BEFORE_SHA: ${{ github.event.before }}
        run: |
          if [ "$EVENT_NAME" != "push" ] && [ "$EVENT_NAME" != "pull_request" ]; then
            echo "value=true" >> "$GITHUB_OUTPUT"; echo "forced full run ($EVENT_NAME)"; exit 0
          fi
          if [ "$EVENT_NAME" = "pull_request" ]; then base="$PR_BASE_SHA"; else base="$PUSH_BEFORE_SHA"; fi
          if [ -z "$base" ] || ! git cat-file -e "$base^{commit}" 2>/dev/null; then
            echo "value=true" >> "$GITHUB_OUTPUT"; echo "no usable base — running the gate"; exit 0
          fi
          # Capture the diff BEFORE matching: in `git diff | grep` the `if` only sees grep's
          # exit status, so a failing diff would read as "nothing changed" and skip the gate.
          if ! changed_files="$(git diff --name-only "$base" HEAD)"; then
            echo "value=true" >> "$GITHUB_OUTPUT"; echo "git diff failed — running the gate"; exit 0
          fi
          if grep -qE '^src/|^[^/]+\.ts$|^test/emit-' <<<"$changed_files"; then
            echo "value=true" >> "$GITHUB_OUTPUT"; echo "parser/grammar changed — running the breadth gate"
          else
            echo "value=false" >> "$GITHUB_OUTPUT"; echo "no parser/grammar change — skipping the corpus clone"
          fi
      - uses: actions/setup-node@v4
        if: steps.changed.outputs.value == 'true'
        with:
          node-version: 24
      - if: steps.changed.outputs.value == 'true'
        run: npm ci
      # Pinned-SHA, shallow, sparse clone of the TS conformance corpus to the fixed path the
      # parity gates auto-detect (same pin + technique as the readme-bench workflow).
      - name: Clone the pinned TS corpus
        if: steps.changed.outputs.value == 'true'
        run: |
          set -euo pipefail
          rm -rf /tmp/ts-repo; mkdir -p /tmp/ts-repo
          git -C /tmp/ts-repo init -q
          git -C /tmp/ts-repo remote add origin https://github.com/microsoft/TypeScript
          git -C /tmp/ts-repo config core.sparseCheckout true
          printf 'tests/cases/\n' > /tmp/ts-repo/.git/info/sparse-checkout
          git -C /tmp/ts-repo fetch -q --depth 1 --filter=blob:none origin 6fbce89821d93a5b761581d9ac540455f38e9acb
          git -C /tmp/ts-repo checkout -q FETCH_HEAD
      - name: Engine-parity over the full corpus
        if: steps.changed.outputs.value == 'true'
        run: |
          node test/emit-parser-verify.ts all
          node test/emit-reject-messages.ts
          node test/emit-lexer-verify.ts

  # The derived tree-sitter highlighter is the strongest thesis proof (a real GLR
  # parser from the same grammar, beating the official hand-written one). Build its
  # wasm and gate the accuracy so the 95.9% is verified, not just claimed. The
  # tree-sitter CLI bundles its own wasm toolchain — no emscripten/docker needed.
  treesitter:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
        with:
          fetch-depth: 0 # need history to diff against the base for the path gate below
      # `tree-sitter generate` is ~5 min for the TS grammar (issue #46: the state count is at the
      # floor for a unified-grammar-derived parser, so the cost is irreducible) — but the generated
      # parser is a PURE FUNCTION of the committed tree-sitter/** (grammar.js + scanner.c + queries),
      # and the `test` job fails if those drift from the grammar sources, so EVERY grammar change
      # necessarily lands as a tree-sitter/** diff. Re-running generate when nothing under
      # tree-sitter/** changed is pure waste, so gate the expensive steps on it. The job still RUNS
      # (reports success) — only the steps are skipped — so a required status check is never pending.
      # schedule / workflow_dispatch force the full run regardless (the lockfile/cli-bump backstop).
      - name: Did the tree-sitter inputs change?
        id: changed
        # Event context is handed over as environment variables instead of being spliced
        # into the script text, so the shell only ever sees these values as data.
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PUSH_BEFORE_SHA: ${{ github.event.before }}
        run: |
          if [ "$EVENT_NAME" != "push" ] && [ "$EVENT_NAME" != "pull_request" ]; then
            echo "value=true" >> "$GITHUB_OUTPUT"; echo "forced full run ($EVENT_NAME)"; exit 0
          fi
          if [ "$EVENT_NAME" = "pull_request" ]; then base="$PR_BASE_SHA"; else base="$PUSH_BEFORE_SHA"; fi
          if [ -z "$base" ] || ! git cat-file -e "$base^{commit}" 2>/dev/null; then
            echo "value=true" >> "$GITHUB_OUTPUT"; echo "no usable base — running the gate"; exit 0
          fi
          # Capture the diff BEFORE matching: in `git diff | grep` the `if` only sees grep's
          # exit status, so a failing diff would read as "nothing changed" and skip the gate.
          if ! changed_files="$(git diff --name-only "$base" HEAD)"; then
            echo "value=true" >> "$GITHUB_OUTPUT"; echo "git diff failed — running the gate"; exit 0
          fi
          if grep -qE '^tree-sitter/' <<<"$changed_files"; then
            echo "value=true" >> "$GITHUB_OUTPUT"; echo "tree-sitter/** changed — running the gate"
          else
            echo "value=false" >> "$GITHUB_OUTPUT"; echo "no tree-sitter/** change — skipping generate/build/bench"
          fi
      - uses: actions/setup-node@v4
        if: steps.changed.outputs.value == 'true'
        with:
          node-version: 24
      - if: steps.changed.outputs.value == 'true'
        run: npm ci
      # Conflict gate: `tree-sitter generate` for every derived grammar IN PARALLEL (was sequential
      # ~12 min; parallel ≈ the slowest single grammar, ts/tsx ~5 min). A conflict introduced by a
      # grammar change is caught even for the dialects whose wasm is not built below (tsx/js/jsx) —
      # exactly the gap that once let an unresolved `type`/`class_heritage` conflict ship. yaml
      # included (issue #3): its indent/scalar externals + C scanner make it generate + build.
      - name: Generate every derived tree-sitter grammar (parallel conflict gate)
        if: steps.changed.outputs.value == 'true'
        run: |
          langs=(typescript typescriptreact javascript javascriptreact html yaml)
          pids=()
          # One background subshell per grammar; each writes its own log so output never interleaves.
          for g in "${langs[@]}"; do
            ( cd "tree-sitter/$g" && npx tree-sitter generate ) >"/tmp/gen-$g.log" 2>&1 &
            pids+=($!)
          done
          # Wait on every job (not just the first failure) so all broken grammars are reported.
          fail=0
          for i in "${!langs[@]}"; do
            if wait "${pids[$i]}"; then echo "✓ ${langs[$i]}"; else echo "✗ ${langs[$i]}"; cat "/tmp/gen-${langs[$i]}.log"; fail=1; fi
          done
          exit $fail
      # Build the gated wasms FROM the parser.c just generated (no re-generate) and run the accuracy
      # benches: ts must beat official (the thesis proof), html vs parse5. The YAML wasm is built to
      # prove its C indentation scanner compiles + links; its accuracy bench needs the yaml-test-suite
      # checkout, so it runs in the readme-bench workflow.
      - name: Build wasm + accuracy gate (typescript / html / yaml)
        if: steps.changed.outputs.value == 'true'
        run: |
          ( cd tree-sitter/typescript && npx tree-sitter build --wasm . )
          ( cd tree-sitter/html && npx tree-sitter build --wasm . )
          ( cd tree-sitter/yaml && npx tree-sitter build --wasm . )
          node test/treesitter-bench.ts
          node test/html-treesitter.ts