From 5cdff763a58f517d64a5102bfa7ae1bcd2896d12 Mon Sep 17 00:00:00 2001 From: MayerTim Date: Wed, 10 Jun 2026 20:03:08 +0200 Subject: [PATCH 1/5] test(formatter): add corpus fixture test harness --- test/corpus/README.md | 11 +++ .../watcom/basic-procedure.expected.sql | 11 +++ test/corpus/watcom/basic-procedure.input.sql | 9 +++ test/runFormatterCorpusTests.js | 81 +++++++++++++++++++ 4 files changed, 112 insertions(+) create mode 100644 test/corpus/README.md create mode 100644 test/corpus/watcom/basic-procedure.expected.sql create mode 100644 test/corpus/watcom/basic-procedure.input.sql create mode 100644 test/runFormatterCorpusTests.js diff --git a/test/corpus/README.md b/test/corpus/README.md new file mode 100644 index 0000000..a47b76a --- /dev/null +++ b/test/corpus/README.md @@ -0,0 +1,11 @@ +# Formatter corpus fixtures + +Corpus fixtures are representative SQL samples that document current formatter behavior. +Each fixture has an `.input.sql` file and a matching `.expected.sql` file, both stored under a dialect directory such as `watcom/`; the first directory name below `test/corpus/` selects the dialect used for formatting. + +The corpus runner checks two things for every fixture pair: + +1. formatting the input produces the expected output +2. formatting the expected output leaves it unchanged + +Use sanitized, public-safe SQL only. These fixtures are regression tests, not product roadmap notes. 
diff --git a/test/corpus/watcom/basic-procedure.expected.sql b/test/corpus/watcom/basic-procedure.expected.sql new file mode 100644 index 0000000..f5cdbbb --- /dev/null +++ b/test/corpus/watcom/basic-procedure.expected.sql @@ -0,0 +1,11 @@ +CREATE PROCEDURE dbo.customer_summary( + IN customer_id integer +) +BEGIN + SELECT TODAY(), customer_id; + IF customer_id IS NULL THEN + SELECT 'missing customer'; + ELSE + SELECT 'loaded customer'; + ENDIF; +END; diff --git a/test/corpus/watcom/basic-procedure.input.sql b/test/corpus/watcom/basic-procedure.input.sql new file mode 100644 index 0000000..b2737e4 --- /dev/null +++ b/test/corpus/watcom/basic-procedure.input.sql @@ -0,0 +1,9 @@ +create procedure dbo.customer_summary(in customer_id integer) +begin +select today(), customer_id; +if customer_id is null then +select 'missing customer'; +else +select 'loaded customer'; +endif; +end; diff --git a/test/runFormatterCorpusTests.js b/test/runFormatterCorpusTests.js new file mode 100644 index 0000000..f0cdbba --- /dev/null +++ b/test/runFormatterCorpusTests.js @@ -0,0 +1,81 @@ +const fs = require('fs'); +const path = require('path'); + +const { assert, runTest } = require('./helpers/runTest'); +const { formatSql, watcomDialect, mssqlDialect, defaultOptions } = require('./formatter/helpers'); + +const corpusRoot = path.join(__dirname, 'corpus'); +const dialectsByCorpusDirectory = new Map([ + ['watcom', watcomDialect], + ['mssql', mssqlDialect], +]); + +function listInputFixtures(directory) { + if (!fs.existsSync(directory)) { + return []; + } + + return fs + .readdirSync(directory, { withFileTypes: true }) + .flatMap((entry) => { + const entryPath = path.join(directory, entry.name); + + if (entry.isDirectory()) { + return listInputFixtures(entryPath); + } + + return entry.isFile() && entry.name.endsWith('.input.sql') ? 
[entryPath] : []; + }) + .sort((left, right) => left.localeCompare(right)); +} + +function getExpectedFixturePath(inputPath) { + return inputPath.replace(/\.input\.sql$/u, '.expected.sql'); +} + +function getDialectForFixture(inputPath) { + const relativePath = path.relative(corpusRoot, inputPath); + const corpusDirectory = relativePath.split(path.sep)[0]; + const dialect = dialectsByCorpusDirectory.get(corpusDirectory); + + if (!dialect) { + throw new Error(`No corpus dialect is configured for '${corpusDirectory}'.`); + } + + return dialect; +} + +function getFixtureName(inputPath) { + return path.relative(corpusRoot, inputPath).replace(/\.input\.sql$/u, ''); +} + +const inputFixtures = listInputFixtures(corpusRoot); + +if (inputFixtures.length === 0) { + throw new Error(`No formatter corpus fixtures found under ${corpusRoot}.`); +} + +for (const inputPath of inputFixtures) { + const expectedPath = getExpectedFixturePath(inputPath); + const fixtureName = getFixtureName(inputPath); + + runTest(`formatter corpus: ${fixtureName}`, () => { + assert.ok( + fs.existsSync(expectedPath), + `Expected corpus fixture is missing for ${path.relative(process.cwd(), inputPath)}.`, + ); + + const input = fs.readFileSync(inputPath, 'utf8'); + const expected = fs.readFileSync(expectedPath, 'utf8'); + const dialect = getDialectForFixture(inputPath); + const result = formatSql(input, dialect, defaultOptions); + const idempotentResult = formatSql(expected, dialect, defaultOptions); + + assert.equal(result.text, expected, `${fixtureName} did not format to the expected output.`); + assert.equal( + idempotentResult.text, + expected, + `${fixtureName} expected output is not idempotent.`, + ); + }); +} From 81f9182aeb35d08103db977c2357231a43180e4e Mon Sep 17 00:00:00 2001 From: MayerTim Date: Wed, 10 Jun 2026 20:03:11 +0200 Subject: [PATCH 2/5] test(formatter): add Watcom corpus fixtures for core SQL shapes --- test/corpus/watcom/case-expression.expected.sql | 12 ++++++++++++ 
test/corpus/watcom/case-expression.input.sql | 4 ++++ test/corpus/watcom/cursor-loop.expected.sql | 12 ++++++++++++ test/corpus/watcom/cursor-loop.input.sql | 8 ++++++++ test/corpus/watcom/ddl-parentheses.expected.sql | 6 ++++++ test/corpus/watcom/ddl-parentheses.input.sql | 6 ++++++ test/corpus/watcom/if-else-block.expected.sql | 16 ++++++++++++++++ test/corpus/watcom/if-else-block.input.sql | 10 ++++++++++ test/corpus/watcom/query-clauses.expected.sql | 17 +++++++++++++++++ test/corpus/watcom/query-clauses.input.sql | 4 ++++ test/corpus/watcom/union-all.expected.sql | 11 +++++++++++ test/corpus/watcom/union-all.input.sql | 4 ++++ 12 files changed, 110 insertions(+) create mode 100644 test/corpus/watcom/case-expression.expected.sql create mode 100644 test/corpus/watcom/case-expression.input.sql create mode 100644 test/corpus/watcom/cursor-loop.expected.sql create mode 100644 test/corpus/watcom/cursor-loop.input.sql create mode 100644 test/corpus/watcom/ddl-parentheses.expected.sql create mode 100644 test/corpus/watcom/ddl-parentheses.input.sql create mode 100644 test/corpus/watcom/if-else-block.expected.sql create mode 100644 test/corpus/watcom/if-else-block.input.sql create mode 100644 test/corpus/watcom/query-clauses.expected.sql create mode 100644 test/corpus/watcom/query-clauses.input.sql create mode 100644 test/corpus/watcom/union-all.expected.sql create mode 100644 test/corpus/watcom/union-all.input.sql diff --git a/test/corpus/watcom/case-expression.expected.sql b/test/corpus/watcom/case-expression.expected.sql new file mode 100644 index 0000000..ec15f56 --- /dev/null +++ b/test/corpus/watcom/case-expression.expected.sql @@ -0,0 +1,12 @@ +CREATE PROCEDURE dbo.classify_customer( + IN score integer +) +BEGIN + SELECT CASE + WHEN score >= 90 + THEN 'gold' + WHEN score >= 50 + THEN 'silver' + ELSE 'standard' + END AS customer_class; +END; diff --git a/test/corpus/watcom/case-expression.input.sql b/test/corpus/watcom/case-expression.input.sql new file mode 
100644 index 0000000..6132350 --- /dev/null +++ b/test/corpus/watcom/case-expression.input.sql @@ -0,0 +1,4 @@ +create procedure dbo.classify_customer(in score integer) +begin +select case when score >= 90 then 'gold' when score >= 50 then 'silver' else 'standard' end as customer_class; +end; diff --git a/test/corpus/watcom/cursor-loop.expected.sql b/test/corpus/watcom/cursor-loop.expected.sql new file mode 100644 index 0000000..b523dfb --- /dev/null +++ b/test/corpus/watcom/cursor-loop.expected.sql @@ -0,0 +1,12 @@ +CREATE PROCEDURE dbo.recalculate_totals() +BEGIN + FOR order_cursor AS orders insensitive CURSOR FOR + SELECT order_id, customer_id + FROM dbo.orders + WHERE active = 1 + ORDER BY order_id + DO + UPDATE dbo.orders SET total = 0 + WHERE order_id = order_cursor.order_id; + END FOR; +END; diff --git a/test/corpus/watcom/cursor-loop.input.sql b/test/corpus/watcom/cursor-loop.input.sql new file mode 100644 index 0000000..ba1ad49 --- /dev/null +++ b/test/corpus/watcom/cursor-loop.input.sql @@ -0,0 +1,8 @@ +create procedure dbo.recalculate_totals() +begin +for order_cursor as orders insensitive cursor for +select order_id, customer_id from dbo.orders where active = 1 order by order_id +do +update dbo.orders set total = 0 where order_id = order_cursor.order_id; +end for; +end; diff --git a/test/corpus/watcom/ddl-parentheses.expected.sql b/test/corpus/watcom/ddl-parentheses.expected.sql new file mode 100644 index 0000000..5b4b19f --- /dev/null +++ b/test/corpus/watcom/ddl-parentheses.expected.sql @@ -0,0 +1,6 @@ +CREATE TABLE dbo.audit_log( + id integer NOT NULL, + created_at TIMESTAMP NOT NULL, + description varchar(255) NULL, + amount numeric(12, 2) NULL +); diff --git a/test/corpus/watcom/ddl-parentheses.input.sql b/test/corpus/watcom/ddl-parentheses.input.sql new file mode 100644 index 0000000..899c356 --- /dev/null +++ b/test/corpus/watcom/ddl-parentheses.input.sql @@ -0,0 +1,6 @@ +create table dbo.audit_log( +id integer not null, +created_at timestamp not 
null, +description varchar(255) null, +amount numeric(12,2) null +); diff --git a/test/corpus/watcom/if-else-block.expected.sql b/test/corpus/watcom/if-else-block.expected.sql new file mode 100644 index 0000000..9cd3d4b --- /dev/null +++ b/test/corpus/watcom/if-else-block.expected.sql @@ -0,0 +1,16 @@ +CREATE PROCEDURE dbo.update_status( + IN item_id integer, + IN status_code integer +) +BEGIN + IF status_code = 1 THEN + UPDATE dbo.items SET state = 'new' + WHERE id = item_id; + ELSEIF status_code = 2 THEN + UPDATE dbo.items SET state = 'active' + WHERE id = item_id; + ELSE + UPDATE dbo.items SET state = 'archived' + WHERE id = item_id; + ENDIF; +END; diff --git a/test/corpus/watcom/if-else-block.input.sql b/test/corpus/watcom/if-else-block.input.sql new file mode 100644 index 0000000..cddfa17 --- /dev/null +++ b/test/corpus/watcom/if-else-block.input.sql @@ -0,0 +1,10 @@ +create procedure dbo.update_status(in item_id integer, in status_code integer) +begin +if status_code = 1 then +update dbo.items set state = 'new' where id = item_id; +elseif status_code = 2 then +update dbo.items set state = 'active' where id = item_id; +else +update dbo.items set state = 'archived' where id = item_id; +endif; +end; diff --git a/test/corpus/watcom/query-clauses.expected.sql b/test/corpus/watcom/query-clauses.expected.sql new file mode 100644 index 0000000..44fe3d1 --- /dev/null +++ b/test/corpus/watcom/query-clauses.expected.sql @@ -0,0 +1,17 @@ +CREATE PROCEDURE dbo.find_orders( + IN minimum_total numeric(12, 2) +) +BEGIN + SELECT o.order_id, c.customer_name, SUM( + i.amount + ) AS total_amount + FROM dbo.orders o + LEFT OUTER JOIN dbo.customers c + ON c.customer_id = o.customer_id + LEFT OUTER JOIN dbo.order_items i + ON i.order_id = o.order_id + WHERE o.active = 1 + AND i.amount >= minimum_total + GROUP BY o.order_id, c.customer_name + ORDER BY c.customer_name, o.order_id; +END; diff --git a/test/corpus/watcom/query-clauses.input.sql 
b/test/corpus/watcom/query-clauses.input.sql new file mode 100644 index 0000000..35dfd9e --- /dev/null +++ b/test/corpus/watcom/query-clauses.input.sql @@ -0,0 +1,4 @@ +create procedure dbo.find_orders(in minimum_total numeric(12,2)) +begin +select o.order_id, c.customer_name, sum(i.amount) as total_amount from dbo.orders o left outer join dbo.customers c on c.customer_id = o.customer_id left outer join dbo.order_items i on i.order_id = o.order_id where o.active = 1 and i.amount >= minimum_total group by o.order_id, c.customer_name order by c.customer_name, o.order_id; +end; diff --git a/test/corpus/watcom/union-all.expected.sql b/test/corpus/watcom/union-all.expected.sql new file mode 100644 index 0000000..c0a6102 --- /dev/null +++ b/test/corpus/watcom/union-all.expected.sql @@ -0,0 +1,11 @@ +CREATE PROCEDURE dbo.lookup_events() +BEGIN + SELECT event_id, 'active' AS source_name + FROM dbo.active_events + WHERE visible = 1 + UNION ALL + SELECT event_id, 'archived' AS source_name + FROM dbo.archived_events + WHERE visible = 1 + ORDER BY event_id; +END; diff --git a/test/corpus/watcom/union-all.input.sql b/test/corpus/watcom/union-all.input.sql new file mode 100644 index 0000000..b82a673 --- /dev/null +++ b/test/corpus/watcom/union-all.input.sql @@ -0,0 +1,4 @@ +create procedure dbo.lookup_events() +begin +select event_id, 'active' as source_name from dbo.active_events where visible = 1 union all select event_id, 'archived' as source_name from dbo.archived_events where visible = 1 order by event_id; +end; From 0ed967fe6f9c7e067a56b9072e3bd90fa66e3619 Mon Sep 17 00:00:00 2001 From: MayerTim Date: Wed, 10 Jun 2026 20:03:15 +0200 Subject: [PATCH 3/5] test(formatter): cover corpus edge cases for protected text --- .../watcom/metadata-header-comments.expected.sql | 10 ++++++++++ test/corpus/watcom/metadata-header-comments.input.sql | 10 ++++++++++ .../protected-strings-and-comments.expected.sql | 11 +++++++++++ .../watcom/protected-strings-and-comments.input.sql | 10 
++++++++++ test/corpus/watcom/quoted-identifiers.expected.sql | 7 +++++++ test/corpus/watcom/quoted-identifiers.input.sql | 6 ++++++ 6 files changed, 54 insertions(+) create mode 100644 test/corpus/watcom/metadata-header-comments.expected.sql create mode 100644 test/corpus/watcom/metadata-header-comments.input.sql create mode 100644 test/corpus/watcom/protected-strings-and-comments.expected.sql create mode 100644 test/corpus/watcom/protected-strings-and-comments.input.sql create mode 100644 test/corpus/watcom/quoted-identifiers.expected.sql create mode 100644 test/corpus/watcom/quoted-identifiers.input.sql diff --git a/test/corpus/watcom/metadata-header-comments.expected.sql b/test/corpus/watcom/metadata-header-comments.expected.sql new file mode 100644 index 0000000..0c6d896 --- /dev/null +++ b/test/corpus/watcom/metadata-header-comments.expected.sql @@ -0,0 +1,10 @@ +-- METADATA +-- Name : dbo.metadata_fixture +-- Version : 1.0.0 +-- Description : Public-safe metadata header fixture. +-- History : +-- 2026-06-10 - tester - Created fixture. +CREATE PROCEDURE dbo.metadata_fixture() +BEGIN + SELECT 1; +END; diff --git a/test/corpus/watcom/metadata-header-comments.input.sql b/test/corpus/watcom/metadata-header-comments.input.sql new file mode 100644 index 0000000..436179f --- /dev/null +++ b/test/corpus/watcom/metadata-header-comments.input.sql @@ -0,0 +1,10 @@ +-- METADATA +-- Name : dbo.metadata_fixture +-- Version : 1.0.0 +-- Description : Public-safe metadata header fixture. +-- History : +-- 2026-06-10 - tester - Created fixture. 
+create procedure dbo.metadata_fixture() +begin +select 1; +end; diff --git a/test/corpus/watcom/protected-strings-and-comments.expected.sql b/test/corpus/watcom/protected-strings-and-comments.expected.sql new file mode 100644 index 0000000..2d45ba2 --- /dev/null +++ b/test/corpus/watcom/protected-strings-and-comments.expected.sql @@ -0,0 +1,11 @@ +CREATE PROCEDURE dbo.protected_text() +BEGIN + SELECT 'select from where join order by' AS note, "select" AS quoted_name -- select from comment should stay lower-case + FROM dbo.notes + WHERE MESSAGE = 'where from join' + AND status = 'open'; + /* + select from where in a block comment should stay lower-case + */ + SELECT '[select]' AS literal_value; +END; diff --git a/test/corpus/watcom/protected-strings-and-comments.input.sql b/test/corpus/watcom/protected-strings-and-comments.input.sql new file mode 100644 index 0000000..3644031 --- /dev/null +++ b/test/corpus/watcom/protected-strings-and-comments.input.sql @@ -0,0 +1,10 @@ +create procedure dbo.protected_text() +begin +select 'select from where join order by' as note, "select" as quoted_name -- select from comment should stay lower-case +from dbo.notes +where message = 'where from join' and status = 'open'; +/* +select from where in a block comment should stay lower-case +*/ +select '[select]' as literal_value; +end; diff --git a/test/corpus/watcom/quoted-identifiers.expected.sql b/test/corpus/watcom/quoted-identifiers.expected.sql new file mode 100644 index 0000000..21b4776 --- /dev/null +++ b/test/corpus/watcom/quoted-identifiers.expected.sql @@ -0,0 +1,7 @@ +CREATE PROCEDURE dbo.quoted_identifiers() +BEGIN + SELECT "from", "where", [select], [order by] + FROM dbo.keyword_table + WHERE "where" = 'active' + AND [select] IS NOT NULL; +END; diff --git a/test/corpus/watcom/quoted-identifiers.input.sql b/test/corpus/watcom/quoted-identifiers.input.sql new file mode 100644 index 0000000..33ca0a2 --- /dev/null +++ b/test/corpus/watcom/quoted-identifiers.input.sql @@ -0,0 
+1,6 @@ +create procedure dbo.quoted_identifiers() +begin +select "from", "where", [select], [order by] +from dbo.keyword_table +where "where" = 'active' and [select] is not null; +end; From ab0af9aa5609a9a219d1006561ac71a29cd9e630 Mon Sep 17 00:00:00 2001 From: MayerTim Date: Wed, 10 Jun 2026 20:03:18 +0200 Subject: [PATCH 4/5] chore(test): include formatter corpus tests in validation --- package.json | 1 + test/runAllTests.js | 1 + 2 files changed, 2 insertions(+) diff --git a/package.json b/package.json index 8dcfcb1..4d9d1ba 100644 --- a/package.json +++ b/package.json @@ -58,6 +58,7 @@ "test": "npm run compile && node ./test/runAllTests.js", "test:metadata": "npm run compile && node ./test/runMetadataHeaderTests.js", "test:formatter": "npm run compile && node ./test/runFormatterTests.js", + "test:corpus": "npm run compile && node ./test/runFormatterCorpusTests.js", "test:project": "npm run compile && node ./test/runProjectValidationTests.js", "test:diagnostics": "npm run compile && node ./test/runDiagnosticsTests.js", "validate": "npm run check && npm test", diff --git a/test/runAllTests.js b/test/runAllTests.js index f34e9f9..5a41de2 100644 --- a/test/runAllTests.js +++ b/test/runAllTests.js @@ -5,6 +5,7 @@ const suites = [ './runMetadataHeaderTests', './runMetadataRegressionTests', './runFormatterTests', + './runFormatterCorpusTests', './runDiagnosticsTests', ]; From efc471abe3f8329ad196f0469352cd1f34d20983 Mon Sep 17 00:00:00 2001 From: MayerTim Date: Wed, 10 Jun 2026 20:03:21 +0200 Subject: [PATCH 5/5] docs(formatter): document corpus tests and formatter guarantees --- docs/DEVELOPMENT.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md index ca929aa..89504b7 100644 --- a/docs/DEVELOPMENT.md +++ b/docs/DEVELOPMENT.md @@ -154,6 +154,45 @@ Keep `src/extras/metadataHeader.ts` as the public orchestration entry point. 
New Formatter regression coverage is grouped by topic under `test/formatter/`. Keep `test/runFormatterTests.js` as the stable entry point for `npm run test:formatter`, and add new formatter regressions to the closest topic suite instead of expanding the runner directly. Use `test/formatter/helpers.js` for shared formatter imports, dialects, default options and fixture loading. +## Formatter corpus tests + +Formatter corpus tests live under `test/corpus/` and use input/expected SQL fixture pairs. They are intended for representative, public-safe SQL examples that should remain stable while the formatter internals evolve. + +Run only the corpus suite with: + +```bash +npm run test:corpus +``` + +The corpus runner discovers `*.input.sql` files, formats each input with the dialect selected by the first corpus directory name, and compares the output with the matching `*.expected.sql` file. It also formats each expected file again to verify idempotency: + +```text +format(input) === expected +format(expected) === expected +``` + +When adding a fixture: + +- use sanitized SQL that can safely live in the public repository +- keep examples focused on one representative SQL shape or safety boundary +- include both the input and expected output files, placed under a configured dialect directory (currently `watcom/` or `mssql/`); a fixture whose first directory has no configured dialect, or whose expected file is missing, fails the suite +- preserve current behavior unless a behavior change was explicitly approved +- do not use corpus fixtures as private roadmap notes or product planning documents + +Corpus fixtures document behavior on representative samples. They do not imply complete Watcom or MSSQL grammar coverage, and they should not be treated as a full parser conformance suite. 
+ +## Formatter behavior guarantees + +SQLovely formatter tests aim to protect these practical guarantees: + +- formatting should be deterministic for the same input, dialect and options +- expected corpus output should be idempotent +- strings, comments and quoted identifiers should not be corrupted by keyword-casing or layout rules +- large-file and cancellation safety guards should keep the extension responsive +- dialect-specific behavior should be covered by dialect-named tests or corpus directories + +The formatter is intentionally conservative and does not claim full SQL grammar parsing. If a future change requires a behavior update, update the smallest relevant topic test or corpus fixture and make the behavior change explicit in the commit. + ## Formatter pipeline internals Watcom structural rewrites are coordinated through `src/formatter/formattingPipeline.ts`. Shared formatter inputs such as the active dialect, resolved options, indentation string, cancellation checks and safety decision live in `src/formatter/formattingContext.ts`. Indentation is isolated in `src/formatter/indentation/indentationEngine.ts`, which applies keyword casing, block depth, query continuation depth, parenthesis continuation depth and CASE/exception branch depth after structural expansion. Keep the pipeline order explicit and behavior-preserving: compact/control-flow expansion, query/cursor/exception/expression normalization, block-ending normalization and parenthesis splitting should run before the indentation engine and final cleanup passes. When adding a formatter rule, prefer a small stateful pipeline pass that consumes the shared formatting context instead of expanding formatter orchestration logic in `formatSql.ts`.