From 98325711d1dc032e2c9d20a2b567f73f0e87fc4c Mon Sep 17 00:00:00 2001 From: MayerTim Date: Thu, 11 Jun 2026 00:05:53 +0200 Subject: [PATCH] refactor(formatter): centralize word scanning helpers --- .../passes/structural/blockEndFormatting.ts | 41 +---- .../structural/caseExpressionFormatting.ts | 43 +---- .../passes/structural/cursorForFormatting.ts | 61 +------ .../passes/structural/exceptionFormatting.ts | 44 +---- .../structural/ifExpressionFormatting.ts | 64 ++----- .../passes/structural/inlineIfFormatting.ts | 73 +++----- .../structural/parenthesisFormatting.ts | 26 +-- .../structural/queryClauseFormatting.ts | 70 +------- .../passes/structural/unionAllFormatting.ts | 53 +----- src/formatter/sqlLineScanner.ts | 161 ++++++++++++++++++ 10 files changed, 227 insertions(+), 409 deletions(-) diff --git a/src/formatter/passes/structural/blockEndFormatting.ts b/src/formatter/passes/structural/blockEndFormatting.ts index 1d02ec7..4d63794 100644 --- a/src/formatter/passes/structural/blockEndFormatting.ts +++ b/src/formatter/passes/structural/blockEndFormatting.ts @@ -1,5 +1,6 @@ import type { SqlDialect } from '../../../dialects'; import { + collectSqlWordsFromSegments, scanSqlLineOutsideLiteralsAndComments, type SqlLineScanState, type SqlOutsideSegment, @@ -21,14 +22,6 @@ interface EndPhraseToken { readonly hasSemicolon: boolean; } -interface WordMatch { - readonly start: number; - readonly end: number; - readonly normalized: string; -} - -const SQL_WORD_START = /[A-Za-z_]/u; -const SQL_WORD_PART = /[A-Za-z0-9_$#]/u; const BLOCK_END_FOLLOWERS = new Set(['if', 'for', 'loop', 'while', 'try', 'catch']); export function createInitialBlockEndFormattingState(): BlockEndFormattingState { @@ -89,7 +82,7 @@ function collectBlockEndPhrases( line: string, outsideSegments: readonly SqlOutsideSegment[], ): EndPhraseToken[] { - const words = collectWords(line, outsideSegments); + const words = collectSqlWordsFromSegments(line, outsideSegments); const phrases: EndPhraseToken[] = []; let index = 0; @@ -171,33 +164,3 @@ function consumeOptionalSemicolon( return { end: start, hasSemicolon: false }; } - -function collectWords(line: string, outsideSegments: readonly SqlOutsideSegment[]): WordMatch[] { - const words: WordMatch[] = []; - - for (const segment of outsideSegments) { - let index = segment.start; - - while (index < segment.end) { - if (!SQL_WORD_START.test(line[index])) { - index += 1; - continue; - } - - const start = index; - index += 1; - - while (index < segment.end && SQL_WORD_PART.test(line[index])) { - index += 1; - } - - words.push({ - start, - end: index, - normalized: line.slice(start, index).toLowerCase(), - }); - } - } - - return words; -} diff --git a/src/formatter/passes/structural/caseExpressionFormatting.ts b/src/formatter/passes/structural/caseExpressionFormatting.ts index 9ad531f..41b9931 100644 --- a/src/formatter/passes/structural/caseExpressionFormatting.ts +++ b/src/formatter/passes/structural/caseExpressionFormatting.ts @@ -1,8 +1,9 @@ import type { SqlDialect } from '../../../dialects'; import { + collectSqlWordsFromSegments, scanSqlLineOutsideLiteralsAndComments, type SqlLineScanState, - type SqlOutsideSegment, + type SqlWordMatch, } from '../../sqlLineScanner'; export interface CaseExpressionFormattingState { @@ -15,14 +16,8 @@ interface ExpandedLineResult { readonly nextState: CaseExpressionFormattingState; } -interface WordMatch { - readonly start: number; - readonly end: number; - readonly normalized: string; -} +type WordMatch = SqlWordMatch; -const SQL_WORD_START = /[A-Za-z_]/u; -const SQL_WORD_PART = /[A-Za-z0-9_$#]/u; const CASE_SPLIT_WORDS = new Set(['when', 'then', 'else']); const BLOCK_END_FOLLOWERS = new Set(['for', 'if', 'loop', 'try', 'catch', 'while']); @@ -46,7 +41,7 @@ export function expandWatcomCaseExpressionLine( initialState: CaseExpressionFormattingState, ): ExpandedLineResult { const scanResult = scanSqlLineOutsideLiteralsAndComments(line, initialState.scanState); - const words = collectWords(line, scanResult.outsideSegments); + const words = collectSqlWordsFromSegments(line, scanResult.outsideSegments); const nextState: CaseExpressionFormattingState = { scanState: scanResult.nextState, caseDepth: calculateNextCaseDepth(words, initialState.caseDepth), @@ -182,33 +177,3 @@ function pushTrimmed(lines: string[], text: string): void { lines.push(trimmed); } } - -function collectWords(line: string, outsideSegments: readonly SqlOutsideSegment[]): WordMatch[] { - const words: WordMatch[] = []; - - for (const segment of outsideSegments) { - let index = segment.start; - - while (index < segment.end) { - if (!SQL_WORD_START.test(line[index])) { - index += 1; - continue; - } - - const start = index; - index += 1; - - while (index < segment.end && SQL_WORD_PART.test(line[index])) { - index += 1; - } - - words.push({ - start, - end: index, - normalized: line.slice(start, index).toLowerCase(), - }); - } - } - - return words; -} diff --git a/src/formatter/passes/structural/cursorForFormatting.ts b/src/formatter/passes/structural/cursorForFormatting.ts index 443f3f2..f485ceb 100644 --- a/src/formatter/passes/structural/cursorForFormatting.ts +++ b/src/formatter/passes/structural/cursorForFormatting.ts @@ -1,9 +1,10 @@ import type { SqlDialect } from '../../../dialects'; import { cloneSqlLineScanState, + collectSqlWordsWithParenthesisDepth, scanSqlLineOutsideLiteralsAndComments, type SqlLineScanState, - type SqlOutsideSegment, + type SqlWordDepthMatch, } from '../../sqlLineScanner'; export interface CursorForFormattingState { @@ -16,15 +17,7 @@ interface ExpandedLineResult { readonly nextState: CursorForFormattingState; } -interface WordMatch { - readonly start: number; - readonly end: number; - readonly normalized: string; - readonly depth: number; -} - -const SQL_WORD_START = /[A-Za-z_]/u; -const SQL_WORD_PART = /[A-Za-z0-9_$#]/u; +type WordMatch = SqlWordDepthMatch; export function createInitialCursorForFormattingState(): CursorForFormattingState { return { @@ -56,7 +49,7 @@ export function expandWatcomCursorForLine( return { lines: [line], nextState: baseNextState }; } - const words = collectWords(line, scanResult.outsideSegments); + const words = collectSqlWordsWithParenthesisDepth(line, scanResult.outsideSegments); const { splitPoints, inCursorQuery } = findCursorForSplitPoints( words, initialState.inCursorQuery, @@ -205,52 +198,6 @@ function splitLineAtIndexes(line: string, indexes: readonly number[]): string[] return lines.length > 0 ? lines : [line]; } -function collectWords(line: string, outsideSegments: readonly SqlOutsideSegment[]): WordMatch[] { - const words: WordMatch[] = []; - let depth = 0; - - for (const segment of outsideSegments) { - let index = segment.start; - - while (index < segment.end) { - const char = line[index]; - - if (char === '(') { - depth += 1; - index += 1; - continue; - } - - if (char === ')') { - depth = Math.max(0, depth - 1); - index += 1; - continue; - } - - if (!SQL_WORD_START.test(char)) { - index += 1; - continue; - } - - const start = index; - index += 1; - - while (index < segment.end && SQL_WORD_PART.test(line[index])) { - index += 1; - } - - words.push({ - start, - end: index, - normalized: line.slice(start, index).toLowerCase(), - depth, - }); - } - } - - return words; -} - function pushTrimmed(lines: string[], value: string): void { const trimmed = value.trim(); diff --git a/src/formatter/passes/structural/exceptionFormatting.ts b/src/formatter/passes/structural/exceptionFormatting.ts index 701f008..3af7ffa 100644 --- a/src/formatter/passes/structural/exceptionFormatting.ts +++ b/src/formatter/passes/structural/exceptionFormatting.ts @@ -1,8 +1,9 @@ import type { SqlDialect } from '../../../dialects'; import { + collectSqlWordsFromSegments, scanSqlLineOutsideLiteralsAndComments, type SqlLineScanState, - type SqlOutsideSegment, + type SqlWordMatch, } from '../../sqlLineScanner'; export interface ExceptionFormattingState { @@ -15,14 +16,7 @@ interface ExpandedLineResult { readonly nextState: ExceptionFormattingState; } -interface WordMatch { - readonly start: number; - readonly end: number; - readonly normalized: string; -} - -const SQL_WORD_START = /[A-Za-z_]/u; -const SQL_WORD_PART = /[A-Za-z0-9_$#]/u; +type WordMatch = SqlWordMatch; export function createInitialExceptionFormattingState(): ExceptionFormattingState { return { @@ -45,7 +39,7 @@ export function expandWatcomExceptionLine( initialState: ExceptionFormattingState, ): ExpandedLineResult { const scanResult = scanSqlLineOutsideLiteralsAndComments(line, initialState.scanState); - const words = collectWords(line, scanResult.outsideSegments); + const words = collectSqlWordsFromSegments(line, scanResult.outsideSegments); const nextState: ExceptionFormattingState = { scanState: scanResult.nextState, inExceptionSection: calculateNextExceptionSectionState(words, initialState.inExceptionSection), @@ -187,33 +181,3 @@ function pushTrimmed(lines: string[], text: string): void { lines.push(trimmed); } } - -function collectWords(line: string, outsideSegments: readonly SqlOutsideSegment[]): WordMatch[] { - const words: WordMatch[] = []; - - for (const segment of outsideSegments) { - let index = segment.start; - - while (index < segment.end) { - if (!SQL_WORD_START.test(line[index])) { - index += 1; - continue; - } - - const start = index; - index += 1; - - while (index < segment.end && SQL_WORD_PART.test(line[index])) { - index += 1; - } - - words.push({ - start, - end: index, - normalized: line.slice(start, index).toLowerCase(), - }); - } - } - - return words; -} diff --git a/src/formatter/passes/structural/ifExpressionFormatting.ts b/src/formatter/passes/structural/ifExpressionFormatting.ts index 43d2a12..5327533 100644 --- a/src/formatter/passes/structural/ifExpressionFormatting.ts +++ b/src/formatter/passes/structural/ifExpressionFormatting.ts @@ -1,9 +1,12 @@ import type { SqlDialect } from '../../../dialects'; import { cloneSqlLineScanState, + collectSqlWordsFromSegments, + findNextSqlWord, scanSqlLineOutsideLiteralsAndComments, type SqlLineScanState, type SqlOutsideSegment, + type SqlWordMatch, } from '../../sqlLineScanner'; export interface IfExpressionFormattingState { @@ -24,11 +27,7 @@ interface ExpandedLineResult { readonly nextState: IfExpressionFormattingState; } -interface WordMatch { - readonly start: number; - readonly end: number; - readonly normalized: string; -} +type WordMatch = SqlWordMatch; interface EndIfMatch { readonly start: number; @@ -36,8 +35,6 @@ interface EndIfMatch { readonly suffix: string; } -const SQL_WORD_START = /[A-Za-z_]/u; -const SQL_WORD_PART = /[A-Za-z0-9_$#]/u; const PROCEDURAL_BRANCH_STARTERS = new Set([ 'alter', 'begin', @@ -389,7 +386,7 @@ function findKeyword( let index = Math.max(segment.start, startIndex); while (index < segment.end) { - const word = readNextWord(line, index, segment.end); + const word = findNextSqlWord(line, index, segment.end); if (!word) { break; @@ -415,7 +412,7 @@ function findEndIf( let index = Math.max(segment.start, startIndex); while (index < segment.end) { - const firstWord = readNextWord(line, index, segment.end); + const firstWord = findNextSqlWord(line, index, segment.end); if (!firstWord) { break; @@ -430,7 +427,7 @@ function findEndIf( } if (firstWord.normalized === 'end') { - const secondWord = readNextWord(line, firstWord.end, segment.end); + const secondWord = findNextSqlWord(line, firstWord.end, segment.end); if (secondWord?.normalized === 'if') { return { @@ -448,48 +445,11 @@ function findEndIf( return undefined; } -function readNextWord(line: string, startIndex: number, endIndex: number): WordMatch | undefined { - for (let index = startIndex; index < endIndex; index += 1) { - if (!SQL_WORD_START.test(line[index])) { - continue; - } - - const start = index; - index += 1; - - while (index < endIndex && SQL_WORD_PART.test(line[index])) { - index += 1; - } - - return { - start, - end: index, - normalized: line.slice(start, index).toLowerCase(), - }; - } - - return undefined; -} - -function collectWords(line: string, outsideSegments: readonly SqlOutsideSegment[]): WordMatch[] { - const words: WordMatch[] = []; - - for (const segment of outsideSegments) { - let index = segment.start; - - while (index < segment.end) { - const word = readNextWord(line, index, segment.end); - - if (!word) { - break; - } - - words.push(word); - index = word.end; - } - } - - return words; +function collectWords( + line: string, + outsideSegments: readonly SqlOutsideSegment[], +): readonly WordMatch[] { + return collectSqlWordsFromSegments(line, outsideSegments); } function isProceduralBranchText(text: string): boolean { diff --git a/src/formatter/passes/structural/inlineIfFormatting.ts b/src/formatter/passes/structural/inlineIfFormatting.ts index 3abe964..af934ec 100644 --- a/src/formatter/passes/structural/inlineIfFormatting.ts +++ b/src/formatter/passes/structural/inlineIfFormatting.ts @@ -2,9 +2,13 @@ import type { SqlDialect } from '../../../dialects'; import { cloneSqlLineScanState, createSqlOutsideLookup, + findNextSqlWord, + isSqlWordStart, + readSqlWordAt, scanSqlLineOutsideLiteralsAndComments, type SqlLineScanState, type SqlOutsideSegment, + type SqlWordMatch, } from '../../sqlLineScanner'; interface ExpandedLineResult { @@ -12,14 +16,7 @@ interface ExpandedLineResult { readonly nextState: SqlLineScanState; } -interface KeywordMatch { - readonly start: number; - readonly end: number; - readonly text: string; -} - -const SQL_WORD_START = /[A-Za-z_]/u; -const SQL_WORD_PART = /[A-Za-z0-9_$#]/u; +type KeywordMatch = SqlWordMatch; /** * Normalizes compact Watcom IF statements before the line-based formatter applies indentation. @@ -67,7 +64,7 @@ function startsWithIfOutsideComments( } const word = readWordAt(line, firstContentIndex); - return word?.text.toLowerCase() === 'if'; + return word?.normalized === 'if'; }); } @@ -83,7 +80,7 @@ function tryExpandInlineIf( const ifMatch = readWordAt(line, firstContentIndex); - if (!ifMatch || ifMatch.text.toLowerCase() !== 'if') { + if (!ifMatch || ifMatch.normalized !== 'if') { return undefined; } @@ -196,7 +193,7 @@ function splitConditionOnLogicalOperators(condition: string): string[] { continue; } - if (!SQL_WORD_START.test(current)) { + if (!isSqlWordStart(current)) { index += 1; continue; } @@ -208,7 +205,7 @@ function splitConditionOnLogicalOperators(condition: string): string[] { continue; } - const normalized = word.text.toLowerCase(); + const normalized = word.normalized; if (parenDepth === 0 && (normalized === 'and' || normalized === 'or')) { if (normalized === 'and' && betweenPending) { @@ -249,19 +246,13 @@ function findKeyword( let index = Math.max(segment.start, startIndex); while (index < segment.end) { - const nextIndex = findWordStart(line, index, segment.end); - - if (nextIndex < 0) { - break; - } - - const word = readWordAt(line, nextIndex); + const word = findNextSqlWord(line, index, segment.end); - if (!word || word.end > segment.end) { + if (!word) { break; } - if (word.text.toLowerCase() === keyword) { + if (word.normalized === keyword) { return word; } @@ -281,34 +272,28 @@ function findEndIf( let index = Math.max(segment.start, startIndex); while (index < segment.end) { - const nextIndex = findWordStart(line, index, segment.end); + const firstWord = findNextSqlWord(line, index, segment.end); - if (nextIndex < 0) { + if (!firstWord) { break; } - const firstWord = readWordAt(line, nextIndex); - - if (!firstWord || firstWord.end > segment.end) { - break; - } - - const normalized = firstWord.text.toLowerCase(); + const normalized = firstWord.normalized; if (normalized === 'endif') { return firstWord; } if (normalized === 'end') { - const secondWordStart = findWordStart(line, firstWord.end, segment.end); - const secondWord = secondWordStart >= 0 ? readWordAt(line, secondWordStart) : undefined; + const secondWord = findNextSqlWord(line, firstWord.end, segment.end); - if (secondWord && secondWord.end <= segment.end && secondWord.text.toLowerCase() === 'if') { + if (secondWord?.normalized === 'if') { const trailingSemicolonEnd = consumeOptionalSemicolon(line, secondWord.end, segment.end); return { start: firstWord.start, end: trailingSemicolonEnd, text: line.slice(firstWord.start, trailingSemicolonEnd), + normalized: line.slice(firstWord.start, trailingSemicolonEnd).toLowerCase(), }; } } @@ -334,26 +319,6 @@ function consumeOptionalSemicolon(line: string, start: number, segmentEnd: numbe return line[index] === ';' ? index + 1 : start; } -function findWordStart(line: string, start: number, end: number): number { - for (let index = start; index < end; index += 1) { - if (SQL_WORD_START.test(line[index])) { - return index; - } - } - - return -1; -} - function readWordAt(line: string, start: number): KeywordMatch | undefined { - if (!SQL_WORD_START.test(line[start] ?? '')) { - return undefined; - } - - let end = start + 1; - - while (end < line.length && SQL_WORD_PART.test(line[end])) { - end += 1; - } - - return { start, end, text: line.slice(start, end) }; + return readSqlWordAt(line, start); } diff --git a/src/formatter/passes/structural/parenthesisFormatting.ts b/src/formatter/passes/structural/parenthesisFormatting.ts index c5e286d..3dee221 100644 --- a/src/formatter/passes/structural/parenthesisFormatting.ts +++ b/src/formatter/passes/structural/parenthesisFormatting.ts @@ -1,6 +1,7 @@ import { cloneSqlLineScanState, createSqlOutsideLookup, + readSqlWordBefore, scanSqlLineOutsideLiteralsAndComments, type SqlLineScanState, } from '../../sqlLineScanner'; @@ -21,7 +22,6 @@ export interface ParenthesisIndentAnalysis { readonly nextScanState: SqlLineScanState; } -const SQL_WORD_PART = /[A-Za-z0-9_$#]/u; const TYPE_LENGTH_WORDS = new Set([ 'binary', 'bit', @@ -340,28 +340,8 @@ function isTypeLengthParenthesis(line: string, openIndex: number, inner: string) return false; } - const previousWord = readWordBefore(line, openIndex); - return previousWord !== undefined && TYPE_LENGTH_WORDS.has(previousWord.toLowerCase()); -} - -function readWordBefore(line: string, index: number): string | undefined { - let cursor = index - 1; - - while (cursor >= 0 && /\s/u.test(line[cursor])) { - cursor -= 1; - } - - const end = cursor + 1; - - while (cursor >= 0 && SQL_WORD_PART.test(line[cursor])) { - cursor -= 1; - } - - if (end === cursor + 1) { - return undefined; - } - - return line.slice(cursor + 1, end); + const previousWord = readSqlWordBefore(line, openIndex); + return previousWord !== undefined && TYPE_LENGTH_WORDS.has(previousWord.normalized); } function skipWhitespace(line: string, start: number): number { diff --git a/src/formatter/passes/structural/queryClauseFormatting.ts b/src/formatter/passes/structural/queryClauseFormatting.ts index ffd925c..968c00a 100644 --- a/src/formatter/passes/structural/queryClauseFormatting.ts +++ b/src/formatter/passes/structural/queryClauseFormatting.ts @@ -2,9 +2,10 @@ import type { SqlDialect } from '../../../dialects'; import { cloneSqlLineScanState, createSqlOutsideLookup, + collectSqlWordsWithParenthesisDepth, scanSqlLineOutsideLiteralsAndComments, type SqlLineScanState, - type SqlOutsideSegment, + type SqlWordDepthMatch, } from '../../sqlLineScanner'; export interface QueryClauseFormattingState { @@ -17,15 +18,8 @@ interface ExpandedLineResult { readonly nextState: QueryClauseFormattingState; } -interface WordMatch { - readonly start: number; - readonly end: number; - readonly normalized: string; - readonly depth: number; -} +type WordMatch = SqlWordDepthMatch; -const SQL_WORD_START = /[A-Za-z_]/u; -const SQL_WORD_PART = /[A-Za-z0-9_$#]/u; const JOIN_PREFIXES = new Set(['cross', 'full', 'inner', 'left', 'right']); const LOGICAL_CLAUSE_STARTERS = new Set(['where', 'on', 'having', 'and', 'or']); @@ -59,7 +53,11 @@ export function expandWatcomQueryClauseLine( return { lines: [line], nextState }; } - const words = collectWords(line, scanResult.outsideSegments, initialState.parenthesisDepth); + const words = collectSqlWordsWithParenthesisDepth( + line, + scanResult.outsideSegments, + initialState.parenthesisDepth, + ); if (words.length === 0) { return { lines: [line], nextState }; @@ -144,7 +142,7 @@ function splitLogicalContinuations(line: string): string[] { line, cloneSqlLineScanState({ inBlockComment: false }), ); - const words = collectWords(line, scanResult.outsideSegments, 0); + const words = collectSqlWordsWithParenthesisDepth(line, scanResult.outsideSegments); const firstWord = words[0]; if (!firstWord || firstWord.depth !== 0 || !LOGICAL_CLAUSE_STARTERS.has(firstWord.normalized)) { @@ -197,56 +195,6 @@ function splitLineAtIndexes(line: string, indexes: readonly number[]): string[] return lines.length > 0 ? lines : [line]; } -function collectWords( - line: string, - outsideSegments: readonly SqlOutsideSegment[], - initialDepth: number, -): WordMatch[] { - const words: WordMatch[] = []; - let depth = initialDepth; - - for (const segment of outsideSegments) { - let index = segment.start; - - while (index < segment.end) { - const char = line[index]; - - if (char === '(') { - depth += 1; - index += 1; - continue; - } - - if (char === ')') { - depth = Math.max(0, depth - 1); - index += 1; - continue; - } - - if (!SQL_WORD_START.test(char)) { - index += 1; - continue; - } - - const start = index; - index += 1; - - while (index < segment.end && SQL_WORD_PART.test(line[index])) { - index += 1; - } - - words.push({ - start, - end: index, - normalized: line.slice(start, index).toLowerCase(), - depth, - }); - } - } - - return words; -} - function updateParenthesisDepth( line: string, outside: readonly boolean[], diff --git a/src/formatter/passes/structural/unionAllFormatting.ts b/src/formatter/passes/structural/unionAllFormatting.ts index 29b0caf..ed886aa 100644 --- a/src/formatter/passes/structural/unionAllFormatting.ts +++ b/src/formatter/passes/structural/unionAllFormatting.ts @@ -1,8 +1,10 @@ import { cloneSqlLineScanState, + findNextSqlWord, scanSqlLineOutsideLiteralsAndComments, type SqlLineScanState, type SqlOutsideSegment, + type SqlWordMatch, } from '../../sqlLineScanner'; interface ExpandedLineResult { @@ -10,14 +12,7 @@ interface ExpandedLineResult { readonly nextState: SqlLineScanState; } -interface KeywordMatch { - readonly start: number; - readonly end: number; - readonly text: string; -} - -const SQL_WORD_START = /[A-Za-z_]/u; -const SQL_WORD_PART = /[A-Za-z0-9_$#]/u; +type KeywordMatch = SqlWordMatch; /** * Keeps UNION ALL as its own physical SQL line. @@ -60,31 +55,25 @@ function findUnionAllMatches( let index = segment.start; while (index < segment.end) { - const unionStart = findWordStart(line, index, segment.end); + const unionWord = findNextSqlWord(line, index, segment.end); - if (unionStart < 0) { + if (!unionWord) { break; } - const unionWord = readWordAt(line, unionStart); - - if (!unionWord || unionWord.end > segment.end) { - break; - } - - if (unionWord.text.toLowerCase() !== 'union') { + if (unionWord.normalized !== 'union') { index = unionWord.end; continue; } - const allStart = findWordStart(line, unionWord.end, segment.end); - const allWord = allStart >= 0 ? readWordAt(line, allStart) : undefined; + const allWord = findNextSqlWord(line, unionWord.end, segment.end); - if (allWord && allWord.end <= segment.end && allWord.text.toLowerCase() === 'all') { + if (allWord?.normalized === 'all') { matches.push({ start: unionWord.start, end: allWord.end, text: line.slice(unionWord.start, allWord.end), + normalized: line.slice(unionWord.start, allWord.end).toLowerCase(), }); index = allWord.end; continue; @@ -119,27 +108,3 @@ function pushTrimmed(lines: string[], value: string): void { lines.push(trimmed); } } - -function findWordStart(line: string, start: number, end: number): number { - for (let index = start; index < end; index += 1) { - if (SQL_WORD_START.test(line[index])) { - return index; - } - } - - return -1; -} - -function readWordAt(line: string, start: number): KeywordMatch | undefined { - if (!SQL_WORD_START.test(line[start] ?? '')) { - return undefined; - } - - let end = start + 1; - - while (end < line.length && SQL_WORD_PART.test(line[end])) { - end += 1; - } - - return { start, end, text: line.slice(start, end) }; -} diff --git a/src/formatter/sqlLineScanner.ts b/src/formatter/sqlLineScanner.ts index 8b6bd69..e9edf09 100644 --- a/src/formatter/sqlLineScanner.ts +++ b/src/formatter/sqlLineScanner.ts @@ -12,6 +12,167 @@ export interface SqlLineScanResult { readonly nextState: SqlLineScanState; } +export interface SqlWordMatch { + readonly start: number; + readonly end: number; + readonly text: string; + readonly normalized: string; +} + +export interface SqlWordDepthMatch extends SqlWordMatch { + readonly depth: number; +} + +const SQL_WORD_START_PATTERN = /[A-Za-z_]/u; +const SQL_WORD_PART_PATTERN = /[A-Za-z0-9_$#]/u; + +export function isSqlWordStart(character: string | undefined): boolean { + return character !== undefined && SQL_WORD_START_PATTERN.test(character); +} + +export function isSqlWordPart(character: string | undefined): boolean { + return character !== undefined && SQL_WORD_PART_PATTERN.test(character); +} + +export function readSqlWordAt( + line: string, + start: number, + endIndex = line.length, +): SqlWordMatch | undefined { + if (!isSqlWordStart(line[start])) { + return undefined; + } + + let end = start + 1; + + while (end < line.length && isSqlWordPart(line[end])) { + end += 1; + } + + if (end > endIndex) { + return undefined; + } + + const text = line.slice(start, end); + + return { + start, + end, + text, + normalized: text.toLowerCase(), + }; +} + +export function readSqlWordBefore(line: string, index: number): SqlWordMatch | undefined { + let cursor = index - 1; + + while (cursor >= 0 && /\s/u.test(line[cursor])) { + cursor -= 1; + } + + const end = cursor + 1; + + while (cursor >= 0 && isSqlWordPart(line[cursor])) { + cursor -= 1; + } + + const start = cursor + 1; + + if (start === end || !isSqlWordStart(line[start])) { + return undefined; + } + + const text = line.slice(start, end); + + return { + start, + end, + text, + normalized: text.toLowerCase(), + }; +} + +export function findNextSqlWord( + line: string, + startIndex: number, + endIndex: number, +): SqlWordMatch | undefined { + for (let index = startIndex; index < endIndex; index += 1) { + const word = readSqlWordAt(line, index, endIndex); + + if (word) { + return word; + } + } + + return undefined; +} + +export function collectSqlWordsFromSegments( + line: string, + outsideSegments: readonly SqlOutsideSegment[], +): readonly SqlWordMatch[] { + const words: SqlWordMatch[] = []; + + for (const segment of outsideSegments) { + let index = segment.start; + + while (index < segment.end) { + const word = findNextSqlWord(line, index, segment.end); + + if (!word) { + break; + } + + words.push(word); + index = word.end; + } + } + + return words; +} + +export function collectSqlWordsWithParenthesisDepth( + line: string, + outsideSegments: readonly SqlOutsideSegment[], + initialDepth = 0, +): readonly SqlWordDepthMatch[] { + const words: SqlWordDepthMatch[] = []; + let depth = initialDepth; + + for (const segment of outsideSegments) { + let index = segment.start; + + while (index < segment.end) { + const char = line[index]; + + if (char === '(') { + depth += 1; + index += 1; + continue; + } + + if (char === ')') { + depth = Math.max(0, depth - 1); + index += 1; + continue; + } + + const word = readSqlWordAt(line, index, segment.end); + + if (!word) { + index += 1; + continue; + } + + words.push({ ...word, depth }); + index = word.end; + } + } + + return words; +} + export function createInitialSqlLineScanState(): SqlLineScanState { return { inBlockComment: false }; }