diff --git a/docs/ai/design/2026-06-28-feature-telegram-markdown-chunking.md b/docs/ai/design/2026-06-28-feature-telegram-markdown-chunking.md new file mode 100644 index 00000000..497cba3e --- /dev/null +++ b/docs/ai/design/2026-06-28-feature-telegram-markdown-chunking.md @@ -0,0 +1,80 @@ +--- +phase: design +title: Telegram Markdown-first Chunking Design +description: Use marked tokens to split source Markdown before rendering Telegram HTML +--- + +# Telegram Markdown-first Chunking Design + +## Architecture Overview + +```mermaid +graph TD + A[TelegramAdapter.sendMessage markdown text] --> B[chunkMarkdownForTelegram] + B --> C[marked lexer top-level tokens] + C --> D[Group tokens by rendered HTML length] + D --> E[Split oversized token] + E --> F[Render each markdown chunk with markdownToTelegramHtml] + F --> G[sendMessage parse_mode HTML] + G --> H[parse-entities fallback to plain text] +``` + +The adapter moves chunking ahead of rendering. A new chunking helper uses `marked` lexer tokens to keep source Markdown boundaries, renders each candidate with `markdownToTelegramHtml`, and only emits chunks whose rendered HTML fits the Telegram limit. `TelegramAdapter` then sends those already-valid HTML chunks using the existing parse mode and fallback path. + +## Data Models + +- Markdown input: raw string passed to `TelegramAdapter.sendMessage`. +- Marked token: top-level `Tokens.Generic` from `marked.lexer(markdown)`. +- Markdown chunk: source Markdown string that can be rendered independently. +- Rendered chunk: Telegram-compatible HTML string produced by `markdownToTelegramHtml(markdownChunk)`. + +## API Design + +Internal helper: + +- `chunkMarkdownForTelegram(markdown: string, maxLen: number): string[]` + - Returns rendered Telegram HTML chunks. + - Throws only if the renderer/lexer fails before fallback can handle it. + - Does not expose new public package APIs. + +Adapter flow: + +1. Try Markdown-first chunking. +2. 
Send each rendered chunk with `{ parse_mode: 'HTML' }`. +3. If Telegram rejects a chunk with `can't parse entities`, send plain text derived from that rendered chunk. +4. If Markdown lexing/rendering throws before chunks are produced, preserve the existing source/plain text fallback in max-length chunks. + +## Component Breakdown + +- `packages/channel-connector/src/adapters/TelegramAdapter.ts` + - Replace rendered-HTML chunking with Markdown-first chunking. + - Keep `htmlToPlainText`, parse-entities detection, and plain text fallback. +- `packages/channel-connector/src/utils/telegramHtml.ts` + - Keep existing renderer unchanged. + - Export or reuse `marked` lexer only if it helps avoid duplicate configuration. +- `packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts` + - Add behavior tests for long code fences, nested list code, paragraphs, Unicode/emoji, and unchanged normal markdown. + +## Design Decisions + +- Chosen: use `marked` lexer/token raw source and render candidate Markdown chunks for validation. + - Trade-off: simple and aligned with current dependency, but requires recursive split heuristics for oversized tokens. +- Alternative: chunk rendered HTML with an HTML parser. + - Rejected because the user asked to chunk Markdown/source before rendering and because Telegram HTML validity depends on rendering each chunk independently. +- Alternative: convert to Telegram MessageEntity. + - Rejected by explicit requirement. + +## Splitting Strategy + +- Top-level token grouping: append token raw source to the current candidate when its rendered HTML fits. +- Oversized code token: split by lines, wrapping every chunk in a fenced block using the original language. +- Oversized list token: split by list items, reusing item raw source where available; if an item remains oversized, split that item recursively. 
+- Oversized paragraph/text token: split raw paragraph content by newline, then sentence punctuation, then word, while validating rendered length. +- Fallback: hard split source/plain text if rendering a chunk still cannot fit, then send without parse mode only for that fallback path. + +## Non-Functional Requirements + +- Reliability: each parse-mode send is independently rendered HTML. +- Performance: rendering candidates is acceptable because Telegram sends are already network-bound and messages are small relative to process memory. +- Security: continue escaping HTML through the existing renderer; do not pass raw HTML through. +- Maintainability: keep chunking local to the Telegram adapter and use `marked` tokens rather than ad hoc Markdown parsing. diff --git a/docs/ai/implementation/2026-06-28-feature-telegram-markdown-chunking.md b/docs/ai/implementation/2026-06-28-feature-telegram-markdown-chunking.md new file mode 100644 index 00000000..01716f26 --- /dev/null +++ b/docs/ai/implementation/2026-06-28-feature-telegram-markdown-chunking.md @@ -0,0 +1,78 @@ +--- +phase: implementation +title: Telegram Markdown-first Chunking Implementation +description: Implementation notes for marked-token chunking in TelegramAdapter +--- + +# Telegram Markdown-first Chunking Implementation + +## Development Setup + +- Active worktree: `/home/ubuntu/code/ai-devkit/.worktrees/feature-telegram-markdown-chunking` +- Branch: `feature-telegram-markdown-chunking` +- Bootstrap: `npm ci` +- Package: `@ai-devkit/channel-connector` + +## Code Structure + +- `packages/channel-connector/src/adapters/TelegramAdapter.ts`: Telegram send flow and chunking helpers. +- `packages/channel-connector/src/utils/telegramHtml.ts`: existing Markdown-to-Telegram-HTML renderer, retained as-is for rendering. +- `packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts`: mocked adapter behavior tests. 
+ +## Implementation Notes + +### Core Features + +- Implemented `chunkMarkdownForTelegram` in `TelegramAdapter.ts`. +- Uses `Marked.lexer` to obtain top-level Markdown tokens. +- Groups tokens by rendering candidate source Markdown through `markdownToTelegramHtml`. +- Splits oversized code tokens by lines while wrapping every emitted part in the original fenced code marker and language. +- Splits oversized lists by list item where possible, then falls back to recursive text/code splitting for oversized items. +- Splits oversized paragraphs/text by newline, sentence, word, then code point fallback. +- Sends rendered chunks with Telegram HTML parse mode only after they fit. + +### Patterns & Best Practices + +- Keep renderer behavior unchanged. +- Keep fallback behavior local to `TelegramAdapter.sendMessage`. +- Prefer source Markdown chunk boundaries over rendered HTML manipulation. + +## Integration Points + +- No public API changes. +- No Telegram Bot API contract changes. +- Telegraf remains mocked in tests. + +## Error Handling + +- If Markdown chunk generation fails, fall back to source/plain text chunks. +- If Telegram rejects parse-mode HTML with `can't parse entities`, send plain text derived from that rendered chunk. +- Non-parse Telegram send errors continue to propagate. + +## Performance Considerations + +- Candidate rendering is repeated during grouping and splitting; this is bounded by Telegram message size and channel send frequency. +- Avoid large dependency changes or a custom parser. + +## Security Notes + +- The existing renderer continues to escape user content. +- Raw Markdown HTML remains dropped by the renderer. +- No secrets or new config are introduced. + +## Validation Results + +- `npx ai-devkit@latest lint --feature telegram-markdown-chunking`: exited 0. +- `npm --workspace @ai-devkit/channel-connector test -- src/__tests__/adapters/TelegramAdapter.test.ts`: exited 0, 26 tests passed. 
+- `npm --workspace @ai-devkit/channel-connector test`: exited 0, 62 tests passed. +- `npm --workspace @ai-devkit/channel-connector run typecheck`: exited 0. +- `npm --workspace @ai-devkit/channel-connector run lint`: exited 0. +- Final rerun of `npx ai-devkit@latest lint --feature telegram-markdown-chunking`: exited 0. +- Commit hook rerun after direct workspace package builds: repo lint exited 0 with existing warnings; repo tests exited 0 with 70 files and 821 tests passed. +- Post-fetch targeted validation: `npm --workspace @ai-devkit/channel-connector test -- src/__tests__/adapters/TelegramAdapter.test.ts` exited 0, 26 tests passed. + +## Deviations and Follow-ups + +- No design deviations. +- Plain/source fallback remains available if a rendered chunk still cannot fit after semantic splitting. +- PR opened: https://github.com/codeaholicguy/ai-devkit/pull/125. diff --git a/docs/ai/planning/2026-06-28-feature-telegram-markdown-chunking.md b/docs/ai/planning/2026-06-28-feature-telegram-markdown-chunking.md new file mode 100644 index 00000000..295b178a --- /dev/null +++ b/docs/ai/planning/2026-06-28-feature-telegram-markdown-chunking.md @@ -0,0 +1,80 @@ +--- +phase: planning +title: Telegram Markdown-first Chunking Plan +description: Implementation tasks for semantic Markdown chunking before Telegram HTML rendering +--- + +# Telegram Markdown-first Chunking Plan + +## Milestones + +- [x] Milestone 1: Requirements, design, and tests describe Markdown-first chunking. +- [x] Milestone 2: Adapter chunks Markdown source with marked tokens and sends independently rendered HTML chunks. +- [x] Milestone 3: Targeted tests, typecheck, lifecycle lint, review, commit, and PR are complete. + +## Task Breakdown + +### Phase 1: Documentation and Existing Behavior + +- [x] Task 1.1: Capture requirements, design, testing scenarios, and implementation plan. + - Outcome: lifecycle docs explain scope, non-goals, splitting strategy, and validation. 
+ - Validation: `npx ai-devkit@latest lint --feature telegram-markdown-chunking`. + - Related tests: all testing doc scenarios. +- [x] Task 1.2: Inspect current Telegram adapter, renderer, package scripts, and existing tests. + - Outcome: implementation reuses local patterns and dependencies. + - Validation: source references recorded in implementation notes. + +### Phase 2: TDD and Core Implementation + +- [x] Task 2.1: Add failing tests for long fenced code, nested list code, paragraphs, Unicode/emoji, and unchanged normal markdown. + - Outcome: tests fail against rendered-HTML chunking for the right reasons. + - Validation: targeted Vitest run exits non-zero before production changes. +- [x] Task 2.2: Implement Markdown-first chunking with `marked` lexer tokens. + - Outcome: each rendered HTML chunk is independently valid and within the Telegram max length. + - Validation: targeted Vitest run exits zero. +- [x] Task 2.3: Preserve fallbacks for renderer failures and Telegram parse-entities errors. + - Outcome: existing fallback tests still pass. + - Validation: adapter test suite exits zero. + +### Phase 3: Verification and Review + +- [x] Task 3.1: Run typecheck and targeted package tests. + - Outcome: changed package validates locally. + - Validation: command output recorded in implementation/testing docs. +- [x] Task 3.2: Review implementation against design and update lifecycle docs. + - Outcome: docs reflect actual files, decisions, deviations, and risks. + - Validation: lifecycle lint passes. +- [x] Task 3.3: Commit, push, and open PR. + - Outcome: branch `feature-telegram-markdown-chunking` has a PR ready for review. + - Validation: commit SHA and PR URL reported. + +## Dependencies + +- Depends on existing `marked` dependency in `@ai-devkit/channel-connector`. +- Depends on existing `markdownToTelegramHtml` renderer remaining stable. +- No external Telegram API dependency for automated tests. 
+ +## Timeline & Estimates + +- Documentation and code discovery: small. +- TDD and chunking implementation: medium, because recursive splitting must avoid malformed HTML and preserve fallbacks. +- Verification, review, PR: small to medium depending on CI/local runtime. + +## Risks & Mitigation + +- Risk: marked token `raw` values may differ across token kinds. + - Mitigation: use `raw` where available and fall back to token text for known oversized splitters. +- Risk: rendered length may exceed source length due to HTML wrappers/entities. + - Mitigation: validate by rendering every candidate before sending. +- Risk: plain text hard fallback could lose formatting. + - Mitigation: use it only after semantic splitting and rendering cannot fit. + +## Resources Needed + +- Repo-local tests and typecheck. +- `npx ai-devkit@latest` docs/lint commands. +- GitHub CLI or configured forge CLI for PR creation. + +## Progress Summary + +Implementation tasks are complete through package verification, local review, commit, push, and PR creation. PR: https://github.com/codeaholicguy/ai-devkit/pull/125. diff --git a/docs/ai/requirements/2026-06-28-feature-telegram-markdown-chunking.md b/docs/ai/requirements/2026-06-28-feature-telegram-markdown-chunking.md new file mode 100644 index 00000000..65085b3e --- /dev/null +++ b/docs/ai/requirements/2026-06-28-feature-telegram-markdown-chunking.md @@ -0,0 +1,64 @@ +--- +phase: requirements +title: Telegram Markdown-first Chunking +description: Chunk Telegram markdown source by semantic boundaries before rendering HTML +--- + +# Telegram Markdown-first Chunking + +## Problem Statement + +AI DevKit's Telegram adapter currently renders an entire Markdown message to Telegram-compatible HTML, then chunks the rendered HTML string. This can split HTML tags or entities, especially for long fenced code blocks and nested-list content that renders into `
<pre><code>...</code></pre>
`. Telegram then receives invalid HTML for a chunk and may reject the send. + +Affected users are people using `ai-devkit channel start telegram` to read long agent responses in Telegram. The current workaround is a parse-entities fallback that strips formatting for rejected chunks, but that still sends partial rendered fragments and loses formatting. + +## Goals & Objectives + +- Chunk Markdown/source before rendering, using `marked` lexer tokens rather than a new Markdown parser. +- Preserve the existing `markdownToTelegramHtml` renderer and Telegram HTML parse mode. +- Ensure every Telegram HTML send receives an independently rendered chunk with valid Telegram-compatible HTML. +- Split oversized content by sensible semantic boundaries: + - top-level Markdown tokens first + - code fences by lines while preserving fences and language + - lists by list item where possible + - paragraphs by newline, then sentence, then word + - source/plain text fallback only when rendering still fails +- Maintain normal markdown output for messages that already fit. + +## Non-goals + +- Do not implement Telegram `MessageEntity` conversion. +- Do not replace or rewrite the existing Markdown-to-Telegram-HTML renderer. +- Do not implement a custom Markdown parser. +- Do not change Telegram authorization, channel setup, polling, or send retry behavior beyond chunk preparation. + +## User Stories & Use Cases + +- As a Telegram channel user, I want long fenced code blocks containing literal strings like `tag` to arrive as multiple valid formatted code chunks, so Telegram does not reject malformed HTML. +- As a Telegram channel user, I want nested lists that include long code blocks to chunk at list item or code-line boundaries, so structure and readable formatting are preserved. +- As a Telegram channel user, I want long paragraphs to be split at readable boundaries, so responses remain understandable within Telegram's 4096 character limit. 
+- As a Telegram channel user, I want emoji and other Unicode text to be counted consistently with JavaScript/Telegram send limits, so chunks do not exceed the adapter's configured limit. +- As a maintainer, I want existing short and normal Markdown messages to remain unchanged. + +## Success Criteria + +- Each HTML send from `TelegramAdapter.sendMessage` is at most `TELEGRAM_MAX_MESSAGE_LENGTH` characters. +- Each HTML send is generated by independently calling `markdownToTelegramHtml` on a Markdown chunk. +- Tests cover long fenced code containing literal `tag` text without splitting rendered `
<pre>`, `<code>`, or HTML entities.
+- Tests cover nested list content with long fenced code.
+- Tests cover long paragraphs split by sensible boundaries.
+- Tests cover Unicode/emoji length behavior.
+- Tests cover normal Markdown unchanged in a single send.
+- Existing parse-entities fallback behavior remains available when Telegram rejects a rendered chunk.
+
+## Constraints & Assumptions
+
+- Telegram max message length remains represented by `TELEGRAM_MAX_MESSAGE_LENGTH = 4096`.
+- JavaScript string length is the existing counting model for this adapter; this feature must not introduce a new byte-based or grapheme-count limit.
+- `marked` is already available in `@ai-devkit/channel-connector`; use its lexer/tokens.
+- Rendering a candidate chunk is the authoritative validation because the rendered HTML, not raw Markdown length, determines Telegram payload size.
+- If a single semantic unit still cannot be split cleanly under the limit, a plain/source fallback is acceptable to preserve delivery.
+
+## Questions & Open Items
+
+- No material open requirements. The user explicitly decided to keep the existing HTML renderer and not implement MessageEntity conversion.
diff --git a/docs/ai/testing/2026-06-28-feature-telegram-markdown-chunking.md b/docs/ai/testing/2026-06-28-feature-telegram-markdown-chunking.md
new file mode 100644
index 00000000..8fed9976
--- /dev/null
+++ b/docs/ai/testing/2026-06-28-feature-telegram-markdown-chunking.md
@@ -0,0 +1,62 @@
+---
+phase: testing
+title: Telegram Markdown-first Chunking Testing
+description: Verify semantic Markdown chunking before Telegram HTML rendering
+---
+
+# Telegram Markdown-first Chunking Testing
+
+## Test Coverage Goals
+
+- Unit test coverage target: all new Telegram chunking branches added in `TelegramAdapter`.
+- Integration scope: mocked Telegraf send calls from `TelegramAdapter.sendMessage`.
+- End-to-end scope: not required; no live Telegram API calls for this change.
+- Acceptance criteria map directly to the requirements edge cases.
+
+## Unit Tests
+
+### TelegramAdapter.sendMessage
+
+- [x] Long fenced code containing literal `<code>tag</code>` is split into multiple parse-mode HTML sends, each within 4096 characters and each containing balanced `<pre><code>` wrappers.
+- [x] Nested list with long fenced code is split into multiple parse-mode HTML sends without a malformed partial HTML code block.
+- [x] Long paragraphs are split into multiple parse-mode HTML sends at readable boundaries and stay within limit.
+- [x] Unicode/emoji content respects JavaScript string length limits for chunk size.
+- [x] Normal markdown that fits is sent once and renders unchanged.
+- [x] Existing parse-entities retry still falls back to plain text.
+- [x] Existing renderer-throws fallback still sends source/plain text chunks.
+
+## Integration Tests
+
+- [x] Mocked `telegraf.telegram.sendMessage` calls always receive `{ parse_mode: 'HTML' }` for successful rendered chunks.
+- [x] Plain text fallback calls omit parse mode.
+
+## End-to-End Tests
+
+- Not planned. The behavior is deterministic and covered through the adapter boundary with Telegraf mocked.
+
+## Test Data
+
+- Fenced TypeScript code block with repeated lines containing `<code>tag</code>` and `&`.
+- Nested unordered list with a child fenced code block large enough to exceed Telegram length after rendering.
+- Paragraphs containing sentence punctuation, long words, and emoji.
+- Short markdown sample: `` **bold** and *italic* and `code` ``.
+
+## Test Reporting & Coverage
+
+- Red command: `npm --workspace @ai-devkit/channel-connector test -- src/__tests__/adapters/TelegramAdapter.test.ts` exited 1 with 4 expected failures before production changes.
+- Targeted adapter command: `npm --workspace @ai-devkit/channel-connector test -- src/__tests__/adapters/TelegramAdapter.test.ts` exited 0 with 26 tests passed.
+- Package test command: `npm --workspace @ai-devkit/channel-connector test` exited 0 with 4 files and 62 tests passed.
+- Typecheck command: `npm --workspace @ai-devkit/channel-connector run typecheck` exited 0.
+- Lint command: `npm --workspace @ai-devkit/channel-connector run lint` exited 0.
+
+## Manual Testing
+
+- Not required for this non-UI adapter change.
+
+## Performance Testing
+
+- No dedicated benchmark required. Tests should avoid pathological runtime by using representative 5k to 12k character inputs.
+
+## Bug Tracking
+
+- Regressions should be added as adapter tests with inputs that previously generated malformed rendered HTML chunks.
diff --git a/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts b/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts
index 74480a2c..bdbe506f 100644
--- a/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts
+++ b/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts
@@ -43,6 +43,13 @@ function getMockBot() {
     return (telegrafModule as unknown as { __mockBot: ReturnType['mock'] & Record }).__mockBot;
 }
 
+/**
+ * Asserts that a rendered Telegram HTML chunk can be sent on its own: it fits
+ * the 4096-character limit, does not end in a truncated HTML entity, and has
+ * as many opening as closing `<pre>` and `<code>` tags.
+ */
+function expectHtmlChunkWithinLimit(chunk: string): void {
+    expect(chunk.length).toBeLessThanOrEqual(4096);
+    // A chunk cut in the middle of an entity would end in a prefix such as `&am` or `&quo`.
+    expect(chunk).not.toMatch(/&(?:l|lt|g|gt|a|am|amp|q|qu|quo|quot)$/);
+    // Opening tags are matched with optional attributes in case the renderer emits any.
+    expect((chunk.match(/<pre\b[^>]*>/g) ?? []).length).toBe((chunk.match(/<\/pre>/g) ?? []).length);
+    expect((chunk.match(/<code\b[^>]*>/g) ?? []).length).toBe((chunk.match(/<\/code>/g) ?? []).length);
+}
+
 describe('TelegramAdapter', () => {
     let adapter: TelegramAdapter;
 
@@ -170,6 +177,76 @@ describe('TelegramAdapter', () => {
             }
         });
 
+        it('should split long fenced code into independently valid HTML code blocks', async () => {
+            const bot = getMockBot();
+            // The literal tag, `&&` and `<` all need HTML escaping, so a split through
+            // rendered HTML (rather than markdown source) could cut an entity or tag.
+            const codeLine = 'const sample = "<code>tag</code>" && value < limit;\n';
+            const longMessage = `Before\n\n\`\`\`ts\n${codeLine.repeat(100)}\`\`\`\n\nAfter`;
+
+            await adapter.sendMessage('12345', longMessage);
+
+            expect(bot.telegram.sendMessage.mock.calls.length).toBeGreaterThan(1);
+            for (const call of bot.telegram.sendMessage.mock.calls) {
+                const chunk = call[1];
+                expect(call[2]).toEqual({ parse_mode: 'HTML' });
+                expectHtmlChunkWithinLimit(chunk);
+                // Only chunks that carry part of the code block must be closed code
+                // blocks whose literal tag stayed escaped.
+                if (chunk.includes('<pre>')) {
+                    expect(chunk).toContain('</code></pre>');
+                    expect(chunk).toContain('&lt;code&gt;tag&lt;/code&gt;');
+                }
+            }
+        });
+
+        // A fenced code block nested under a child list item must still be delivered
+        // as several sends, each a parse-mode HTML payload that passes the chunk checks.
+        it('should split nested list fenced code without sending partial HTML tags', async () => {
+            const bot = getMockBot();
+            const codeLine = '  return "tag" && input < output;\n';
+            // Parent item -> child item -> fenced `js` block of 100 repeated lines -> sibling item.
+            const longMessage = [
+                '- parent item',
+                '  - child item with code',
+                '    ```js',
+                codeLine.repeat(100).trimEnd(),
+                '    ```',
+                '- final item',
+            ].join('\n');
+
+            await adapter.sendMessage('12345', longMessage);
+
+            // More than one send proves the message was chunked at all.
+            expect(bot.telegram.sendMessage.mock.calls.length).toBeGreaterThan(1);
+            for (const call of bot.telegram.sendMessage.mock.calls) {
+                const chunk = call[1];
+                expect(call[2]).toEqual({ parse_mode: 'HTML' });
+                expectHtmlChunkWithinLimit(chunk);
+            }
+        });
+
+        // A single paragraph longer than the limit must be split where a reader
+        // would expect a break, not in the middle of a word.
+        it('should split long paragraphs at readable markdown boundaries', async () => {
+            const bot = getMockBot();
+            const sentence = 'This is a long sentence with enough words to make chunking choose sentence boundaries. ';
+            // 80 repetitions of the sentence exceed 4096 characters with no newline anywhere.
+            const longMessage = sentence.repeat(80);
+
+            await adapter.sendMessage('12345', longMessage);
+
+            expect(bot.telegram.sendMessage.mock.calls.length).toBeGreaterThan(1);
+            for (const call of bot.telegram.sendMessage.mock.calls) {
+                const chunk = call[1];
+                expect(call[2]).toEqual({ parse_mode: 'HTML' });
+                expectHtmlChunkWithinLimit(chunk);
+                // Every chunk must stop after a full stop, a paragraph break, or a space.
+                expect(chunk.endsWith('.') || chunk.endsWith('.\n\n') || chunk.endsWith(' ')).toBe(true);
+            }
+        });
+
+        // The limit is measured with `String.length` (UTF-16 code units), so
+        // emoji-heavy text must be chunked by that measure.
+        it('should keep emoji chunks within the configured JavaScript string length limit', async () => {
+            const bot = getMockBot();
+            // Each emoji is two UTF-16 code units, so 2500 of them exceed 4096 in `length`.
+            const longMessage = `Status ${'🧪'.repeat(2500)}`;
+
+            await adapter.sendMessage('12345', longMessage);
+
+            expect(bot.telegram.sendMessage.mock.calls.length).toBeGreaterThan(1);
+            for (const call of bot.telegram.sendMessage.mock.calls) {
+                expect(call[1].length).toBeLessThanOrEqual(4096);
+                expect(call[2]).toEqual({ parse_mode: 'HTML' });
+            }
+        });
+
         it('should hard split at 4096 when no newlines available', async () => {
             const bot = getMockBot();
             const longMessage = 'A'.repeat(5000);
@@ -191,7 +268,7 @@ describe('TelegramAdapter', () => {
 
             // First chunk should end at a \n\n boundary, not mid-paragraph
             const firstChunk = bot.telegram.sendMessage.mock.calls[0][1];
-            expect(firstChunk.endsWith('\n\n')).toBe(true);
+            expect(firstChunk).toBe(`${paragraph}\n\n${paragraph}`);
         });
 
         it('should send short messages in a single call', async () => {
diff --git a/packages/channel-connector/src/adapters/TelegramAdapter.ts b/packages/channel-connector/src/adapters/TelegramAdapter.ts
index de174388..a358764c 100644
--- a/packages/channel-connector/src/adapters/TelegramAdapter.ts
+++ b/packages/channel-connector/src/adapters/TelegramAdapter.ts
@@ -1,4 +1,5 @@
 import { Telegraf } from 'telegraf';
+import { Marked, type Token, type Tokens } from 'marked';
 import type { ChannelAdapter } from './ChannelAdapter.js';
 import { markdownToTelegramHtml } from '../utils/telegramHtml.js';
 import type { IncomingMessage } from '../types.js';
@@ -6,6 +7,12 @@ import type { IncomingMessage } from '../types.js';
 export const TELEGRAM_CHANNEL_TYPE = 'telegram';
 export const TELEGRAM_MAX_MESSAGE_LENGTH = 4096;
 const TELEGRAM_PARSE_MODE = 'HTML' as const;
+// marked instance used to lex markdown source into tokens for chunking.
+const markdownLexer = new Marked();
+
+/** One outgoing Telegram message body produced by the markdown chunker. */
+type TelegramMessageChunk = {
+    /** Message body: rendered Telegram HTML when `html` is true, plain text otherwise. */
+    text: string;
+    /** Whether `text` is sent with the HTML parse mode (true) or without a parse mode (false). */
+    html: boolean;
+};
 
 export interface TelegramAdapterOptions {
     botToken: string;
@@ -56,14 +63,13 @@ export class TelegramAdapter implements ChannelAdapter {
 
     /**
      * Input is treated as markdown and rendered as Telegram-compatible HTML.
-     * Long messages are chunked at paragraph boundaries when possible; very
-     * long single blocks (e.g. a `
` over 4096 chars) may still split
-     * mid-tag and produce a partial render in the second chunk.
+     * Long messages are chunked as markdown source before rendering so each
+     * Telegram HTML payload is independently valid.
      */
     async sendMessage(chatId: string, text: string): Promise {
-        let html: string;
+        let chunks: TelegramMessageChunk[];
         try {
-            html = markdownToTelegramHtml(text);
+            chunks = chunkMarkdownForTelegram(text, TELEGRAM_MAX_MESSAGE_LENGTH);
         } catch {
             for (const chunk of chunkMessage(text, TELEGRAM_MAX_MESSAGE_LENGTH)) {
                 await this.bot.telegram.sendMessage(chatId, chunk);
@@ -71,14 +77,19 @@ export class TelegramAdapter implements ChannelAdapter {
             return;
         }
 
-        for (const chunk of chunkMessage(html, TELEGRAM_MAX_MESSAGE_LENGTH)) {
+        for (const chunk of chunks) {
+            if (!chunk.html) {
+                await this.bot.telegram.sendMessage(chatId, chunk.text);
+                continue;
+            }
+
             try {
-                await this.bot.telegram.sendMessage(chatId, chunk, { parse_mode: TELEGRAM_PARSE_MODE });
+                await this.bot.telegram.sendMessage(chatId, chunk.text, { parse_mode: TELEGRAM_PARSE_MODE });
             } catch (error) {
                 if (!isParseEntitiesError(error)) throw error;
                 // Telegram rejected the rendered HTML — fall back to plain text
                 // so the user still gets the content (just unformatted).
-                await this.bot.telegram.sendMessage(chatId, htmlToPlainText(chunk));
+                await this.bot.telegram.sendMessage(chatId, htmlToPlainText(chunk.text));
             }
         }
     }
@@ -108,6 +119,252 @@ function htmlToPlainText(html: string): string {
         .replace(/&/g, '&');
 }
 
+/**
+ * Splits markdown into Telegram-sized pieces and renders each piece on its own.
+ *
+ * Each source chunk from `splitMarkdownSource` is rendered with
+ * `markdownToTelegramHtml`. When the rendered HTML fits `maxLen` it is emitted
+ * as an HTML chunk. Otherwise the chunk's markdown source is hard-split with
+ * `chunkMessage` and emitted as plain text (`html: false`), which the caller
+ * sends without a parse mode.
+ *
+ * Empty and whitespace-only chunks are dropped because they carry no visible
+ * content.
+ *
+ * @param markdown Raw markdown text to deliver.
+ * @param maxLen Maximum JavaScript string length of each emitted chunk.
+ * @returns Chunks in source order, each flagged as rendered HTML or plain text.
+ * @throws Anything thrown by the lexer or renderer; `sendMessage` catches it
+ *   and falls back to plain source chunks.
+ */
+function chunkMarkdownForTelegram(markdown: string, maxLen: number): TelegramMessageChunk[] {
+    const chunks: TelegramMessageChunk[] = [];
+
+    for (const markdownChunk of splitMarkdownSource(markdown, maxLen)) {
+        const html = markdownToTelegramHtml(markdownChunk);
+        if (html.length <= maxLen) {
+            // A whitespace-only body is expected to be rejected by Telegram as an
+            // empty message. That is not a parse-entities error, so sending it
+            // would abort delivery of the chunks that follow.
+            if (html.trim().length > 0) chunks.push({ text: html, html: true });
+            continue;
+        }
+
+        // Rendering still exceeds the limit: deliver the source as plain text instead.
+        for (const plainChunk of chunkMessage(markdownChunk, maxLen)) {
+            if (plainChunk.trim().length > 0) chunks.push({ text: plainChunk, html: false });
+        }
+    }
+
+    return chunks;
+}
+
+/**
+ * Splits markdown source into pieces whose rendered HTML fits `maxLen`.
+ *
+ * Tokens from the lexer are packed greedily: a token's raw source is appended
+ * to the pending piece while the combined source still renders within the
+ * limit. A token that does not fit even on its own goes to
+ * `splitOversizedToken`. Every resulting piece is checked once more, and any
+ * piece that still renders too long is passed to `splitPlainMarkdownText`.
+ *
+ * @param markdown Markdown source to split.
+ * @param maxLen Maximum rendered length of each piece.
+ * @param depth Current recursion depth; beyond 6 the input goes straight to
+ *   `splitPlainMarkdownText`.
+ * @returns Markdown source pieces in their original order.
+ */
+function splitMarkdownSource(markdown: string, maxLen: number, depth = 0): string[] {
+    if (markdown.length === 0) return [];
+    if (renderedLengthFits(markdown, maxLen)) return [markdown];
+    // Recursion guard: stop re-lexing once nesting gets this deep.
+    if (depth > 6) return splitPlainMarkdownText(markdown, maxLen);
+
+    const pieces: string[] = [];
+    let pending = '';
+    const flushPending = (): void => {
+        if (pending.length === 0) return;
+        pieces.push(pending);
+        pending = '';
+    };
+
+    for (const token of markdownLexer.lexer(markdown)) {
+        const source = token.raw ?? '';
+        if (source.length === 0) continue;
+
+        const merged = pending + source;
+        if (renderedLengthFits(merged, maxLen)) {
+            pending = merged;
+        } else {
+            // The token cannot join the pending piece, so close that piece first.
+            flushPending();
+            if (renderedLengthFits(source, maxLen)) {
+                pending = source;
+            } else {
+                pieces.push(...splitOversizedToken(token, maxLen, depth + 1));
+            }
+        }
+    }
+    flushPending();
+
+    // Final safety pass over everything collected above.
+    const verified: string[] = [];
+    for (const piece of pieces) {
+        if (renderedLengthFits(piece, maxLen)) {
+            verified.push(piece);
+        } else {
+            verified.push(...splitPlainMarkdownText(piece, maxLen));
+        }
+    }
+    return verified;
+}
+
+/**
+ * Picks the splitter for a single token whose rendering exceeds `maxLen`.
+ *
+ * - `code` tokens go to `splitCodeToken`.
+ * - `list` tokens go to `splitListToken`.
+ * - `paragraph` and `text` tokens go to `splitPlainMarkdownText`.
+ * - Any other token that carries a `tokens` array is re-split from its raw
+ *   source with `splitMarkdownSource`; everything else is treated as text.
+ *
+ * @param token The oversized token.
+ * @param maxLen Maximum rendered length of each piece.
+ * @param depth Recursion depth forwarded to the recursive splitters.
+ * @returns Markdown source pieces for this token.
+ */
+function splitOversizedToken(token: Token, maxLen: number, depth: number): string[] {
+    if (token.type === 'code') {
+        return splitCodeToken(token as Tokens.Code, maxLen);
+    }
+    if (token.type === 'list') {
+        return splitListToken(token as Tokens.List, maxLen, depth);
+    }
+    if (token.type === 'paragraph' || token.type === 'text') {
+        return splitPlainMarkdownText(token.raw, maxLen);
+    }
+
+    const hasChildTokens = 'tokens' in token && Array.isArray(token.tokens);
+    if (hasChildTokens && token.raw !== undefined) {
+        return splitMarkdownSource(token.raw, maxLen, depth);
+    }
+    return splitPlainMarkdownText(token.raw ?? '', maxLen);
+}
+
+/**
+ * Splits an oversized list into groups of consecutive list items.
+ *
+ * Item raw sources are concatenated while the group still renders within
+ * `maxLen`. An item that is too long by itself is handed to
+ * `splitMarkdownSource` with an increased depth.
+ *
+ * @param token The oversized list token.
+ * @param maxLen Maximum rendered length of each piece.
+ * @param depth Current recursion depth.
+ * @returns Markdown source pieces in item order.
+ */
+function splitListToken(token: Tokens.List, maxLen: number, depth: number): string[] {
+    const groups: string[] = [];
+    let open = '';
+
+    for (const { raw: itemSource } of token.items) {
+        const extended = open + itemSource;
+        const canExtend = extended.length > 0 && renderedLengthFits(extended, maxLen);
+        if (canExtend) {
+            open = extended;
+            continue;
+        }
+
+        // Close the group built so far before dealing with this item.
+        if (open.length > 0) groups.push(open);
+        open = '';
+
+        if (renderedLengthFits(itemSource, maxLen)) {
+            open = itemSource;
+        } else {
+            groups.push(...splitMarkdownSource(itemSource, maxLen, depth + 1));
+        }
+    }
+
+    return open.length > 0 ? [...groups, open] : groups;
+}
+
+/**
+ * Splits an oversized code token by lines, wrapping every part in its own fence.
+ *
+ * Each emitted piece has the form `<fence><lang>\n<lines>\n<fence>\n\n`, so it
+ * renders as a complete code block by itself. Lines are packed greedily while
+ * the fenced piece renders within `maxLen`; a single line that is too long is
+ * cut further by `splitCodeLine`.
+ *
+ * The fence keeps the original fence character (`~` or a backtick) and is made
+ * longer than any run of that character that starts a line of the code. This
+ * stops code that itself contains fence-like lines (for example a block that
+ * was wrapped in four backticks) from closing the emitted fence early.
+ *
+ * @param token The oversized code token.
+ * @param maxLen Maximum rendered length of each piece.
+ * @returns Fenced markdown pieces in line order.
+ */
+function splitCodeToken(token: Tokens.Code, maxLen: number): string[] {
+    // An opening fence may be indented by up to three spaces.
+    const usesTilde = /^ {0,3}~~~/.test(token.raw);
+    const fenceChar = usesTilde ? '~' : '`';
+    const fenceRun = usesTilde ? /^ {0,3}(~+)/ : /^ {0,3}(`+)/;
+    const lang = token.lang ? (token.lang.split(/\s/)[0] ?? '') : '';
+    const lines = token.text.split('\n');
+
+    // Use a fence strictly longer than any fence-like run inside the code.
+    let fenceLength = 3;
+    for (const line of lines) {
+        const runLength = fenceRun.exec(line)?.[1]?.length ?? 0;
+        if (runLength >= fenceLength) fenceLength = runLength + 1;
+    }
+    const fence = fenceChar.repeat(fenceLength);
+
+    const chunks: string[] = [];
+    let currentLines: string[] = [];
+
+    const renderFence = (codeLines: string[]): string =>
+        `${fence}${lang}\n${codeLines.join('\n')}\n${fence}\n\n`;
+
+    for (const line of lines) {
+        const candidateLines = [...currentLines, line];
+        if (renderedLengthFits(renderFence(candidateLines), maxLen)) {
+            currentLines = candidateLines;
+            continue;
+        }
+
+        if (currentLines.length > 0) {
+            chunks.push(renderFence(currentLines));
+            currentLines = [];
+        }
+
+        if (renderedLengthFits(renderFence([line]), maxLen)) {
+            currentLines = [line];
+        } else {
+            // Even this one line is too long once fenced: cut it into segments.
+            for (const segment of splitCodeLine(line, fence, lang, maxLen)) {
+                chunks.push(renderFence([segment]));
+            }
+        }
+    }
+
+    if (currentLines.length > 0) chunks.push(renderFence(currentLines));
+    return chunks;
+}
+
+/**
+ * Breaks a single code line into segments such that each segment, once
+ * wrapped in a fenced block with the given fence and language, fits within
+ * `maxLen`. Splitting happens between Unicode code points, so surrogate
+ * pairs are never cut in half.
+ *
+ * @param line - Code line to split.
+ * @param fence - Fence string used to wrap each segment for measuring.
+ * @param lang - Language tag placed after the opening fence.
+ * @param maxLen - Maximum allowed length of a chunk's rendered HTML.
+ * @returns Unwrapped line segments in original order. A code point that
+ *   does not fit even on its own is still returned as a one-character segment.
+ */
+function splitCodeLine(line: string, fence: string, lang: string, maxLen: number): string[] {
+    const opening = `${fence}${lang}\n`;
+    const closing = `\n${fence}\n\n`;
+    const fitsWhenFenced = (value: string): boolean =>
+        renderedLengthFits(`${opening}${value}${closing}`, maxLen);
+
+    const pieces: string[] = [];
+    let pending = '';
+
+    // Iterating a string yields whole code points.
+    for (const codePoint of line) {
+        const extended = pending + codePoint;
+        if (fitsWhenFenced(extended)) {
+            pending = extended;
+            continue;
+        }
+
+        if (pending.length > 0) {
+            pieces.push(pending);
+            pending = '';
+        }
+
+        if (fitsWhenFenced(codePoint)) {
+            pending = codePoint;
+        } else {
+            pieces.push(codePoint);
+        }
+    }
+
+    if (pending.length > 0) pieces.push(pending);
+    return pieces;
+}
+
+/**
+ * Splits plain Markdown text into chunks whose rendered HTML fits `maxLen`,
+ * trying progressively finer boundaries: lines, then sentences, then words,
+ * and finally individual code points.
+ *
+ * At every level the units concatenate back to the exact input, so no text
+ * is lost. Sentence boundaries are found with a zero-width split; a
+ * match-based pattern would skip text such as "example.com" where the
+ * punctuation is not followed by whitespace.
+ *
+ * @param markdown - Markdown source to split.
+ * @param maxLen - Maximum allowed length of a chunk's rendered HTML.
+ * @returns Markdown chunks in original order; empty input yields no chunks.
+ */
+function splitPlainMarkdownText(markdown: string, maxLen: number): string[] {
+    if (markdown.length === 0) return [];
+    if (renderedLengthFits(markdown, maxLen)) return [markdown];
+
+    // Lines, each keeping its trailing newline.
+    const newlineUnits = markdown.split(/(?<=\n)/u);
+    if (newlineUnits.length > 1) {
+        return packMarkdownUnits(newlineUnits, maxLen, splitPlainMarkdownText);
+    }
+
+    // Sentences: cut after terminal punctuation plus its trailing whitespace,
+    // right before the next non-whitespace character.
+    const sentenceUnits = markdown.split(/(?<=[.!?]\s+)(?=\S)/u);
+    if (sentenceUnits.length > 1) {
+        return packMarkdownUnits(sentenceUnits, maxLen, splitPlainMarkdownText);
+    }
+
+    // Words with their surrounding whitespace; the leading `\s*` keeps any
+    // whitespace at the very start attached to the first word.
+    const wordUnits = markdown.match(/\s*\S+\s*/gu);
+    if (wordUnits && wordUnits.length > 1) {
+        return packMarkdownUnits(wordUnits, maxLen, splitPlainMarkdownText);
+    }
+
+    return splitByCodePoint(markdown, maxLen);
+}
+
+/**
+ * Greedily concatenates consecutive units into chunks, starting a new chunk
+ * whenever adding the next unit would push the rendered HTML past `maxLen`.
+ *
+ * @param units - Ordered pieces of Markdown source to pack.
+ * @param maxLen - Maximum allowed length of a chunk's rendered HTML.
+ * @param splitOversized - Called for a unit that does not fit even on its
+ *   own; its returned chunks are emitted in place of that unit.
+ * @returns Markdown chunks in original order.
+ */
+function packMarkdownUnits(
+    units: string[],
+    maxLen: number,
+    splitOversized: (unit: string, maxLen: number) => string[],
+): string[] {
+    const packed: string[] = [];
+    let buffer = '';
+
+    units.forEach((unit) => {
+        const extended = `${buffer}${unit}`;
+        if (renderedLengthFits(extended, maxLen)) {
+            buffer = extended;
+            return;
+        }
+
+        // The unit overflows the current chunk: close that chunk first.
+        if (buffer.length > 0) {
+            packed.push(buffer);
+            buffer = '';
+        }
+
+        if (!renderedLengthFits(unit, maxLen)) {
+            packed.push(...splitOversized(unit, maxLen));
+            return;
+        }
+
+        buffer = unit;
+    });
+
+    if (buffer.length > 0) packed.push(buffer);
+    return packed;
+}
+
+/**
+ * Last-resort splitter: packs the text one Unicode code point at a time into
+ * chunks whose rendered HTML fits `maxLen`. Surrogate pairs stay intact
+ * because the text is iterated by code point.
+ *
+ * @param text - Text to split.
+ * @param maxLen - Maximum allowed length of a chunk's rendered HTML.
+ * @returns Chunks in original order. A code point that does not fit even on
+ *   its own is still emitted as a single-character chunk.
+ */
+function splitByCodePoint(text: string, maxLen: number): string[] {
+    const codePoints = Array.from(text);
+    // Same greedy packing as for larger units; an oversized code point
+    // cannot be divided further, so it is passed through unchanged.
+    return packMarkdownUnits(codePoints, maxLen, (codePoint) => [codePoint]);
+}
+
+/**
+ * Reports whether the Telegram HTML rendered from the given Markdown is at
+ * most `maxLen` long, measured with `String.prototype.length` (UTF-16 code
+ * units).
+ *
+ * @param markdown - Markdown source to render and measure.
+ * @param maxLen - Maximum allowed rendered length.
+ * @returns `true` when the rendered HTML fits within `maxLen`.
+ */
+function renderedLengthFits(markdown: string, maxLen: number): boolean {
+    const renderedHtml = markdownToTelegramHtml(markdown);
+    return renderedHtml.length <= maxLen;
+}
+
 /**
  * Split text into chunks of maxLen or fewer characters. Prefers paragraph
  * boundaries (\n\n), then single newlines (\n), then hard-splits at maxLen.