From ac28c9093054b1c10ae9eff2607e7c5c11f16a82 Mon Sep 17 00:00:00 2001 From: Hoang Nguyen Date: Sun, 28 Jun 2026 06:50:03 +0000 Subject: [PATCH] fix(channel-connector): preserve source text on Telegram fallback --- .../adapters/TelegramAdapter.test.ts | 74 +++++++++++++++- .../src/adapters/TelegramAdapter.ts | 85 ++++++++++++++++--- 2 files changed, 142 insertions(+), 17 deletions(-) diff --git a/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts b/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts index 74480a2c..932d26e7 100644 --- a/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts +++ b/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts @@ -219,12 +219,79 @@ describe('TelegramAdapter', () => { expect(htmlChunk).toContain('hello'); expect(htmlOpts).toEqual({ parse_mode: 'HTML' }); - // Second call: same content, plain text (tags stripped, no parse_mode) + // Second call: original source text, no parse_mode const [, plainChunk, plainOpts] = bot.telegram.sendMessage.mock.calls[1]; - expect(plainChunk).toBe('hello'); + expect(plainChunk).toBe('**hello**'); expect(plainOpts).toBeUndefined(); }); + it('should fall back to source text for markdown code that contains HTML-looking tags', async () => { + const bot = getMockBot(); + const parseError = Object.assign(new Error('400'), { + description: "Bad Request: can't parse entities", + }); + bot.telegram.sendMessage + .mockRejectedValueOnce(parseError) + .mockResolvedValueOnce(undefined); + + await adapter.sendMessage('12345', '`x`'); + + const [, plainChunk, plainOpts] = bot.telegram.sendMessage.mock.calls[1]; + expect(plainChunk).toBe('`x`'); + expect(plainChunk).not.toBe('x'); + expect(plainOpts).toBeUndefined(); + }); + + it('should fall back to source text chunks for long fenced code with tag-like content', async () => { + const bot = getMockBot(); + const parseError = Object.assign(new Error('400'), { + description: "Bad 
Request: can't parse entities", + }); + bot.telegram.sendMessage + .mockRejectedValueOnce(parseError) + .mockResolvedValue(undefined); + const codeLine = 'x\n'; + const message = `\`\`\`\n${codeLine.repeat(350)}\`\`\``; + + await adapter.sendMessage('12345', message); + + const [, plainChunk, plainOpts] = bot.telegram.sendMessage.mock.calls[1]; + expect(plainChunk).toContain('```'); + expect(plainChunk).toContain('x'); + expect(plainChunk).not.toContain('<code>x</code>'); + expect(plainOpts).toBeUndefined(); + }); + + it('should not split rendered fenced code into malformed Telegram HTML chunks', async () => { + const bot = getMockBot(); + bot.telegram.sendMessage.mockImplementation(async (_chatId: string, chunk: string, opts?: { parse_mode?: string }) => { + if (opts?.parse_mode === 'HTML') { + const preOpen = (chunk.match(/
/g) ?? []).length;
+                    const preClose = (chunk.match(/<\/pre>/g) ?? []).length;
+                    const codeOpen = (chunk.match(/]*)?>/g) ?? []).length;
+                    const codeClose = (chunk.match(/<\/code>/g) ?? []).length;
+                    if (preOpen !== preClose || codeOpen !== codeClose) {
+                        throw Object.assign(new Error('400: Bad Request'), {
+                            description: "Bad Request: can't parse entities: Can't find end tag corresponding to start tag \"pre\"",
+                        });
+                    }
+                }
+                return undefined;
+            });
+            const codeLine = 'const value = "x";\n';
+            const message = `\`\`\`ts\n${codeLine.repeat(180)}\`\`\``;
+
+            await adapter.sendMessage('12345', message);
+
+            expect(bot.telegram.sendMessage).toHaveBeenCalledTimes(2);
+            for (const call of bot.telegram.sendMessage.mock.calls) {
+                expect(call[1].length).toBeLessThanOrEqual(4096);
+                expect(call[1]).toMatch(/^
/);
+                expect(call[1]).toMatch(/<\/code><\/pre>$/);
+                expect(call[2]).toEqual({ parse_mode: 'HTML' });
+            }
+        });
+
         it('should detect parse-entities error from "message" field too', async () => {
             const bot = getMockBot();
             // Some error shapes carry the marker on `message` rather than `description`
@@ -238,7 +305,7 @@ describe('TelegramAdapter', () => {
             expect(bot.telegram.sendMessage).toHaveBeenCalledTimes(2);
         });
 
-        it('should decode HTML entities when falling back to plain text', async () => {
+        it('should keep source text when falling back from escaped HTML', async () => {
             const bot = getMockBot();
             const parseError = Object.assign(new Error('400'), {
                 description: "Bad Request: can't parse entities",
@@ -247,7 +314,6 @@ describe('TelegramAdapter', () => {
                 .mockRejectedValueOnce(parseError)
                 .mockResolvedValueOnce(undefined);
 
-            // Source has chars that escapeHtml encodes; fallback should decode them
             await adapter.sendMessage('12345', 'a < b && c > d');
 
             const [, plainChunk] = bot.telegram.sendMessage.mock.calls[1];
diff --git a/packages/channel-connector/src/adapters/TelegramAdapter.ts b/packages/channel-connector/src/adapters/TelegramAdapter.ts
index de174388..f877622f 100644
--- a/packages/channel-connector/src/adapters/TelegramAdapter.ts
+++ b/packages/channel-connector/src/adapters/TelegramAdapter.ts
@@ -56,9 +56,7 @@ export class TelegramAdapter implements ChannelAdapter {
 
     /**
      * Input is treated as markdown and rendered as Telegram-compatible HTML.
-     * Long messages are chunked at paragraph boundaries when possible; very
-     * long single blocks (e.g. a `<pre>` over 4096 chars) may still split
-     * mid-tag and produce a partial render in the second chunk.
+     * Long messages are chunked at paragraph boundaries when possible.
      */
     async sendMessage(chatId: string, text: string): Promise {
         let html: string;
@@ -71,14 +69,19 @@ export class TelegramAdapter implements ChannelAdapter {
             return;
         }
 
-        for (const chunk of chunkMessage(html, TELEGRAM_MAX_MESSAGE_LENGTH)) {
+        const htmlChunks = chunkTelegramHtml(html, TELEGRAM_MAX_MESSAGE_LENGTH);
+        const fallbackChunks = chunkMessage(text, TELEGRAM_MAX_MESSAGE_LENGTH);
+
+        for (const [index, chunk] of htmlChunks.entries()) {
             try {
                 await this.bot.telegram.sendMessage(chatId, chunk, { parse_mode: TELEGRAM_PARSE_MODE });
             } catch (error) {
                 if (!isParseEntitiesError(error)) throw error;
                 // Telegram rejected the rendered HTML — fall back to plain text
-                // so the user still gets the content (just unformatted).
-                await this.bot.telegram.sendMessage(chatId, htmlToPlainText(chunk));
+                // from the source so escaped code content is not decoded into
+                // HTML-looking Telegram tags.
+                const fallbackChunk = fallbackChunks[index] ?? text;
+                await this.bot.telegram.sendMessage(chatId, fallbackChunk);
             }
         }
     }
@@ -99,13 +102,100 @@ function isParseEntitiesError(error: unknown): boolean {
     return ((description ?? '') + (message ?? '')).includes("can't parse entities");
 }
 
-function htmlToPlainText(html: string): string {
-    return html
-        .replace(/<[^>]+>/g, '')
-        .replace(/&lt;/g, '<')
-        .replace(/&gt;/g, '>')
-        .replace(/&quot;/g, '"')
-        .replace(/&amp;/g, '&');
+/**
+ * Splits rendered Telegram HTML into chunks of at most `maxLen` characters.
+ * Text outside `<pre><code>` blocks is chunked with `chunkMessage`; a code
+ * block that fits becomes one chunk, and a longer one is split by
+ * `chunkPreCodeBlock` so each piece carries its own tags.
+ */
+function chunkTelegramHtml(html: string, maxLen: number): string[] {
+    const chunks: string[] = [];
+    // Groups: 1 = attributes of <code>, 2 = code content, 3 = optional
+    // blank-line separator directly after the block.
+    const preCodePattern = /<pre><code([^>]*)>([\s\S]*?)<\/code><\/pre>(\n\n)?/g;
+    let lastIndex = 0;
+
+    for (const match of html.matchAll(preCodePattern)) {
+        const matchIndex = match.index ?? 0;
+        // Text between the previous code block and this one.
+        chunks.push(...chunkMessage(html.slice(lastIndex, matchIndex), maxLen));
+
+        const [block, attrs, content, suffix = ''] = match;
+        if (block.length <= maxLen) {
+            chunks.push(block);
+        } else {
+            chunks.push(...chunkPreCodeBlock(attrs, content, suffix, maxLen));
+        }
+
+        lastIndex = matchIndex + block.length;
+    }
+
+    // Whatever follows the last code block.
+    chunks.push(...chunkMessage(html.slice(lastIndex), maxLen));
+    return chunks;
+}
+
+/**
+ * Splits one oversized `<pre><code>` block into several smaller blocks, each
+ * in its own opening and closing tags. `suffix` (the separator that followed
+ * the block) is attached to the last chunk only.
+ */
+function chunkPreCodeBlock(attrs: string, content: string, suffix: string, maxLen: number): string[] {
+    const open = `<pre><code${attrs}>`;
+    const close = '</code></pre>';
+    // Reserve room for the tags and for the suffix so that the last chunk
+    // also stays within maxLen.
+    const maxContentLen = maxLen - open.length - close.length - suffix.length;
+    // No room left for content: hand the whole block to the generic chunker.
+    if (maxContentLen <= 0) return chunkMessage(`${open}${content}${close}${suffix}`, maxLen);
+
+    const contentChunks = chunkHtmlText(content, maxContentLen);
+    return contentChunks.map((chunk, index) =>
+        `${open}${chunk}${close}${index === contentChunks.length - 1 ? suffix : ''}`
+    );
+}
+
+/**
+ * Cuts `text` into pieces of at most `maxLen` characters, preferring to break
+ * right after the last newline that fits and not inside an `&...;` entity
+ * when that can be avoided.
+ */
+function chunkHtmlText(text: string, maxLen: number): string[] {
+    const chunks: string[] = [];
+    let remaining = text;
+
+    while (remaining.length > 0) {
+        if (remaining.length <= maxLen) {
+            chunks.push(remaining);
+            break;
+        }
+
+        const newline = remaining.lastIndexOf('\n', maxLen - 1);
+        const splitAt = avoidEntitySplit(remaining, newline > 0 ? newline + 1 : maxLen);
+        chunks.push(remaining.slice(0, splitAt));
+        remaining = remaining.slice(splitAt);
+    }
+
+    return chunks;
+}
+
+/**
+ * Moves a split position back to the start of an `&...;` entity when the
+ * split would otherwise land inside it. Returns `splitAt` unchanged when no
+ * entity is open at that position.
+ */
+function avoidEntitySplit(text: string, splitAt: number): number {
+    const amp = text.lastIndexOf('&', splitAt - 1);
+    const semicolon = text.lastIndexOf(';', splitAt - 1);
+    // The last '&' before the split is already closed (or there is none).
+    if (amp <= semicolon) return splitAt;
+
+    const nextSemicolon = text.indexOf(';', amp);
+    // Never return 0: an empty chunk would keep the caller's loop from
+    // making progress.
+    if (amp > 0 && nextSemicolon >= splitAt) return amp;
+
+    return splitAt;
 }
 
 /**