diff --git a/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts b/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts
index 74480a2c..932d26e7 100644
--- a/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts
+++ b/packages/channel-connector/src/__tests__/adapters/TelegramAdapter.test.ts
@@ -219,12 +219,79 @@ describe('TelegramAdapter', () => {
expect(htmlChunk).toContain('hello');
expect(htmlOpts).toEqual({ parse_mode: 'HTML' });
- // Second call: same content, plain text (tags stripped, no parse_mode)
+ // Second call: original source text, no parse_mode
const [, plainChunk, plainOpts] = bot.telegram.sendMessage.mock.calls[1];
- expect(plainChunk).toBe('hello');
+ expect(plainChunk).toBe('**hello**');
expect(plainOpts).toBeUndefined();
});
+  // Inline code whose content looks like a Telegram tag. If the fallback were
+  // derived from the rendered HTML (strip tags, decode entities), the user
+  // would receive a bare `<code>x</code>` with the backticks lost. The
+  // fallback must be the untouched markdown source instead.
+  it('should fall back to source text for markdown code that contains HTML-looking tags', async () => {
+    const bot = getMockBot();
+    const parseError = Object.assign(new Error('400'), {
+      description: "Bad Request: can't parse entities",
+    });
+    // First call (HTML) is rejected by Telegram, second call (fallback) succeeds.
+    bot.telegram.sendMessage
+      .mockRejectedValueOnce(parseError)
+      .mockResolvedValueOnce(undefined);
+
+    await adapter.sendMessage('12345', '`<code>x</code>`');
+
+    const [, plainChunk, plainOpts] = bot.telegram.sendMessage.mock.calls[1];
+    expect(plainChunk).toBe('`<code>x</code>`');
+    // Guards against the tag-stripping/entity-decoding fallback, which drops the backticks.
+    expect(plainChunk).not.toBe('<code>x</code>');
+    // The fallback is sent without parse_mode so Telegram treats it as plain text.
+    expect(plainOpts).toBeUndefined();
+  });
+
+  // A fenced block long enough to need chunking, whose lines look like Telegram
+  // tags. When the first HTML chunk is rejected, the replacement must be a
+  // chunk of the markdown source, not a decoded or escaped slice of the HTML.
+  it('should fall back to source text chunks for long fenced code with tag-like content', async () => {
+    const bot = getMockBot();
+    const parseError = Object.assign(new Error('400'), {
+      description: "Bad Request: can't parse entities",
+    });
+    // Only the first (HTML) send fails; every later send succeeds.
+    bot.telegram.sendMessage
+      .mockRejectedValueOnce(parseError)
+      .mockResolvedValue(undefined);
+    // 15 chars per line x 350 lines exceeds the 4096-char limit asserted elsewhere in this suite.
+    const codeLine = '<code>x</code>\n';
+    const message = `\`\`\`\n${codeLine.repeat(350)}\`\`\``;
+
+    await adapter.sendMessage('12345', message);
+
+    const [, plainChunk, plainOpts] = bot.telegram.sendMessage.mock.calls[1];
+    // Source fence and raw tag-like text survive verbatim...
+    expect(plainChunk).toContain('```');
+    expect(plainChunk).toContain('<code>x</code>');
+    // ...and nothing from the escaped HTML rendering leaks into the fallback.
+    expect(plainChunk).not.toContain('&lt;code&gt;x&lt;/code&gt;');
+    expect(plainOpts).toBeUndefined();
+  });
+
+  // Simulates Telegram's HTML validation: any chunk sent with parse_mode HTML
+  // whose <pre>/<code> tags are unbalanced is rejected with a parse-entities
+  // error. A correct chunker never triggers it, so no fallback send happens.
+  it('should not split rendered fenced code into malformed Telegram HTML chunks', async () => {
+    const bot = getMockBot();
+    bot.telegram.sendMessage.mockImplementation(async (_chatId: string, chunk: string, opts?: { parse_mode?: string }) => {
+      if (opts?.parse_mode === 'HTML') {
+        const preOpen = (chunk.match(/<pre>/g) ?? []).length;
+        const preClose = (chunk.match(/<\/pre>/g) ?? []).length;
+        // Opening <code> may carry attributes (e.g. a language class).
+        const codeOpen = (chunk.match(/<code(?:\s[^>]*)?>/g) ?? []).length;
+        const codeClose = (chunk.match(/<\/code>/g) ?? []).length;
+        if (preOpen !== preClose || codeOpen !== codeClose) {
+          throw Object.assign(new Error('400: Bad Request'), {
+            description: "Bad Request: can't parse entities: Can't find end tag corresponding to start tag \"pre\"",
+          });
+        }
+      }
+      return undefined;
+    });
+    // Each `"` is expected to be escaped in the rendered HTML, pushing the
+    // rendered block past the limit even though the source is shorter.
+    const codeLine = 'const value = "x";\n';
+    const message = `\`\`\`ts\n${codeLine.repeat(180)}\`\`\``;
+
+    await adapter.sendMessage('12345', message);
+
+    // Exactly two HTML sends and no plain-text fallback.
+    expect(bot.telegram.sendMessage).toHaveBeenCalledTimes(2);
+    for (const call of bot.telegram.sendMessage.mock.calls) {
+      expect(call[1].length).toBeLessThanOrEqual(4096);
+      // Every chunk is a self-contained, fully wrapped code block.
+      expect(call[1]).toMatch(/^<pre><code(?:\s[^>]*)?>/);
+      expect(call[1]).toMatch(/<\/code><\/pre>$/);
+      expect(call[2]).toEqual({ parse_mode: 'HTML' });
+    }
+  });
+
it('should detect parse-entities error from "message" field too', async () => {
const bot = getMockBot();
// Some error shapes carry the marker on `message` rather than `description`
@@ -238,7 +305,7 @@ describe('TelegramAdapter', () => {
expect(bot.telegram.sendMessage).toHaveBeenCalledTimes(2);
});
- it('should decode HTML entities when falling back to plain text', async () => {
+ it('should keep source text when falling back from escaped HTML', async () => {
const bot = getMockBot();
const parseError = Object.assign(new Error('400'), {
description: "Bad Request: can't parse entities",
@@ -247,7 +314,6 @@ describe('TelegramAdapter', () => {
.mockRejectedValueOnce(parseError)
.mockResolvedValueOnce(undefined);
- // Source has chars that escapeHtml encodes; fallback should decode them
await adapter.sendMessage('12345', 'a < b && c > d');
const [, plainChunk] = bot.telegram.sendMessage.mock.calls[1];
diff --git a/packages/channel-connector/src/adapters/TelegramAdapter.ts b/packages/channel-connector/src/adapters/TelegramAdapter.ts
index de174388..f877622f 100644
--- a/packages/channel-connector/src/adapters/TelegramAdapter.ts
+++ b/packages/channel-connector/src/adapters/TelegramAdapter.ts
@@ -56,9 +56,7 @@ export class TelegramAdapter implements ChannelAdapter {
/**
* Input is treated as markdown and rendered as Telegram-compatible HTML.
- * Long messages are chunked at paragraph boundaries when possible; very
- * long single blocks (e.g. a `<pre>` over 4096 chars) may still split
- * mid-tag and produce a partial render in the second chunk.
+ * Long messages are chunked at paragraph boundaries when possible.
*/
async sendMessage(chatId: string, text: string): Promise<void> {
let html: string;
@@ -71,14 +69,19 @@ export class TelegramAdapter implements ChannelAdapter {
return;
}
- for (const chunk of chunkMessage(html, TELEGRAM_MAX_MESSAGE_LENGTH)) {
+ const htmlChunks = chunkTelegramHtml(html, TELEGRAM_MAX_MESSAGE_LENGTH);
+ const fallbackChunks = chunkMessage(text, TELEGRAM_MAX_MESSAGE_LENGTH);
+
+ for (const [index, chunk] of htmlChunks.entries()) {
try {
await this.bot.telegram.sendMessage(chatId, chunk, { parse_mode: TELEGRAM_PARSE_MODE });
} catch (error) {
if (!isParseEntitiesError(error)) throw error;
// Telegram rejected the rendered HTML — fall back to plain text
- // so the user still gets the content (just unformatted).
- await this.bot.telegram.sendMessage(chatId, htmlToPlainText(chunk));
+ // from the source so escaped code content is not decoded into
+ // HTML-looking Telegram tags.
+ const fallbackChunk = fallbackChunks[index] ?? text;
+ await this.bot.telegram.sendMessage(chatId, fallbackChunk);
}
}
}
@@ -99,13 +102,69 @@ function isParseEntitiesError(error: unknown): boolean {
return ((description ?? '') + (message ?? '')).includes("can't parse entities");
}
-function htmlToPlainText(html: string): string {
- return html
- .replace(/<[^>]+>/g, '')
- .replace(/&lt;/g, '<')
- .replace(/&gt;/g, '>')
- .replace(/&quot;/g, '"')
- .replace(/&amp;/g, '&');
+/**
+ * Splits rendered Telegram HTML into sendable chunks without cutting through
+ * a `<pre><code>` block.
+ *
+ * Text between code blocks is delegated to `chunkMessage`. Each
+ * `<pre><code …>…</code></pre>` block (with its optional trailing blank line)
+ * is emitted whole when it fits in `maxLen`; otherwise it is re-split by
+ * `chunkPreCodeBlock` so every piece carries its own opening and closing tags.
+ *
+ * @param html - Rendered HTML to split.
+ * @param maxLen - Maximum length of a single chunk.
+ * @returns The chunks in document order.
+ */
+function chunkTelegramHtml(html: string, maxLen: number): string[] {
+  const chunks: string[] = [];
+  // Groups: 1 = attributes of <code>, 2 = block content, 3 = trailing "\n\n".
+  const preCodePattern = /<pre><code([^>]*)>([\s\S]*?)<\/code><\/pre>(\n\n)?/g;
+  let lastIndex = 0;
+
+  for (const match of html.matchAll(preCodePattern)) {
+    const matchIndex = match.index ?? 0;
+    // Everything since the previous code block is ordinary HTML.
+    chunks.push(...chunkMessage(html.slice(lastIndex, matchIndex), maxLen));
+
+    // `suffix` is undefined when the optional group did not participate.
+    const [block, attrs, content, suffix = ''] = match;
+    if (block.length <= maxLen) {
+      chunks.push(block);
+    } else {
+      chunks.push(...chunkPreCodeBlock(attrs, content, suffix, maxLen));
+    }
+
+    lastIndex = matchIndex + block.length;
+  }
+
+  // Trailing HTML after the last code block (or the whole input if none matched).
+  chunks.push(...chunkMessage(html.slice(lastIndex), maxLen));
+  return chunks;
+}
+
+/**
+ * Splits one oversized `<pre><code>` block into several blocks, each wrapped
+ * in its own opening and closing tags.
+ *
+ * @param attrs - Raw attribute text of the original `<code>` tag (may be empty).
+ * @param content - Escaped text between the opening and closing tags.
+ * @param suffix - Text that followed the block (e.g. `"\n\n"`); appended to the last piece only.
+ * @param maxLen - Maximum length of a single chunk, wrapper tags included.
+ * @returns The wrapped pieces in order.
+ */
+function chunkPreCodeBlock(attrs: string, content: string, suffix: string, maxLen: number): string[] {
+  const open = `<pre><code${attrs}>`;
+  const close = '</code></pre>';
+  // Budget for the wrapper tags and for the suffix, so the final piece
+  // (content + tags + suffix) cannot exceed maxLen.
+  const maxContentLen = maxLen - open.length - close.length - suffix.length;
+  // The wrapper alone does not fit: fall back to generic chunking.
+  if (maxContentLen <= 0) return chunkMessage(`${open}${content}${close}${suffix}`, maxLen);
+
+  const contentChunks = chunkHtmlText(content, maxContentLen);
+  return contentChunks.map((chunk, index) =>
+    `${open}${chunk}${close}${index === contentChunks.length - 1 ? suffix : ''}`
+  );
+}
+
+/**
+ * Splits escaped HTML text into pieces of at most `maxLen` characters.
+ *
+ * Prefers to cut just after the last newline inside the window and asks
+ * `avoidEntitySplit` to move the cut off the middle of a character reference.
+ *
+ * @param text - Escaped text to split.
+ * @param maxLen - Maximum length of a single piece.
+ * @returns The pieces in order; empty when `text` is empty.
+ */
+function chunkHtmlText(text: string, maxLen: number): string[] {
+  const chunks: string[] = [];
+  let remaining = text;
+
+  while (remaining.length > 0) {
+    if (remaining.length <= maxLen) {
+      chunks.push(remaining);
+      break;
+    }
+
+    // Last newline that still fits in the window; index 0 is ignored because
+    // cutting there would not save a hard split.
+    const newline = remaining.lastIndexOf('\n', maxLen - 1);
+    const preferred = newline > 0 ? newline + 1 : maxLen;
+    const adjusted = avoidEntitySplit(remaining, preferred);
+    // Always consume at least one character: a cut at 0 would push an empty
+    // chunk and leave `remaining` unchanged, looping forever.
+    const splitAt = adjusted > 0 ? adjusted : Math.max(1, preferred);
+    chunks.push(remaining.slice(0, splitAt));
+    remaining = remaining.slice(splitAt);
+  }
+
+  return chunks;
+}
+
+// Shape of a complete HTML character reference, e.g. `&lt;`, `&#60;`, `&#x3C;`.
+const HTML_ENTITY_PATTERN = /^&#?[0-9A-Za-z]+;$/;
+
+/**
+ * Moves a proposed cut position so it does not land inside an HTML character
+ * reference such as `&amp;`.
+ *
+ * @param text - Text about to be cut.
+ * @param splitAt - Proposed cut position (characters before it go to the current chunk).
+ * @returns The index of the straddling reference's `&`, or `splitAt` unchanged
+ *   when no reference straddles the cut. Never returns 0 for a positive
+ *   `splitAt`, so callers always make progress.
+ */
+function avoidEntitySplit(text: string, splitAt: number): number {
+  const amp = text.lastIndexOf('&', splitAt - 1);
+  // No '&' before the cut, or it sits at index 0 where backing up would
+  // produce an empty chunk.
+  if (amp <= 0) return splitAt;
+
+  // A ';' after that '&' and before the cut means the reference is already closed.
+  if (text.lastIndexOf(';', splitAt - 1) > amp) return splitAt;
+
+  const end = text.indexOf(';', amp);
+  // No terminator at all, or it lies before the cut: nothing straddles.
+  if (end < splitAt) return splitAt;
+
+  // Only back up for something that really is a character reference; a stray
+  // '&' followed much later by an unrelated ';' is left alone.
+  return HTML_ENTITY_PATTERN.test(text.slice(amp, end + 1)) ? amp : splitAt;
}
/**