Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -219,12 +219,79 @@ describe('TelegramAdapter', () => {
expect(htmlChunk).toContain('<b>hello</b>');
expect(htmlOpts).toEqual({ parse_mode: 'HTML' });

// Second call: same content, plain text (tags stripped, no parse_mode)
// Second call: original source text, no parse_mode
const [, plainChunk, plainOpts] = bot.telegram.sendMessage.mock.calls[1];
expect(plainChunk).toBe('hello');
expect(plainChunk).toBe('**hello**');
expect(plainOpts).toBeUndefined();
});

it('should fall back to source text for markdown code that contains HTML-looking tags', async () => {
  // Inline markdown code whose content looks like an HTML tag.
  const source = '`<code>x</code>`';
  const entitiesError = Object.assign(new Error('400'), {
    description: "Bad Request: can't parse entities",
  });
  const telegramBot = getMockBot();
  // First send is rejected with a parse-entities error; the retry succeeds.
  telegramBot.telegram.sendMessage
    .mockRejectedValueOnce(entitiesError)
    .mockResolvedValueOnce(undefined);

  await adapter.sendMessage('12345', source);

  // The second call is the fallback: it must carry the untouched source
  // text (backticks included) and no options argument.
  const [, retryText, retryOptions] = telegramBot.telegram.sendMessage.mock.calls[1];
  expect(retryText).toBe('`<code>x</code>`');
  expect(retryText).not.toBe('<code>x</code>');
  expect(retryOptions).toBeUndefined();
});

it('should fall back to source text chunks for long fenced code with tag-like content', async () => {
  // A fenced block made of 350 tag-like lines.
  const tagLikeLine = '<code>x</code>\n';
  const source = '```\n' + tagLikeLine.repeat(350) + '```';
  const entitiesError = Object.assign(new Error('400'), {
    description: "Bad Request: can't parse entities",
  });
  const telegramBot = getMockBot();
  // Only the very first send fails; every later send resolves.
  telegramBot.telegram.sendMessage
    .mockRejectedValueOnce(entitiesError)
    .mockResolvedValue(undefined);

  await adapter.sendMessage('12345', source);

  // The second call is the fallback for the rejected chunk: raw markdown
  // fence and raw tags, not HTML-escaped entities, and no options argument.
  const [, retryText, retryOptions] = telegramBot.telegram.sendMessage.mock.calls[1];
  expect(retryText).toContain('```');
  expect(retryText).toContain('<code>x</code>');
  expect(retryText).not.toContain('&lt;code&gt;x&lt;/code&gt;');
  expect(retryOptions).toBeUndefined();
});

it('should not split rendered fenced code into malformed Telegram HTML chunks', async () => {
  // Counts how many times `pattern` (a global regex) occurs in `haystack`.
  const countMatches = (haystack: string, pattern: RegExp): number =>
    (haystack.match(pattern) ?? []).length;

  const telegramBot = getMockBot();
  // Mock that rejects any HTML chunk whose <pre>/<code> tags are unbalanced,
  // using a parse-entities style error.
  telegramBot.telegram.sendMessage.mockImplementation(
    async (_target: string, body: string, options?: { parse_mode?: string }) => {
      if (options?.parse_mode !== 'HTML') return undefined;

      const preBalanced = countMatches(body, /<pre>/g) === countMatches(body, /<\/pre>/g);
      const codeBalanced =
        countMatches(body, /<code(?:\s[^>]*)?>/g) === countMatches(body, /<\/code>/g);
      if (!preBalanced || !codeBalanced) {
        throw Object.assign(new Error('400: Bad Request'), {
          description: "Bad Request: can't parse entities: Can't find end tag corresponding to start tag \"pre\"",
        });
      }
      return undefined;
    },
  );

  // 180 lines of TypeScript-fenced code containing tag-like text.
  const sourceLine = 'const value = "<code>x</code>";\n';
  const source = '```ts\n' + sourceLine.repeat(180) + '```';

  await adapter.sendMessage('12345', source);

  // Exactly two sends, each a complete <pre><code> block within the size
  // limit and sent in HTML parse mode.
  expect(telegramBot.telegram.sendMessage).toHaveBeenCalledTimes(2);
  for (const [, sentText, sentOptions] of telegramBot.telegram.sendMessage.mock.calls) {
    expect(sentText.length).toBeLessThanOrEqual(4096);
    expect(sentText).toMatch(/^<pre><code class="language-ts">/);
    expect(sentText).toMatch(/<\/code><\/pre>$/);
    expect(sentOptions).toEqual({ parse_mode: 'HTML' });
  }
});

it('should detect parse-entities error from "message" field too', async () => {
const bot = getMockBot();
// Some error shapes carry the marker on `message` rather than `description`
Expand All @@ -238,7 +305,7 @@ describe('TelegramAdapter', () => {
expect(bot.telegram.sendMessage).toHaveBeenCalledTimes(2);
});

it('should decode HTML entities when falling back to plain text', async () => {
it('should keep source text when falling back from escaped HTML', async () => {
const bot = getMockBot();
const parseError = Object.assign(new Error('400'), {
description: "Bad Request: can't parse entities",
Expand All @@ -247,7 +314,6 @@ describe('TelegramAdapter', () => {
.mockRejectedValueOnce(parseError)
.mockResolvedValueOnce(undefined);

// Source has chars that escapeHtml encodes; fallback should decode them
await adapter.sendMessage('12345', 'a < b && c > d');

const [, plainChunk] = bot.telegram.sendMessage.mock.calls[1];
Expand Down
85 changes: 72 additions & 13 deletions packages/channel-connector/src/adapters/TelegramAdapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,7 @@ export class TelegramAdapter implements ChannelAdapter {

/**
* Input is treated as markdown and rendered as Telegram-compatible HTML.
* Long messages are chunked at paragraph boundaries when possible; very
* long single blocks (e.g. a `<pre>` over 4096 chars) may still split
* mid-tag and produce a partial render in the second chunk.
* Long messages are chunked at paragraph boundaries when possible.
*/
async sendMessage(chatId: string, text: string): Promise<void> {
let html: string;
Expand All @@ -71,14 +69,19 @@ export class TelegramAdapter implements ChannelAdapter {
return;
}

for (const chunk of chunkMessage(html, TELEGRAM_MAX_MESSAGE_LENGTH)) {
const htmlChunks = chunkTelegramHtml(html, TELEGRAM_MAX_MESSAGE_LENGTH);
const fallbackChunks = chunkMessage(text, TELEGRAM_MAX_MESSAGE_LENGTH);

for (const [index, chunk] of htmlChunks.entries()) {
try {
await this.bot.telegram.sendMessage(chatId, chunk, { parse_mode: TELEGRAM_PARSE_MODE });
} catch (error) {
if (!isParseEntitiesError(error)) throw error;
// Telegram rejected the rendered HTML — fall back to plain text
// so the user still gets the content (just unformatted).
await this.bot.telegram.sendMessage(chatId, htmlToPlainText(chunk));
// from the source so escaped code content is not decoded into
// HTML-looking Telegram tags.
const fallbackChunk = fallbackChunks[index] ?? text;
await this.bot.telegram.sendMessage(chatId, fallbackChunk);
}
}
}
Expand All @@ -99,13 +102,69 @@ function isParseEntitiesError(error: unknown): boolean {
return ((description ?? '') + (message ?? '')).includes("can't parse entities");
}

function htmlToPlainText(html: string): string {
return html
.replace(/<[^>]+>/g, '')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&quot;/g, '"')
.replace(/&amp;/g, '&');
/**
 * Splits rendered HTML into chunks, treating each `<pre><code …>…</code></pre>`
 * block (plus an optional trailing blank line) as a unit.
 *
 * Text between code blocks is handed to `chunkMessage`. A code block that
 * fits within `maxLen` is emitted as-is; a larger one is delegated to
 * `chunkPreCodeBlock`.
 *
 * @param html - HTML string to split.
 * @param maxLen - Length limit passed to the chunking helpers.
 * @returns The chunks in document order.
 */
function chunkTelegramHtml(html: string, maxLen: number): string[] {
  const preCodePattern = /<pre><code([^>]*)>([\s\S]*?)<\/code><\/pre>(\n\n)?/g;
  const pieces: string[] = [];
  let cursor = 0;

  let found: RegExpExecArray | null;
  while ((found = preCodePattern.exec(html)) !== null) {
    const start = found.index;

    // Everything between the previous code block and this one.
    const before = html.slice(cursor, start);
    pieces.push(...chunkMessage(before, maxLen));

    // Group 3 (the trailing "\n\n") is undefined when absent.
    const [wholeBlock, codeAttrs, codeBody, trailing = ''] = found;
    if (wholeBlock.length > maxLen) {
      pieces.push(...chunkPreCodeBlock(codeAttrs, codeBody, trailing, maxLen));
    } else {
      pieces.push(wholeBlock);
    }

    cursor = start + wholeBlock.length;
  }

  // Whatever follows the last code block (or the whole input if none matched).
  const tail = html.slice(cursor);
  pieces.push(...chunkMessage(tail, maxLen));
  return pieces;
}

/**
 * Splits the content of one oversized `<pre><code>` block into several
 * blocks, each wrapped in its own opening and closing tags.
 *
 * @param attrs - Raw attribute text that followed `<code` in the original
 *   opening tag (may be empty); repeated on every emitted block.
 * @param content - Text found between the opening and closing tags.
 * @param suffix - Text that followed the original block (e.g. "\n\n");
 *   appended to the last emitted block only when it still fits.
 * @param maxLen - Maximum length of each emitted chunk.
 * @returns The wrapped chunks in order.
 */
function chunkPreCodeBlock(attrs: string, content: string, suffix: string, maxLen: number): string[] {
  const open = `<pre><code${attrs}>`;
  const close = '</code></pre>';
  const maxContentLen = maxLen - open.length - close.length;
  // The wrapper tags alone use up the whole budget, so no content can be
  // wrapped; hand the reassembled block to chunkMessage instead.
  if (maxContentLen <= 0) return chunkMessage(`${open}${content}${close}${suffix}`, maxLen);

  const contentChunks = chunkHtmlText(content, maxContentLen);
  const lastIndex = contentChunks.length - 1;
  return contentChunks.map((chunk, index) => {
    const block = `${open}${chunk}${close}`;
    // The content budget does not reserve room for the suffix, so a full
    // last chunk plus the suffix would exceed maxLen. Drop the suffix in
    // that case rather than emit an over-long chunk.
    const suffixFits = block.length + suffix.length <= maxLen;
    return index === lastIndex && suffixFits ? block + suffix : block;
  });
}

/**
 * Splits text into chunks of at most `maxLen` UTF-16 code units.
 *
 * Each split prefers the position just after the last newline inside the
 * window, and otherwise falls at `maxLen`. The position is then moved back
 * when it would cut through an `&…;` sequence or a surrogate pair. Every
 * iteration consumes at least one code unit, so the loop always terminates.
 *
 * @param text - Text to split.
 * @param maxLen - Maximum chunk length. A value below 1 cannot be honoured;
 *   the text is then returned as a single chunk instead of looping forever.
 * @returns The chunks in order; concatenated they equal `text`. Empty input
 *   yields an empty array.
 */
function chunkHtmlText(text: string, maxLen: number): string[] {
  // Also catches NaN: no split position could ever make progress.
  if (!(maxLen >= 1)) return text.length > 0 ? [text] : [];

  const chunks: string[] = [];
  let remaining = text;

  while (remaining.length > maxLen) {
    const newline = remaining.lastIndexOf('\n', maxLen - 1);
    // A newline at index 0 would give a one-character chunk; use the hard
    // limit in that case.
    const preferred = newline > 0 ? newline + 1 : maxLen;

    let splitAt = avoidEntitySplit(remaining, preferred);
    // Never accept a split that consumes nothing.
    if (splitAt <= 0) splitAt = preferred;

    // Do not separate a high surrogate from its low surrogate, unless backing
    // up would leave an empty chunk.
    if (splitAt > 1) {
      const before = remaining.charCodeAt(splitAt - 1);
      const after = remaining.charCodeAt(splitAt);
      const splitsPair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (splitsPair) splitAt -= 1;
    }

    chunks.push(remaining.slice(0, splitAt));
    remaining = remaining.slice(splitAt);
  }

  if (remaining.length > 0) chunks.push(remaining);
  return chunks;
}

/**
 * Moves a proposed split position back to the start of an `&…;` sequence
 * when the split would otherwise fall inside it.
 *
 * @param text - Text being split.
 * @param splitAt - Proposed split index (exclusive end of the chunk).
 * @returns The index of the `&` when an unterminated `&` precedes `splitAt`
 *   and its closing `;` lies at or after `splitAt`; otherwise `splitAt`.
 *   Never returns 0 for that adjustment, because a split at 0 would produce
 *   an empty chunk and the caller could not make progress.
 */
function avoidEntitySplit(text: string, splitAt: number): number {
  const amp = text.lastIndexOf('&', splitAt - 1);
  // No '&' before the split, or the '&' is the very first character.
  if (amp <= 0) return splitAt;

  // A ';' after the '&' but before the split closes the sequence.
  const semicolon = text.lastIndexOf(';', splitAt - 1);
  if (semicolon > amp) return splitAt;

  // indexOf returns -1 when there is no ';' at all, which is never >= splitAt.
  const nextSemicolon = text.indexOf(';', amp);
  return nextSemicolon >= splitAt ? amp : splitAt;
}

/**
Expand Down
Loading