From d0bd09ae57c9f60dcc367f197a80756fdfabdb77 Mon Sep 17 00:00:00 2001 From: Zhicheng Han <43314240+hanzckernel@users.noreply.github.com> Date: Sun, 26 Apr 2026 03:39:49 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=B5=8C=E5=A5=97=20m?= =?UTF-8?q?arkdown=20fence=20=E5=AF=BC=E8=87=B4=E7=9A=84=E6=B8=B2=E6=9F=93?= =?UTF-8?q?=E6=88=AA=E6=96=AD=20(#222)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: keep nested markdown fences rendered * fix: prevent thinking placeholder leaks * fix: normalize nested markdown example fences --- .../hermes/chat/MarkdownRenderer.vue | 3 +- .../hermes/chat/markdownFenceRepair.ts | 216 ++++++++++++++++++ packages/client/src/utils/thinking-parser.ts | 20 +- tests/client/markdown-rendering.test.ts | 149 ++++++++++++ tests/client/thinking-parser.test.ts | 18 ++ 5 files changed, 400 insertions(+), 6 deletions(-) create mode 100644 packages/client/src/components/hermes/chat/markdownFenceRepair.ts diff --git a/packages/client/src/components/hermes/chat/MarkdownRenderer.vue b/packages/client/src/components/hermes/chat/MarkdownRenderer.vue index 5c62604c..1131d5b2 100644 --- a/packages/client/src/components/hermes/chat/MarkdownRenderer.vue +++ b/packages/client/src/components/hermes/chat/MarkdownRenderer.vue @@ -4,6 +4,7 @@ import { useI18n } from 'vue-i18n' import { useMessage } from 'naive-ui' import MarkdownIt from 'markdown-it' import { handleCodeBlockCopyClick, renderHighlightedCodeBlock } from './highlight' +import { repairNestedMarkdownFences } from './markdownFenceRepair' import { downloadFile } from '@/api/hermes/download' const props = withDefaults(defineProps<{ @@ -26,7 +27,7 @@ const md: MarkdownIt = new MarkdownIt({ }) const renderedHtml = computed(() => { - let html = md.render(props.content) + let html = md.render(repairNestedMarkdownFences(props.content)) if (props.mentionNames && props.mentionNames.length > 0) { const escaped = props.mentionNames.map(n => n.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')) const re = new RegExp(`(?<=[\\s>]|^)@(${escaped.join('|')})(?=\\s|$)`, 'gi') diff --git a/packages/client/src/components/hermes/chat/markdownFenceRepair.ts b/packages/client/src/components/hermes/chat/markdownFenceRepair.ts new file mode 100644 index 00000000..9c8dd387 --- /dev/null +++ b/packages/client/src/components/hermes/chat/markdownFenceRepair.ts @@ -0,0 +1,216 @@ +const MARKDOWN_FENCE_LANGUAGES = new Set(['md', 'markdown', 'mdown', 'mkd']) + +type FenceInfo = { + indent: string + marker: string + fence: string + length: number + info: string +} + +function parseFence(line: string): FenceInfo | null { + const match = line.match(/^( {0,3})(`{3,}|~{3,})(.*)$/) + if (!match) return null + + const [, indent, fence, rawInfo = ''] = match + const marker = fence[0] + const info = rawInfo.trim() + + // CommonMark permits backticks in tilde-fence info strings, but not in + // backtick-fence info strings. Keeping this distinction prevents inline-ish + // malformed backtick text from being promoted into a fence opener. + if (marker === '`' && info.includes('`')) return null + + return { + indent, + marker, + fence, + length: fence.length, + info, + } +} + +function serializeFence(fence: FenceInfo, length = fence.length, info = fence.info): string { + return `${fence.indent}${fence.marker.repeat(length)}${info ? ` ${info}` : ''}` +} + +function isMarkdownFence(fence: FenceInfo): boolean { + const language = fence.info.split(/\s+/)[0]?.toLowerCase() + return MARKDOWN_FENCE_LANGUAGES.has(language) +} + +function isClosingFence(line: string, opener: FenceInfo): boolean { + const fence = parseFence(line) + return Boolean( + fence + && fence.marker === opener.marker + && fence.length >= opener.length + && fence.info === '', + ) +} + +function findLastNonEmptyLine(lines: string[], start = lines.length - 1): number { + let index = start + while (index >= 0 && lines[index].trim() === '') { + index -= 1 + } + return index +} + +function findFinalClosingFence(lines: string[], opener: FenceInfo, start: number): number { + for (let i = findLastNonEmptyLine(lines); i > start; i -= 1) { + if (isClosingFence(lines[i], opener)) { + return i + } + } + return -1 +} + +type OpenFence = { + marker: string + length: number +} + +function canBalanceNestedFences(lines: string[], marker: string): boolean { + const stack: OpenFence[] = [] + let sawFence = false + + for (const line of lines) { + const fence = parseFence(line) + if (!fence || fence.marker !== marker) continue + + sawFence = true + const current = stack[stack.length - 1] + if (fence.info === '' && current && fence.length >= current.length) { + stack.pop() + continue + } + + // Inside a Markdown example, an unlabeled fence can be either a closing + // fence or a literal nested unlabeled example opener. If there is no nested + // opener waiting to close, treat it as the latter while evaluating a later + // candidate closing fence for the outer example. + stack.push({ marker: fence.marker, length: fence.length }) + } + + return sawFence && stack.length === 0 +} + +function findBalancedClosingFence(lines: string[], opener: FenceInfo, start: number): number { + const candidates: number[] = [] + + for (let i = start; i < lines.length; i += 1) { + const fence = parseFence(lines[i]) + if ( + fence + && fence.marker === opener.marker + && fence.info === '' + && fence.length >= opener.length + ) { + candidates.push(i) + } + } + + for (let i = candidates.length - 1; i >= 0; i -= 1) { + const candidate = candidates[i] + if (canBalanceNestedFences(lines.slice(start, candidate), opener.marker)) { + return candidate + } + } + + return candidates[0] ?? -1 +} + +function maxFenceLength(lines: string[], marker: string): number { + let maxLength = 0 + for (const line of lines) { + const fence = parseFence(line) + if (fence?.marker === marker) { + maxLength = Math.max(maxLength, fence.length) + } + } + return maxLength +} + +function promoteMarkdownExampleFences(lines: string[]): string[] { + const output: string[] = [] + + for (let i = 0; i < lines.length; i += 1) { + const opener = parseFence(lines[i]) + if (!opener || !isMarkdownFence(opener)) { + output.push(lines[i]) + continue + } + + const balancedClose = findBalancedClosingFence(lines, opener, i + 1) + if (balancedClose === -1) { + output.push(lines[i]) + continue + } + + const body = lines.slice(i + 1, balancedClose) + const innerMaxLength = maxFenceLength(body, opener.marker) + if (innerMaxLength >= opener.length) { + const promotedLength = innerMaxLength + 1 + output.push(serializeFence(opener, promotedLength)) + output.push(...body) + output.push(serializeFence(opener, promotedLength, '')) + } else { + output.push(lines[i]) + output.push(...body) + output.push(lines[balancedClose]) + } + + i = balancedClose + } + + return output +} + +/** + * LLMs often wrap a complete PR draft or Markdown answer in an outer + * ```md fence. Showing that outer wrapper as a code block makes the UI look + * like Markdown rendering is broken: headings, lists, and inline code remain + * literal text. Strip only that outer draft wrapper before handing content to + * markdown-it. + * + * The unwrapped draft can still contain Markdown examples that themselves + * contain fenced examples. CommonMark closes fences at the first same-marker + * line with at least the opener length, so a malformed example like + * ```md ... ```md ... ``` ... ``` must be normalized by making the example's + * outer fence longer than the literal fences inside it. + */ +export function repairNestedMarkdownFences(content: string): string { + if (!content.includes('```') && !content.includes('~~~')) return content + + const lines = content.split('\n') + const output: string[] = [] + let changed = false + + for (let i = 0; i < lines.length; i += 1) { + const opener = parseFence(lines[i]) + if (!opener || !isMarkdownFence(opener)) { + output.push(lines[i]) + continue + } + + const finalClose = findFinalClosingFence(lines, opener, i + 1) + if (finalClose === -1) { + output.push(lines[i]) + continue + } + + const lastNonEmpty = findLastNonEmptyLine(lines) + if (finalClose !== lastNonEmpty) { + output.push(lines[i]) + continue + } + + output.push(...promoteMarkdownExampleFences(lines.slice(i + 1, finalClose))) + output.push(...lines.slice(finalClose + 1)) + changed = true + break + } + + return changed ? output.join('\n') : content +} diff --git a/packages/client/src/utils/thinking-parser.ts b/packages/client/src/utils/thinking-parser.ts index e188a9f7..7eaef598 100644 --- a/packages/client/src/utils/thinking-parser.ts +++ b/packages/client/src/utils/thinking-parser.ts @@ -14,7 +14,7 @@ const TAG_RE = /<(think|thinking|reasoning)>([\s\S]*?)<\/\1>/gi const PLACEHOLDER_PREFIX = '\u0000THKCODE' const PLACEHOLDER_SUFFIX = '\u0000' -const FENCED_RE = /(```|~~~)([\s\S]*?)\1/g +const FENCED_RE = /(^|\n)( {0,3})(`{3,}|~{3,})[^\n]*\n[\s\S]*?\n\2\3[ \t]*(?=\n|$)/g const INLINE_CODE_RE = /`[^`\n]*`/g function protectCodeBlocks(input: string): { masked: string; blocks: string[] } { @@ -32,10 +32,20 @@ function protectCodeBlocks(input: string): { masked: string; blocks: string[] } function restoreCodeBlocks(text: string, blocks: string[]): string { if (blocks.length === 0) return text - return text.replace( - new RegExp(`${PLACEHOLDER_PREFIX}(\\d+)${PLACEHOLDER_SUFFIX}`, 'g'), - (_, idx) => blocks[Number(idx)] ?? '', - ) + + const placeholderRe = new RegExp(`${PLACEHOLDER_PREFIX}(\\d+)${PLACEHOLDER_SUFFIX}`, 'g') + let restored = text + + for (let i = 0; i < blocks.length; i += 1) { + const next = restored.replace( + placeholderRe, + (_, idx) => blocks[Number(idx)] ?? '', + ) + if (next === restored) break + restored = next + } + + return restored } export function parseThinking(content: string, opts: ParseOptions): ParsedThinking { diff --git a/tests/client/markdown-rendering.test.ts b/tests/client/markdown-rendering.test.ts index cddd6664..34350c82 100644 --- a/tests/client/markdown-rendering.test.ts +++ b/tests/client/markdown-rendering.test.ts @@ -86,6 +86,155 @@ describe('MarkdownRenderer', () => { expect(wrapper.find('code.hljs').text()).toContain('INFO Starting server') }) + it('renders outer markdown draft fences as markdown while preserving nested fenced examples', () => { + const wrapper = mount(MarkdownRenderer, { + props: { + content: [ + '下面是可直接手动编辑的 PR draft。', + '', + '```md', + '标题: fix(chat): 保留附件在同一聊天后续轮次的上下文', + '', + '## Summary', + '', + '附件上传后,首轮 `startRun()` 的 `input` 已包含上传文件引用:', + '', + '```md', + '[File: screenshot.png](/uploaded/path)', + '```', + '', + '但本地保存的用户消息只保留 UI 可见文本。', + '', + '## Fix', + '- Preserve context.', + '```', + ].join('\n'), + }, + }) + + expect(wrapper.findAll('.hljs-code-block')).toHaveLength(1) + expect(wrapper.find('.code-lang').text()).toBe('md') + expect(wrapper.find('code.hljs').text()).toContain('[File: screenshot.png](/uploaded/path)') + expect(wrapper.find('.markdown-body').findAll('h2')).toHaveLength(2) + expect(wrapper.find('.markdown-body').find('h2').text()).toBe('Summary') + expect(wrapper.find('.markdown-body').text()).toContain('但本地保存的用户消息只保留 UI 可见文本。') + expect(wrapper.find('.markdown-body').text()).toContain('Preserve context.') + }) + + it('keeps markdown examples with their own nested fences intact after unwrapping a draft fence', () => { + const wrapper = mount(MarkdownRenderer, { + props: { + content: [ + '```md', + '## Regression Coverage', + '', + '```md', + '下面是一个 PR draft。', + '', + '```md', + '[File: Screenshot.png](/tmp/example.png)', + '```', + '', + '## Fix', + '', + '- 后续 heading 不应被截断。', + '```', + '', + '## Local Verification', + '', + '- localhost renders after the example.', + '```', + ].join('\n'), + }, + }) + + const headings = wrapper.find('.markdown-body').findAll('h2').map(heading => heading.text()) + expect(headings).toEqual(['Regression Coverage', 'Local Verification']) + expect(wrapper.findAll('.hljs-code-block')).toHaveLength(1) + + const codeText = wrapper.find('code.hljs').text() + expect(codeText).toContain('下面是一个 PR draft。') + expect(codeText).toContain('```md\n[File: Screenshot.png](/tmp/example.png)\n```') + expect(codeText).toContain('## Fix') + expect(codeText).toContain('- 后续 heading 不应被截断。') + expect(wrapper.find('.markdown-body').text()).toContain('localhost renders after the example.') + }) + + it('keeps markdown examples with unlabeled nested fences intact', () => { + const wrapper = mount(MarkdownRenderer, { + props: { + content: [ + '```md', + '## Unlabeled Fence Example', + '', + '```md', + '```', + 'plain nested block', + '```', + '```', + '', + 'Done outside.', + '```', + ].join('\n'), + }, + }) + + expect(wrapper.find('.markdown-body').find('h2').text()).toBe('Unlabeled Fence Example') + expect(wrapper.findAll('.hljs-code-block')).toHaveLength(1) + expect(wrapper.find('code.hljs').text()).toContain('```\nplain nested block\n```') + expect(wrapper.find('.markdown-body').text()).toContain('Done outside.') + }) + + it('keeps tilde-fenced markdown examples with nested tilde fences intact', () => { + const wrapper = mount(MarkdownRenderer, { + props: { + content: [ + '```md', + '## Tilde Example', + '', + '~~~md', + '~~~yaml', + 'ok: true', + '~~~', + '~~~', + '', + 'Done outside.', + '```', + ].join('\n'), + }, + }) + + expect(wrapper.find('.markdown-body').find('h2').text()).toBe('Tilde Example') + expect(wrapper.findAll('.hljs-code-block')).toHaveLength(1) + expect(wrapper.find('code.hljs').text()).toContain('~~~yaml\nok: true\n~~~') + expect(wrapper.find('.markdown-body').text()).toContain('Done outside.') + }) + + it('keeps already-valid longer markdown example fences valid', () => { + const wrapper = mount(MarkdownRenderer, { + props: { + content: [ + '```md', + '## Longer Fence Example', + '', + '````md', + '```ts', + 'const answer = 42', + '```', + '````', + '', + 'Done outside.', + '```', + ].join('\n'), + }, + }) + + expect(wrapper.find('.markdown-body').find('h2').text()).toBe('Longer Fence Example') + expect(wrapper.findAll('.hljs-code-block')).toHaveLength(1) + expect(wrapper.find('code.hljs').text()).toContain('```ts\nconst answer = 42\n```') + expect(wrapper.find('.markdown-body').text()).toContain('Done outside.') + }) + it('copies code through the delegated click handler', async () => { const writeText = vi.mocked(navigator.clipboard.writeText) const wrapper = mount(MarkdownRenderer, { diff --git a/tests/client/thinking-parser.test.ts b/tests/client/thinking-parser.test.ts index 1083ce60..e25ab167 100644 --- a/tests/client/thinking-parser.test.ts +++ b/tests/client/thinking-parser.test.ts @@ -92,6 +92,24 @@ describe('parseThinking', () => { expect(r.body).toBe('text\n```\nfake\n```') }) + it('does not leak code-protection placeholders for inline mentions of markdown fences', () => { + const src = [ + 'Previous fix kept the outer ` ```md ` block as code.', + '', + '````md', + '下面是可直接手动编辑的 PR draft。', + '```md', + '标题', + '```', + '````', + ].join('\n') + const r = parseThinking(src, { streaming: false }) + expect(r.hasThinking).toBe(false) + expect(r.body).toBe(src) + expect(r.body).not.toContain('THKCODE') + expect(r.body).not.toContain('\u0000') + }) + it('same-name nesting: inner tag absorbed into first segment (documented limitation)', () => { const r = parseThinking('abc', { streaming: false }) expect(r.segments).toEqual(['ab'])