fix: render backslash LaTeX delimiters in chat

Closes #1847

Co-authored-by: Michaelyklam <Michaelyklam@users.noreply.github.com>
This commit is contained in:
Michaelyklam
2026-05-07 20:43:01 +00:00
parent d8612ba323
commit d44513aabd
2 changed files with 95 additions and 5 deletions
+21 -5
View File
@@ -72,7 +72,20 @@ function _stripWorkspaceDisplayPrefix(text){
}
function _renderUserFencedBlocks(text){
const stash=[];
const mathStash=[];
const stashMath=(type,src)=>{mathStash.push({type,src});return '\x00UM'+(mathStash.length-1)+'\x00';};
const restoreMath=html=>String(html||'').replace(/\x00UM(\d+)\x00/g,(_,i)=>{
const item=mathStash[+i];
if(!item) return '';
if(item.type==='display') return `<div class="katex-block" data-katex="display">${esc(item.src)}</div>`;
return `<span class="katex-inline" data-katex="inline">${esc(item.src)}</span>`;
});
let s=String(text||'');
// Stash math before escaping plain text; display delimiters must run before inline.
s=s.replace(/\$\$([\s\S]+?)\$\$/g,(_,m)=>stashMath('display',m));
s=s.replace(/\\\[([\s\S]+?)\\\]/g,(_,m)=>stashMath('display',m));
s=s.replace(/\$([^\s$\n][^$\n]*?[^\s$\n]|\S)\$/g,(_,m)=>stashMath('inline',m));
s=s.replace(/\\\((.+?)\\\)/g,(_,m)=>stashMath('inline',m));
// Extract fenced code blocks → stash, replace with null-token placeholder
// CommonMark §4.5 line-anchored fence: the closing run must use at least
// as many backticks as the opener, so inner triple-backtick fences remain content.
@@ -100,8 +113,9 @@ function _renderUserFencedBlocks(text){
});
// Escape remaining plain text and convert newlines to <br>
s=esc(s).replace(/\n/g,'<br>');
// Restore stashed code blocks
// Restore stashed code blocks, then math placeholders as KaTeX targets.
s=s.replace(/\x00UF(\d+)\x00/g,(_,i)=>stash[+i]);
s=restoreMath(s);
return s;
}
function _statusCardHtml(card){
@@ -2076,14 +2090,16 @@ function renderMd(raw){
// Math stash: protect $$..$$ and $..$ from markdown processing
// Runs AFTER fence_stash so backtick code spans protect their dollar-sign contents
const math_stash=[];
// Display math: $$...$$ (must come before inline to avoid mis-parsing)
// Display math: $$...$$ and \[...\] (must come before inline to avoid mis-parsing)
s=s.replace(/\$\$([\s\S]+?)\$\$/g,(_,m)=>{math_stash.push({type:'display',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
// Match a single literal backslash before the display delimiter (the common LLM form).
s=s.replace(/\\\[([\s\S]+?)\\\]/g,(_,m)=>{math_stash.push({type:'display',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
// Inline math: $...$ — require non-space at boundaries to avoid false positives
// e.g. "costs $5 and $10" should not trigger (space after opening $)
s=s.replace(/\$([^\s$\n][^$\n]*?[^\s$\n]|\S)\$/g,(_,m)=>{math_stash.push({type:'inline',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
// Also stash \(...\) and \[...\] LaTeX delimiters
s=s.replace(/\\\\\((.+?)\\\\\)/g,(_,m)=>{math_stash.push({type:'inline',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
s=s.replace(/\\\\\[(.+?)\\\\\]/gs,(_,m)=>{math_stash.push({type:'display',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
// Also stash \(...\) LaTeX delimiters.
// Match a single literal backslash before the delimiter (the common LLM form).
s=s.replace(/\\\((.+?)\\\)/g,(_,m)=>{math_stash.push({type:'inline',src:m});return '\x00M'+(math_stash.length-1)+'\x00';});
// Safe tag → markdown equivalent (these produce the same output as **text** etc.)
// Stash raw <pre> blocks so the inline <code> rewrite below does not run
// inside them. Running that rewrite in <pre> content can introduce stray
+74
View File
@@ -10,8 +10,11 @@ Structural tests — no server required. Verify:
- SAFE_TAGS updated to allow <span> (for inline math)
- renderKatexBlocks() is wired into the requestAnimationFrame call
"""
import json
import pathlib
import re
import subprocess
import textwrap
REPO = pathlib.Path(__file__).parent.parent
UI_JS = (REPO / 'static' / 'ui.js').read_text(encoding='utf-8')
@@ -19,6 +22,61 @@ INDEX = (REPO / 'static' / 'index.html').read_text(encoding='utf-8')
CSS = (REPO / 'static' / 'style.css').read_text(encoding='utf-8')
def _extract_function(src: str, name: str) -> str:
marker = f"function {name}("
start = src.index(marker)
brace = src.index("{", start)
depth = 1
pos = brace + 1
while depth and pos < len(src):
ch = src[pos]
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
pos += 1
assert depth == 0, f"could not extract {name}()"
return src[start:pos]
def _run_renderers(markdown: str) -> dict:
    """Render *markdown* with both chat renderers inside a Node.js subprocess.

    A standalone script is assembled from a handful of stub globals plus the
    renderer functions extracted from ``UI_JS``, then executed with
    ``node -e``. The markdown is passed as an extra command-line argument and
    read by the script from ``process.argv[1]``.

    Returns:
        The decoded JSON object printed by the script, with two keys:
        ``"assistant"`` (output of ``renderMd``) and ``"user"`` (output of
        ``_renderUserFencedBlocks``).

    Raises:
        AssertionError: if Node exits with a non-zero status; the message
            carries Node's stderr so the JavaScript error is visible in the
            pytest report.
        subprocess.TimeoutExpired: if the script runs longer than 30 seconds.
    """
    # Stubs for globals the extracted functions presumably reference at call
    # time -- NOTE(review): keep in sync with ui.js if renderMd gains new
    # dependencies.
    js = textwrap.dedent(
        r'''
        const esc=s=>String(s??'').replace(/[&<>"']/g,c=>({'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]));
        const _IMAGE_EXTS=/\.(png|jpg|jpeg|gif|webp|bmp|ico|avif)$/i;
        const _PDF_EXTS=/\.pdf$/i;
        const _SVG_EXTS=/\.svg$/i;
        const _AUDIO_EXTS=/\.(mp3|ogg|wav|m4a|aac|flac|wma|opus|webm|oga)$/i;
        const _VIDEO_EXTS=/\.(mp4|webm|mkv|mov|avi|ogv|m4v)$/i;
        function t(k){ return k; }
        function _mediaPlayerHtml(){ return ''; }
        global.document={baseURI:'http://example.test/'};
        '''
    )
    for fn_name in (
        "_matchBacktickFenceLine",
        "_isBacktickFenceClose",
        "_renderUserFencedBlocks",
        "renderMd",
    ):
        js += "\n" + _extract_function(UI_JS, fn_name)
    js += textwrap.dedent(
        r'''
        const input=process.argv[1];
        console.log(JSON.stringify({
          assistant: renderMd(input),
          user: _renderUserFencedBlocks(input),
        }));
        '''
    )
    try:
        proc = subprocess.run(
            ["node", "-e", js, markdown],
            cwd=REPO,
            text=True,
            capture_output=True,
            timeout=30,
            check=True,
        )
    except subprocess.CalledProcessError as err:
        # CalledProcessError's own message omits the captured stderr, which is
        # where Node reports the actual JavaScript exception.
        raise AssertionError(
            f"node renderer harness exited with status {err.returncode}\n"
            f"--- stderr ---\n{err.stderr}"
        ) from err
    return json.loads(proc.stdout)
# ── renderMd pipeline ──────────────────────────────────────────────────────────
def test_display_math_stash_present():
@@ -41,6 +99,22 @@ def test_katex_block_placeholder_emitted():
'.katex-block placeholder div not emitted by renderMd restore pass'
def test_backslash_latex_delimiters_render_to_katex_placeholders():
    """Backslash-delimited display and inline LaTeX become KaTeX placeholders.

    Checked for both the assistant renderer and the user-bubble renderer: each
    output must contain a display placeholder and an inline placeholder, and
    none of the raw backslash delimiters may remain in the HTML.
    """
    sample = """\\[
\\text{SoundPower}(f)=10\\log_{10}(x)
\\]
where \\(L_i(f)\\) = SPL at angle \\(i\\)."""
    rendered = _run_renderers(sample)
    expected_markers = (
        'class="katex-block" data-katex="display"',
        'class="katex-inline" data-katex="inline"',
    )
    raw_delimiters = ("\\[", "\\]", "\\(", "\\)")
    for role in ("assistant", "user"):
        html = rendered[role]
        # Placeholders first, then leftover delimiters; the HTML is the
        # failure message so a broken render is visible in the report.
        for marker in expected_markers:
            assert marker in html, html
        for delimiter in raw_delimiters:
            assert delimiter not in html, html
def test_katex_inline_placeholder_emitted():
"""renderMd restore pass must emit .katex-inline spans for inline math."""
assert 'katex-inline' in UI_JS, \