Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions src/diff.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use similar::{Algorithm, ChangeTag, DiffOp, TextDiff};

use crate::{Block, DiffResult, DiffSpan, SpanTag};
use crate::{Block, DiffResult, DiffSpan, SpanTag, TypstLabel};

/// Diff two sequences of blocks, returning a list of diff results.
///
Expand Down Expand Up @@ -136,8 +136,9 @@ fn process_replace(old_range: &[Block], new_range: &[Block], results: &mut Vec<D
/// everything else (CJK characters, punctuation, whitespace) becomes an
/// individual single-character token.
///
/// Typst code expressions (`#func[...]`, `#func(...)`) and references (`@label`)
/// are treated as atomic tokens so the diff never fragments valid Typst syntax.
/// Typst code expressions (`#func[...]`, `#func(...)`), references (`@label`),
/// and labels (`<label>`) are treated as atomic tokens so the diff never
/// fragments valid Typst syntax.
///
/// This gives word-level granularity for Latin text while allowing
/// character-level precision for CJK text (which has no whitespace boundaries).
Expand Down Expand Up @@ -192,6 +193,14 @@ fn tokenize_mixed(s: &str) -> Vec<&str> {
}
}
tokens.push(&s[start..i]);
} else if c == '<' {
let start = i;
if let Some(end) = TypstLabel::end(&s[start..]) {
i += end;
} else {
i += c_len;
}
tokens.push(&s[start..i]);
} else if c.is_ascii_alphanumeric() {
let start = i;
i += c_len;
Expand Down Expand Up @@ -528,6 +537,12 @@ mod tests {
);
}

#[test]
fn test_tokenize_typst_label_atomic() {
    // A complete label literal must come back as exactly one token,
    // even though it contains `-` and `_` between alphanumeric runs.
    let label = "<sample-widget_anchor>";
    let expected = vec![label];
    assert_eq!(tokenize_mixed(label), expected);
}

#[test]
fn test_tokenize_cjk_char_level() {
// CJK characters should be tokenized individually (no whitespace boundaries).
Expand Down
22 changes: 22 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,25 @@ pub mod diff;
pub mod parse;
pub mod render;

/// Namespace for helpers that recognise Typst label literals (`<name>`).
pub(crate) struct TypstLabel;

impl TypstLabel {
    /// Returns the byte length of the label literal that `text` starts with.
    ///
    /// The result counts the opening `<`, the identifier and the closing `>`.
    /// `None` is returned when `text` does not begin with `<`, when no `>`
    /// follows, or when the text between the two delimiters is rejected by
    /// `typst_syntax::is_valid_label_literal_id`.
    pub(crate) fn end(text: &str) -> Option<usize> {
        let after_open = text.strip_prefix('<')?;
        // Only the first `>` is considered; everything before it is the candidate id.
        let (id, _) = after_open.split_once('>')?;
        if typst_syntax::is_valid_label_literal_id(id) {
            // One extra byte each for the surrounding `<` and `>`.
            Some(id.len() + 2)
        } else {
            None
        }
    }

    /// Returns true when `text`, ignoring surrounding whitespace, is exactly
    /// one label literal and nothing else.
    pub(crate) fn is_only(text: &str) -> bool {
        let candidate = text.trim();
        matches!(Self::end(candidate), Some(len) if len == candidate.len())
    }
}

/// A block-level element extracted from a Typst document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Block {
Expand Down Expand Up @@ -90,6 +109,9 @@ impl Block {
/// Returns true if this block should be treated as atomic (no word-level diff).
pub fn is_atomic(&self) -> bool {
matches!(
self,
Block::Paragraph { source_text } if TypstLabel::is_only(source_text)
) || matches!(
self,
Block::RawBlock { .. }
| Block::Equation { .. }
Expand Down
18 changes: 18 additions & 0 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ pub fn parse(source: &str) -> Vec<Block> {
content: node_text(&expr),
});
}
Expr::Label(_) if paragraph_buf.trim().is_empty() => {
flush_paragraph(&mut paragraph_buf, &mut blocks);
blocks.push(Block::Paragraph {
source_text: node_text(&expr),
});
}

// ---- inline elements (accumulate into paragraph) ----
Expr::Text(_)
Expand Down Expand Up @@ -223,4 +229,16 @@ mod tests {
}
}
}

#[test]
fn test_parse_label_at_paragraph_start_as_own_block() {
    let blocks = parse("= Sample\n\n<sample-anchor>\nAlpha beta.\n");

    // True when some parsed block is a paragraph with exactly this source text.
    let has_paragraph = |expected: &str| {
        blocks
            .iter()
            .any(|b| matches!(b, Block::Paragraph { source_text } if source_text == expected))
    };

    assert!(matches!(&blocks[0], Block::Heading { .. }));
    // The label is split off into its own paragraph block ...
    assert!(has_paragraph("<sample-anchor>"));
    // ... and the text that followed it stays a separate paragraph.
    assert!(has_paragraph("Alpha beta."));
}
}
87 changes: 81 additions & 6 deletions src/render.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::fmt::Write;

use crate::{Block, BlockKind, DiffResult, DiffSpan, SpanTag};
use crate::{Block, BlockKind, DiffResult, DiffSpan, SpanTag, TypstLabel};

const PREAMBLE: &str = r##"#let diff-added(body) = {
set text(fill: rgb("#0000ff"))
Expand Down Expand Up @@ -92,8 +92,7 @@ fn render_block(block: &Block, out: &mut String) {

/// Returns true if the text consists only of a Typst label `<...>`.
fn is_label_only(text: &str) -> bool {
let t = text.trim();
t.starts_with('<') && t.ends_with('>') && !t[1..t.len() - 1].contains('<')
TypstLabel::is_only(text)
}

/// Render a block wrapped in #diff-added[...] or #diff-deleted[...].
Expand Down Expand Up @@ -199,10 +198,20 @@ fn render_spans(spans: &[DiffSpan], out: &mut String) {
prev_was_diff = false;
}
SpanTag::Deleted => {
if is_label_only(&span.text) {
// Nothing is emitted, so keep `prev_was_diff` tied to the
// last actual output.
continue;
}
write!(out, "#diff-deleted[{}]", escape_content(&span.text, true)).unwrap();
prev_was_diff = true;
}
SpanTag::Inserted => {
if is_label_only(&span.text) {
out.push_str(&span.text);
prev_was_diff = false;
continue;
}
write!(out, "#diff-added[{}]", escape_content(&span.text, false)).unwrap();
prev_was_diff = true;
}
Expand Down Expand Up @@ -235,11 +244,12 @@ fn write_enum_prefix(number: Option<usize>, out: &mut String) {
/// - When `escape_refs` is true, `@` is escaped as `\@` and `<` is escaped
/// as `\<` to suppress reference resolution and label creation (used for
/// deleted content where the referenced label may no longer exist or would
/// create duplicates).
/// create duplicates). Label literals inside `#ref(<label>, ...)` are left
/// unchanged because they are code arguments rather than content labels.
fn escape_content(s: &str, escape_refs: bool) -> String {
let mut result = String::with_capacity(s.len());
let mut depth: i32 = 0;
for ch in s.chars() {
for (i, ch) in s.char_indices() {
match ch {
'[' => {
depth += 1;
Expand All @@ -258,7 +268,9 @@ fn escape_content(s: &str, escape_refs: bool) -> String {
result.push('\\');
result.push('@');
}
'<' if escape_refs => {
// Bare deleted labels are escaped so they do not create anchors.
// In `#ref(<label>, ...)`, though, the label is code and must stay bare.
'<' if escape_refs && !is_ref_label_arg(s, i) => {
result.push('\\');
result.push('<');
}
Expand All @@ -275,6 +287,17 @@ fn escape_content(s: &str, escape_refs: bool) -> String {
result
}

/// Reports whether the `<` at byte offset `label_start` opens the first
/// argument of a `#ref(...)` call.
fn is_ref_label_arg(s: &str, label_start: usize) -> bool {
    // Anything that is not a well-formed label literal cannot be a ref argument.
    if TypstLabel::end(&s[label_start..]).is_none() {
        return false;
    }

    // Look left, skipping whitespace: the first argument sits directly after
    // the opening `(` of the call.
    let Some(before_paren) = s[..label_start].trim_end().strip_suffix('(') else {
        return false;
    };

    // The callee in front of that parenthesis has to be `#ref`.
    before_paren.trim_end().ends_with("#ref")
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -319,6 +342,14 @@ mod tests {
assert_eq!(escape_content("text <my-label>", true), "text \\<my-label>");
}

#[test]
fn test_escape_content_ref_call_label_not_escaped_when_requested() {
    // With reference escaping requested, the label passed to `#ref(...)`
    // must come through unchanged, so input and expected output coincide.
    let source = "compare #ref(<tbl-cobalt-notes>, supplement: [])";
    let escaped = escape_content(source, true);
    assert_eq!(escaped, source);
}

#[test]
fn test_is_label_only() {
assert!(is_label_only("<my-label>"));
Expand Down Expand Up @@ -397,4 +428,48 @@ mod tests {
let output = render(&results);
assert!(output.contains("Hello #diff-deleted[world]#diff-added[there]"));
}

#[test]
fn test_render_modified_label_bare() {
    // A modified paragraph whose only change is a renamed label.
    let removed = DiffSpan {
        tag: SpanTag::Deleted,
        text: "<old-label>".into(),
    };
    let added = DiffSpan {
        tag: SpanTag::Inserted,
        text: "<new-label>".into(),
    };
    let results = vec![DiffResult::Modified {
        kind: BlockKind::Paragraph,
        spans: vec![removed, added],
    }];

    let output = render(&results);
    let has = |needle: &str| output.contains(needle);

    // The new label is present, but neither label is wrapped in a diff call.
    assert!(has("<new-label>"));
    assert!(!has("#diff-added[<new-label>]"));
    assert!(!has("#diff-deleted[\\<old-label>]"));
}

#[test]
fn test_render_skipped_deleted_label_keeps_diff_call_guard() {
    // Deleted text, then a deleted label, then unchanged text starting
    // with `(`.
    let spans = vec![
        (SpanTag::Deleted, "old"),
        (SpanTag::Deleted, "<old-label>"),
        (SpanTag::Equal, "(next)"),
    ]
    .into_iter()
    .map(|(tag, text)| DiffSpan {
        tag,
        text: text.into(),
    })
    .collect();

    let results = vec![DiffResult::Modified {
        kind: BlockKind::Paragraph,
        spans,
    }];

    let output = render(&results);
    // The zero-width space must still separate the diff call from `(next)`.
    assert!(output.contains("#diff-deleted[old]\u{200B}(next)"));
}
}
62 changes: 62 additions & 0 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,65 @@ fn test_markup_reference_paragraph() {
);
assert!(output.contains("#[@foo]のような"));
}

#[test]
fn test_renamed_label_is_not_diffed_inside_label_syntax() {
    // The two documents differ only in one character inside the label name.
    let output = run_diff(
        "= Sample\n\n<sample-widget-anchor>\n\nBody.\n",
        "= Sample\n\n<sample-widget_anchor>\n\nBody.\n",
    );
    let lacks = |needle: &str| !output.contains(needle);

    // The new label appears intact ...
    assert!(output.contains("<sample-widget_anchor>"));
    // ... and no diff markup was emitted inside the label syntax.
    assert!(lacks("<#diff-added"));
    assert!(lacks("#diff-added[_]"));
    assert!(lacks("#diff-deleted[-]"));
}

#[test]
fn test_unchanged_label_stays_outside_added_paragraph() {
    // Both versions keep the same label; the new one inserts a paragraph
    // right after it and changes the following paragraph.
    let before = r#"#set heading(numbering: "1.")

See #ref(<sample-anchor>, supplement: [Section]).

= Sample

<sample-anchor>
Alpha beta gamma delta epsilon zeta eta theta.
"#;
    let after = r#"#set heading(numbering: "1.")

See #ref(<sample-anchor>, supplement: [Section]).

= Sample

<sample-anchor>
Inserted paragraph before old text.

Alpha beta gamma delta changed epsilon zeta eta theta.
"#;
    let output = run_diff(before, after);
    let has = |needle: &str| output.contains(needle);

    // The label and both textual changes are present in the output.
    assert!(has("<sample-anchor>"));
    assert!(has("#diff-added[Inserted paragraph before old text.]"));
    assert!(has("Alpha beta gamma delta #diff-added[changed ]epsilon"));
    // The unchanged label is not pulled into either diff wrapper.
    assert!(!has("#diff-added[<sample-anchor>"));
    assert!(!has("#diff-deleted[\\<sample-anchor>"));
}

#[test]
fn test_replaced_footnote_ref_label_is_not_escaped_on_deleted_side() {
    // Only the footnote wording changes; both versions contain the same
    // `#ref(<sec-bridge-ledger>, ...)` call.
    let before = r#"The bridge inspection memo kept its summary sentence for editors#footnote[
Earlier notes pointed reviewers to #ref(<sec-bridge-ledger>, supplement: [])
while the field log was being reconciled.
]. The closing sentence stays fixed so the paragraph can align.
"#;
    let after = r#"The bridge inspection memo kept its summary sentence for editors#footnote[
Current notes point reviewers to #ref(<sec-bridge-ledger>, supplement: [])
after the field log was reconciled.
]. The closing sentence stays fixed so the paragraph can align.
"#;
    let output = run_diff(before, after);
    let has = |needle: &str| output.contains(needle);

    // The old footnote is rendered as deleted, with its ref label left bare.
    assert!(has("#diff-deleted[#footnote["));
    assert!(has("#ref(<sec-bridge-ledger>, supplement: [])"));
    assert!(!has("#ref(\\<sec-bridge-ledger>"));
}