diff --git a/.gitignore b/.gitignore index 78ac9ee..925b013 100644 --- a/.gitignore +++ b/.gitignore @@ -84,4 +84,7 @@ Backup of *.doc* *.log *.lock -test* \ No newline at end of file +test* +!tests/ +!tests/** +!crates/soapberry-zip/assets/test.zip \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 82ee752..f42eeb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,150 +1,86 @@ -[package] -name = "litchi" -version = "0.0.1" -edition = "2024" -description = "High-performance parser for Microsoft Office, OpenDocument, and Apple iWork file formats with unified API" -authors = ["Ryker Zhu "] -license = "Apache-2.0" -repository = "https://github.com/DevExzh/litchi" -documentation = "https://docs.rs/litchi" -readme = "README.md" -keywords = ["office", "docx", "xlsx", "pptx", "parser"] -categories = ["parser-implementations", "encoding", "text-processing"] -exclude = [ - # Reference materials and third-party projects - "3rdparty/*", - # Examples - "examples/*", - # Test files - "test.*", - "*.doc", - "*.docx", - "*.xls", - "*.xlsx", - "*.xlsb", - "*.ppt", - "*.pptx", - "*.odt", - "*.ods", - "*.odp", - "*.pages", - "*.numbers", - "*.key", - "*.csv", - "*.txt", - # Build artifacts - "target/*", - # Media files (logo kept for crates.io display) - # "media/*", # Uncomment if logo shouldn't be in package - # CI/CD and development files - ".github/*", - ".git/*", - ".gitignore", -] +[workspace] +resolver = "3" +members = ["crates/*"] -[features] -default = ["ole", "ooxml", "ooxml_encryption", "eval_engine"] -full = [ - "iwa", - "odf", - "ole", - "ooxml", - "ooxml_encryption", - "rtf", - "formula", - "imgconv", - "eval_engine", - "fonts", -] -# Format support features -iwa = [ - "dep:snap", - "dep:plist", - "dep:prost", - "dep:prost-types", - "dep:soapberry-zip", - "dep:prost-build", -] -odf = ["dep:soapberry-zip", "dep:quick-xml"] -ole = ["dep:encoding_rs", "dep:bumpalo"] -ooxml = ["dep:soapberry-zip", "dep:quick-xml", "dep:encoding_rs"] -ooxml_encryption = 
["ooxml", "ole", "dep:aes", "dep:cbc", "dep:hmac", "dep:sha1"] -rtf = ["dep:bumpalo", "dep:crc-fast", "dep:encoding_rs"] -# Additional functionality features -formula = ["dep:rowan", "dep:bumpalo", "dep:quick-xml"] -imgconv = ["dep:image"] -fonts = ["dep:allsorts", "dep:font-kit"] -eval_engine = ["dep:statrs", "dep:num-complex"] -eval_engine_web_functions = [ - "eval_engine", - "dep:urlencoding", - "dep:reqwest", - "dep:sxd-document", - "dep:sxd-xpath", -] +[workspace.package] +version = "0.0.1" +edition = "2024" +authors = ["Ryker Zhu "] +license = "Apache-2.0" +repository = "https://github.com/DevExzh/litchi" +rust-version = "1.85" -[dependencies] -aes = { version = "0.8", optional = true } # AES block cipher for OOXML file encryption -aho-corasick = "1.1" # Fast string searching for fast byte pattern matching -allsorts = { version = "0.16", optional = true } # Font subsetting and layout -atoi_simd = "0.18" # SIMD-optimized conversion of byte slices to integers -base64 = "0.22" # Base64 encoding/decoding for embedded binary data in XML -bitflags = { version = "2.10", features = ["std", "serde"] } # Bit flags for efficient flag combinations -bumpalo = { version = "3", features = ["collections"], optional = true } # Fast bump allocator for temporary formula parsing data -bytes = { version = "1", features = ["serde"] } # Efficient byte buffer operations with zero-copy slicing -cbc = { version = "0.1", features = ["alloc"], optional = true } # CBC mode for AES-based OOXML encryption -chrono = { version = "0", features = ["serde"] } # Date and time types for Office timestamps -crc-fast = { version = "1.8", features = ["optimize_crc32_auto"], optional = true } # Fast CRC32 implementation for verifying file integrity -encoding_rs = { version = "0.8", optional = true } # Used by both OLE (.doc, .xls, .ppt) and OOXML (.xlsb) formats for UTF-16LE decoding -fast-float2 = "0.2" # Fast string-to-float conversion with correct rounding -fixedbitset = "0.5" # Fixed-size bitset 
for efficient visited tracking with better cache locality -flate2 = { version = "1", features = ["zlib-rs"], default-features = false } # Compression/decompression for Office file formats -font-kit = { version = "0.14", optional = true } # Font discovery and loading -hmac = { version = "0.13", optional = true } # HMAC for Agile OOXML encryption integrity -image = { version = "0.25", features = ["default-formats", "rayon"], optional = true } # Image format decoding/encoding for embedded media -itoa = "1.0" # Fast integer-to-string conversion -memchr = "2.7" # SIMD-accelerated string searching for fast byte pattern matching -num-complex = { version = "0.4", optional = true } # Complex number arithmetic for engineering functions -once_cell = "1" # Lazy statics and one-time initialization for global state -parking_lot = { version = "0.12", features = ["hardware-lock-elision"] } # Lock-free synchronization primitives for concurrent programming -phf = { version = "0.13", features = ["macros"] } # Perfect hash functions for compile-time static maps -plist = { version = "1", optional = true } # Property list parser for iWork metadata and indexes -prost = { version = "0.14", features = ["derive"], optional = true } # Protocol Buffers implementation for iWork archive structures -prost-types = { version = "0.14", optional = true } # Well-known Protocol Buffer types used by iWork -quick-xml = { version = "0.39", optional = true } # High-performance XML parser for .docx, .xlsx, .pptx files -rand = "0.10" # Cryptographically secure random numbers for salts in protection hashes -rayon = "1.11" # Data parallelism library for parallel iterators and work-stealing -reqwest = { version = "0.13", features = ["json"], optional = true } -roaring = "0" # Compressed bitmap data structure for efficient set operations -rowan = { version = "0.16", optional = true } # Lossless syntax tree library for formula parsing (AST) -ryu = "1.0" # Fast float-to-string conversion with minimal allocations 
-serde = { version = "1", features = ["derive"] } # Serialization/deserialization framework -serde-saphyr = "0" # YAML serialization support -sha1 = { version = "0.11", optional = true } # SHA-1 hashing for Standard 2007 and Agile OOXML encryption -sha2 = "0.11" # SHA-512 hashing for OOXML password protection -smallvec = "1.15" # Stack-allocated vectors for small collections to avoid heap allocations -snap = { version = "1", optional = true } # Snappy compression used in iWork file formats (.pages, .numbers, .key) -soapberry-zip = { path = "soapberry-zip", optional = true } # High-performance ZIP archive handling for modern Office formats -statrs = { version = "0.18", optional = true } # Statistics library for statistical functions -sxd-document = { version = "0.3", optional = true } -sxd-xpath = { version = "0.4", optional = true } -thiserror = "2.0" # Convenient derive macros for error types -tokio = { version = "1", features = ["full"] } -urlencoding = { version = "2.1", optional = true } -xml-minifier = { path = "xml-minifier" } -zerocopy = { version = "0.8", features = ["std"] } # Safe zero-cost type conversions between bytes and structured data -zerocopy-derive = "0.8" # Derive macros for zerocopy traits +[workspace.dependencies] +aes = "0.8" +aho-corasick = "1.1" +allsorts = "0.16" +atoi_simd = "0.18" +base64 = "0.22" +bitflags = { version = "2.10", features = ["std", "serde"] } +bumpalo = { version = "3", features = ["collections"] } +bytes = { version = "1", features = ["serde"] } +cbc = { version = "0.1", features = ["alloc"] } +chrono = { version = "0", features = ["serde"] } +crc-fast = { version = "1.8", features = ["optimize_crc32_auto"] } +encoding_rs = "0.8" +fast-float2 = "0.2" +fixedbitset = "0.5" +flate2 = { version = "1", features = ["zlib-rs"], default-features = false } +font-kit = "0.14" +hmac = "0.13" +image = { version = "0.25", features = ["default-formats", "rayon"] } +itoa = "1.0" +litchi = { version = "0.0.1", path = "crates/litchi" } 
+litchi-cfb = { version = "0.0.1", path = "crates/litchi-cfb" } +# Path deps to internal crates. +litchi-core = { version = "0.0.1", path = "crates/litchi-core" } +litchi-eval = { version = "0.0.1", path = "crates/litchi-eval" } +litchi-fonts = { version = "0.0.1", path = "crates/litchi-fonts" } +litchi-formula = { version = "0.0.1", path = "crates/litchi-formula" } +litchi-imgconv = { version = "0.0.1", path = "crates/litchi-imgconv" } +litchi-iwa = { version = "0.0.1", path = "crates/litchi-iwa" } +litchi-markdown = { version = "0.0.1", path = "crates/litchi-markdown" } +litchi-odf = { version = "0.0.1", path = "crates/litchi-odf" } +litchi-ole = { version = "0.0.1", path = "crates/litchi-ole" } +litchi-ooxml = { version = "0.0.1", path = "crates/litchi-ooxml" } +litchi-opc = { version = "0.0.1", path = "crates/litchi-opc" } +litchi-rtf = { version = "0.0.1", path = "crates/litchi-rtf" } +memchr = "2.7" +num-complex = "0.4" +once_cell = "1" +parking_lot = { version = "0.12", features = ["hardware-lock-elision"] } +phf = { version = "0.13", features = ["macros"] } +plist = "1" +prost = { version = "0.14", features = ["derive"] } +prost-build = "0.14" +prost-types = "0.14" +quick-xml = "0.39" +rand = "0.10" +rayon = "1.11" +reqwest = { version = "0.13", features = ["json"] } +roaring = "0" +rowan = "0.16" +ryu = "1.0" +serde = { version = "1", features = ["derive"] } +serde-saphyr = "0" +sha1 = "0.11" +sha2 = "0.11" +smallvec = "1.15" +snap = "1" +soapberry-zip = { path = "crates/soapberry-zip" } +statrs = "0.18" +sxd-document = "0.3" +sxd-xpath = "0.4" -[build-dependencies] -# Build-time Protocol Buffer compiler for generating iWork message types -# Only needed when iwa feature is enabled -prost-build = { version = "0.14", optional = true } - -[dev-dependencies] -clap = { version = "4", features = ["derive"] } -proptest = "1.5" -tempfile = "3" -zip = { version = "8", default-features = false, features = ["deflate"] } +# External deps used by ≥2 crates — declared 
once, inherited by members. +# VERSIONS PRESERVED VERBATIM FROM CURRENT root Cargo.toml. +tempfile = "3" +thiserror = "2.0" +tokio = { version = "1", features = ["full"] } +urlencoding = "2.1" +xml-minifier = { path = "crates/xml-minifier" } +zerocopy = { version = "0.8", features = ["std"] } +zerocopy-derive = "0.8" [profile.release] lto = true diff --git a/build.rs b/build.rs deleted file mode 100644 index d16fb3f..0000000 --- a/build.rs +++ /dev/null @@ -1,44 +0,0 @@ -fn main() -> std::io::Result<()> { - // Only compile protobuf files if the iwa feature is enabled - #[cfg(feature = "iwa")] - { - println!("cargo:rerun-if-changed=src/iwa/protos/"); - - // Configure prost-build - let mut config = prost_build::Config::new(); - - // Collect all .proto files from the protos directory for complete compilation - let all_proto_files = std::fs::read_dir("src/iwa/protos") - .expect("Failed to read protos directory") - .filter_map(|entry| { - let entry = entry.ok()?; - let path = entry.path(); - if path.extension()?.to_str()? 
== "proto" { - Some(path.to_string_lossy().to_string()) - } else { - None - } - }) - .collect::>(); - - println!( - "Compiling all {} protobuf files together for proper dependency resolution", - all_proto_files.len() - ); - - // Compile all protobuf files - will fail the build if any errors occur - match config - .enable_type_names() - .include_file("iwa_protos.rs") - .compile_protos(&all_proto_files, &["src/iwa/protos"]) - { - Ok(_) => println!("Successfully compiled all protobuf files"), - Err(e) => { - eprintln!("Failed to compile protobuf files: {}\n", e); - panic!("Protobuf compilation failed - check for syntax errors in .proto files"); - }, - } - } - - Ok(()) -} diff --git a/crates/litchi-cfb/Cargo.toml b/crates/litchi-cfb/Cargo.toml new file mode 100644 index 0000000..042a4f8 --- /dev/null +++ b/crates/litchi-cfb/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "litchi-cfb" +description = "Microsoft Compound File Binary (CFB / OLE2) container parser and writer for the Litchi office-formats library." +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[features] +default = [] +# Writing/mutation support. Without this feature only reading is available. +write = [] + +[dependencies] +chrono = { workspace = true } +fixedbitset = { workspace = true } +litchi-core = { workspace = true, features = ["ole"] } +smallvec = { workspace = true } +zerocopy = { workspace = true } +zerocopy-derive = { workspace = true } diff --git a/crates/litchi-cfb/README.md b/crates/litchi-cfb/README.md new file mode 100644 index 0000000..5100f71 --- /dev/null +++ b/crates/litchi-cfb/README.md @@ -0,0 +1,39 @@ +# litchi-cfb + +Parser and writer for the Microsoft Compound File Binary (CFB / OLE2) container format. 
+ +## Overview + +CFB is the storage substrate underneath the legacy Microsoft Office binary +documents (`.doc`, `.xls`, `.ppt`) and is also used to wrap encrypted OOXML +packages. This crate implements `[MS-CFB]` reading and writing, exposes the +directory tree, stream I/O, and standard property-set metadata, and is consumed +by `litchi-ole` and the `crypto` feature of `litchi-ooxml`. + +## Usage + +```toml +[dependencies] +litchi-cfb = "0.0.1" +``` + +```rust +use litchi_cfb::OleFile; +use std::fs::File; + +let file = File::open("example.doc")?; +let mut ole = OleFile::open(file)?; +let word_doc = ole.open_stream(&["WordDocument"])?; +# Ok::<(), litchi_cfb::OleError>(()) +``` + +## Features + +- Zero-copy parsing of CFB headers, FAT/MiniFAT, and directory entries +- Stream extraction by path through the storage tree +- Standard property-set metadata (`SummaryInformation`, `DocumentSummaryInformation`) +- Optional `write` feature for authoring new CFB containers via `OleWriter` + +## License + +Licensed under the Apache License, Version 2.0. Part of the [Litchi](https://github.com/DevExzh/litchi) workspace. diff --git a/crates/litchi-cfb/examples/inspect_ole.rs b/crates/litchi-cfb/examples/inspect_ole.rs new file mode 100644 index 0000000..47e5fdb --- /dev/null +++ b/crates/litchi-cfb/examples/inspect_ole.rs @@ -0,0 +1,188 @@ +//! Inspect a Microsoft Compound File Binary (CFB / OLE2) container. +//! +//! This example verifies the file signature with [`is_ole_file`], opens it +//! with [`OleFile::open`], walks the directory tree printing each entry's +//! name, type, and size, and prints summary properties from +//! [`OleMetadata`] when available. +//! +//! Run with: +//! ```bash +//! cargo run -p litchi-cfb --example inspect_ole -- +//! ``` +//! +//! If no path is supplied, defaults to a sample `.doc` file from the +//! workspace's `test-data/ole/doc/` directory. +//! +//! [`is_ole_file`]: litchi_cfb::is_ole_file +//! 
[`OleFile::open`]: litchi_cfb::OleFile::open +//! [`OleMetadata`]: litchi_cfb::OleMetadata + +use std::fs::File; +use std::io::Read; +use std::path::PathBuf; + +use litchi_cfb::{DirectoryEntry, OleFile, is_ole_file}; + +type ExampleResult = Result>; + +const DEFAULT_SAMPLE: &str = "test-data/ole/doc/Lists.doc"; + +fn main() -> ExampleResult<()> { + let path: PathBuf = std::env::args() + .nth(1) + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from(DEFAULT_SAMPLE)); + + println!("=== litchi-cfb: inspect_ole ==="); + println!("Target file: {}", path.display()); + + // 1. Read a prefix of the file and verify the CFB signature. + // `is_ole_file` requires at least MINIMAL_OLEFILE_SIZE (1536) bytes, + // so read a generously-sized prefix rather than just the magic bytes. + let mut probe_buf = vec![0u8; 4096]; + let n = { + let mut probe = File::open(&path)?; + probe.read(&mut probe_buf)? + }; + probe_buf.truncate(n); + if !is_ole_file(&probe_buf) { + return Err(format!("Not a CFB/OLE2 file (magic mismatch): {}", path.display()).into()); + } + println!("Signature OK: D0 CF 11 E0 A1 B1 1A E1"); + + // 2. Open the CFB container. + let file = File::open(&path)?; + let mut ole = OleFile::open(file)?; + println!("File size: {} bytes", ole.file_size()); + if let Some(name) = ole.get_root_name() { + println!("Root entry: {name}"); + } + + // 3. Walk every directory level and print each entry. + println!("\n--- Directory tree ---"); + print_dir(&ole, &[], 0)?; + + // 4. Stream paths (flattened). + let streams = ole.list_streams(); + println!("\n--- Streams ({}) ---", streams.len()); + for path in &streams { + println!(" /{}", path.join("/")); + } + + // 5. Metadata, if SummaryInformation is present. + println!("\n--- Metadata ---"); + match ole.get_metadata() { + Ok(meta) => print_metadata(&meta), + Err(e) => println!(" (unable to read metadata: {e})"), + } + + Ok(()) +} + +/// Recursively print directory entries. 
`path` is the slice of names that +/// addresses the current storage relative to the root. +fn print_dir( + ole: &OleFile, + path: &[&str], + depth: usize, +) -> ExampleResult<()> { + let entries = ole.list_directory_entries(path)?; + for entry in entries { + let indent = " ".repeat(depth); + let kind = describe_type(entry.entry_type); + println!( + "{indent}- {name:<32} [{kind}] size={size}", + name = entry.name, + kind = kind, + size = entry.size, + ); + + // Recurse into nested storages. Build a new path slice with the + // child name appended. + if is_storage(entry) { + let mut next: Vec<&str> = path.to_vec(); + next.push(&entry.name); + print_dir(ole, &next, depth + 1)?; + } + } + Ok(()) +} + +fn is_storage(entry: &DirectoryEntry) -> bool { + // STGTY_STORAGE = 1, STGTY_ROOT = 5 + entry.entry_type == 1 || entry.entry_type == 5 +} + +fn describe_type(t: u8) -> &'static str { + match t { + 0 => "empty", + 1 => "storage", + 2 => "stream", + 3 => "lockbytes", + 4 => "property", + 5 => "root", + _ => "unknown", + } +} + +fn print_metadata(meta: &litchi_cfb::OleMetadata) { + let mut printed = false; + let mut row = |label: &str, value: Option<&str>| { + if let Some(v) = value + && !v.is_empty() + { + println!(" {label:<22} {v}"); + printed = true; + } + }; + + row("Title:", meta.title.as_deref()); + row("Subject:", meta.subject.as_deref()); + row("Author:", meta.author.as_deref()); + row("Keywords:", meta.keywords.as_deref()); + row("Comments:", meta.comments.as_deref()); + row("Template:", meta.template.as_deref()); + row("Last saved by:", meta.last_saved_by.as_deref()); + row("Revision:", meta.revision_number.as_deref()); + row("Application:", meta.creating_application.as_deref()); + row("Category:", meta.category.as_deref()); + row("Manager:", meta.manager.as_deref()); + row("Company:", meta.company.as_deref()); + + if let Some(t) = meta.create_time { + println!(" {:<22} {t}", "Created:"); + printed = true; + } + if let Some(t) = meta.last_saved_time { + 
println!(" {:<22} {t}", "Last saved:"); + printed = true; + } + if let Some(t) = meta.last_printed_time { + println!(" {:<22} {t}", "Last printed:"); + printed = true; + } + if let Some(d) = meta.edit_time { + println!(" {:<22} {d}", "Edit time:"); + printed = true; + } + if let Some(p) = meta.num_pages { + println!(" {:<22} {p}", "Pages:"); + printed = true; + } + if let Some(w) = meta.num_words { + println!(" {:<22} {w}", "Words:"); + printed = true; + } + if let Some(c) = meta.num_chars { + println!(" {:<22} {c}", "Characters:"); + printed = true; + } + if let Some(cp) = meta.codepage { + println!(" {:<22} {cp}", "Codepage:"); + printed = true; + } + + if !printed { + println!(" (no SummaryInformation properties)"); + } +} diff --git a/crates/litchi-cfb/examples/write_ole.rs b/crates/litchi-cfb/examples/write_ole.rs new file mode 100644 index 0000000..a7ef192 --- /dev/null +++ b/crates/litchi-cfb/examples/write_ole.rs @@ -0,0 +1,82 @@ +//! Write a small CFB / OLE2 container with a single user stream. +//! +//! This example uses [`OleWriter`] to construct a minimal CFB file in +//! memory, save it to a temporary file, and then re-open it with +//! [`OleFile`] to round-trip-verify the contents. +//! +//! Gated on the `write` feature. +//! +//! Run with: +//! ```bash +//! cargo run -p litchi-cfb --features write --example write_ole [-- ] +//! ``` +//! +//! If no output path is supplied the file is written to the system temp +//! directory. +//! +//! [`OleWriter`]: litchi_cfb::OleWriter +//! 
[`OleFile`]: litchi_cfb::OleFile + +#[cfg(feature = "write")] +fn main() -> Result<(), Box> { + use std::fs::File; + use std::path::PathBuf; + + use litchi_cfb::{OleFile, OleWriter, is_ole_file}; + + let out_path: PathBuf = std::env::args() + .nth(1) + .map(PathBuf::from) + .unwrap_or_else(|| std::env::temp_dir().join("litchi-cfb-demo.ole")); + + println!("=== litchi-cfb: write_ole ==="); + println!("Output file: {}", out_path.display()); + + // Build a tiny container: one top-level storage with one stream inside, + // plus a top-level stream alongside it. + let mut writer = OleWriter::new(); + writer.create_stream(&["Greeting"], b"Hello from litchi-cfb!\n")?; + writer.create_storage(&["Demo"])?; + writer.create_stream( + &["Demo", "Payload"], + b"Nested stream contents written by the example.", + )?; + + writer.save(&out_path)?; + println!( + "Wrote CFB file: {} bytes", + std::fs::metadata(&out_path)?.len() + ); + + // Round-trip verify: re-open and list. + // `is_ole_file` checks both the magic *and* a minimum size, so read a + // sufficiently large prefix (>= 1536 bytes) rather than just 8 bytes. + let mut head = vec![0u8; 4096]; + let n = { + use std::io::Read; + let mut f = File::open(&out_path)?; + f.read(&mut head)? + }; + head.truncate(n); + assert!(is_ole_file(&head), "output is not a valid CFB file"); + + let mut ole = OleFile::open(File::open(&out_path)?)?; + println!("\nStreams in re-opened file:"); + for path in ole.list_streams() { + let refs: Vec<&str> = path.iter().map(String::as_str).collect(); + let data = ole.open_stream(&refs)?; + println!(" /{} ({} bytes)", path.join("/"), data.len()); + } + + println!("\nDone."); + Ok(()) +} + +#[cfg(not(feature = "write"))] +fn main() { + eprintln!( + "This example requires the `write` feature. 
Re-run with:\n \ + cargo run -p litchi-cfb --features write --example write_ole" + ); + std::process::exit(1); +} diff --git a/crates/litchi-cfb/fuzz/.gitignore b/crates/litchi-cfb/fuzz/.gitignore new file mode 100644 index 0000000..1a45eee --- /dev/null +++ b/crates/litchi-cfb/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/crates/litchi-cfb/fuzz/Cargo.toml b/crates/litchi-cfb/fuzz/Cargo.toml new file mode 100644 index 0000000..25d186f --- /dev/null +++ b/crates/litchi-cfb/fuzz/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "litchi-cfb-fuzz" +version = "0.0.0" +edition = "2024" +publish = false +authors = ["Ryker Zhu "] +license = "Apache-2.0" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +litchi-cfb = { path = ".." } + +[[bin]] +name = "parse_cfb" +path = "fuzz_targets/parse_cfb.rs" +test = false +doc = false +bench = false + +[profile.release] +debug = 1 +codegen-units = 1 +lto = "thin" + +[workspace] diff --git a/crates/litchi-cfb/fuzz/fuzz_targets/parse_cfb.rs b/crates/litchi-cfb/fuzz/fuzz_targets/parse_cfb.rs new file mode 100644 index 0000000..b18cd79 --- /dev/null +++ b/crates/litchi-cfb/fuzz/fuzz_targets/parse_cfb.rs @@ -0,0 +1,26 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; + +// Drives raw bytes through litchi-cfb's CFB/OLE2 parser. +// Errors are expected on malformed input; we want to ensure +// the parser does not panic, OOM, or hit UB on arbitrary bytes. +fuzz_target!(|data: &[u8]| { + // Cheap sniff helper; exercises the public is_ole_file path. + let _ = litchi_cfb::is_ole_file(data); + + if let Ok(mut ole) = litchi_cfb::OleFile::open(std::io::Cursor::new(data)) { + // Walk the directory tree to give the fuzzer extra reach. + let streams = ole.list_streams(); + let _ = ole.list_directory_entries(&[]); + let _ = ole.get_root_name(); + + // Attempt to read each enumerated stream. Cap iterations so a + // pathological directory tree can't dominate one fuzz iteration. 
+ for path in streams.into_iter().take(64) { + let refs: Vec<&str> = path.iter().map(String::as_str).collect(); + let _ = ole.exists(&refs); + let _ = ole.open_stream(&refs); + } + } +}); diff --git a/crates/litchi-cfb/src/consts.rs b/crates/litchi-cfb/src/consts.rs new file mode 100644 index 0000000..48d1020 --- /dev/null +++ b/crates/litchi-cfb/src/consts.rs @@ -0,0 +1,92 @@ +/// Magic bytes that should be at the beginning of every OLE file +pub const MAGIC: &[u8; 8] = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"; + +/// Minimal size of an empty OLE file with 512-byte sectors (1536 bytes) +pub const MINIMAL_OLEFILE_SIZE: usize = 1536; + +/// Size of a directory entry in bytes +pub const DIRENTRY_SIZE: usize = 128; + +/// Default sector size for version 3 (512 bytes) +pub const SECTOR_SIZE_V3: usize = 512; + +/// Default sector size for version 4 (4096 bytes) +pub const SECTOR_SIZE_V4: usize = 4096; + +// Sector IDs (from AAF specifications) +/// Maximum regular sector ID +pub const MAXREGSECT: u32 = 0xFFFFFFFA; // -6 +/// Denotes a DIFAT sector in a FAT +pub const DIFSECT: u32 = 0xFFFFFFFC; // -4 +/// Denotes a FAT sector in a FAT +pub const FATSECT: u32 = 0xFFFFFFFD; // -3 +/// End of a virtual stream chain +pub const ENDOFCHAIN: u32 = 0xFFFFFFFE; // -2 +/// Unallocated sector +pub const FREESECT: u32 = 0xFFFFFFFF; // -1 + +// Directory Entry IDs (from AAF specifications) +/// Maximum directory entry ID +pub const MAXREGSID: u32 = 0xFFFFFFFA; // -6 +/// Unallocated directory entry +pub const NOSTREAM: u32 = 0xFFFFFFFF; // -1 + +// Object types in storage (from AAF specifications) +/// Empty directory entry +pub const STGTY_EMPTY: u8 = 0; +/// Element is a storage object +pub const STGTY_STORAGE: u8 = 1; +/// Element is a stream object +pub const STGTY_STREAM: u8 = 2; +/// Element is an ILockBytes object +pub const STGTY_LOCKBYTES: u8 = 3; +/// Element is an IPropertyStorage object +pub const STGTY_PROPERTY: u8 = 4; +/// Element is a root storage +pub const STGTY_ROOT: u8 
= 5; + +/// Unknown size for a stream (used when size is not known in advance) +pub const UNKNOWN_SIZE: u32 = 0x7FFFFFFF; + +// Property types +pub const VT_EMPTY: u16 = 0; +pub const VT_NULL: u16 = 1; +pub const VT_I2: u16 = 2; +pub const VT_I4: u16 = 3; +pub const VT_R4: u16 = 4; +pub const VT_R8: u16 = 5; +pub const VT_CY: u16 = 6; +pub const VT_DATE: u16 = 7; +pub const VT_BSTR: u16 = 8; +pub const VT_DISPATCH: u16 = 9; +pub const VT_ERROR: u16 = 10; +pub const VT_BOOL: u16 = 11; +pub const VT_VARIANT: u16 = 12; +pub const VT_UNKNOWN: u16 = 13; +pub const VT_DECIMAL: u16 = 14; +pub const VT_I1: u16 = 16; +pub const VT_UI1: u16 = 17; +pub const VT_UI2: u16 = 18; +pub const VT_UI4: u16 = 19; +pub const VT_I8: u16 = 20; +pub const VT_UI8: u16 = 21; +pub const VT_INT: u16 = 22; +pub const VT_UINT: u16 = 23; +pub const VT_VOID: u16 = 24; +pub const VT_HRESULT: u16 = 25; +pub const VT_PTR: u16 = 26; +pub const VT_SAFEARRAY: u16 = 27; +pub const VT_CARRAY: u16 = 28; +pub const VT_USERDEFINED: u16 = 29; +pub const VT_LPSTR: u16 = 30; +pub const VT_LPWSTR: u16 = 31; +pub const VT_FILETIME: u16 = 64; +pub const VT_BLOB: u16 = 65; +pub const VT_STREAM: u16 = 66; +pub const VT_STORAGE: u16 = 67; +pub const VT_STREAMED_OBJECT: u16 = 68; +pub const VT_STORED_OBJECT: u16 = 69; +pub const VT_BLOB_OBJECT: u16 = 70; +pub const VT_CF: u16 = 71; +pub const VT_CLSID: u16 = 72; +pub const VT_VECTOR: u16 = 0x1000; diff --git a/src/ole/file.rs b/crates/litchi-cfb/src/file.rs similarity index 96% rename from src/ole/file.rs rename to crates/litchi-cfb/src/file.rs index 6940a95..a6f87bd 100644 --- a/src/ole/file.rs +++ b/crates/litchi-cfb/src/file.rs @@ -113,8 +113,8 @@ impl From for OleError { } } -impl From for OleError { - fn from(err: crate::common::binary::BinaryError) -> Self { +impl From for OleError { + fn from(err: litchi_core::binary::BinaryError) -> Self { OleError::InvalidData(err.to_string()) } } @@ -134,6 +134,27 @@ impl std::fmt::Display for OleError { impl 
std::error::Error for OleError {} +// Convert CFB-substrate errors into the unified `litchi_core::Error`. +// +// The reverse direction (`OleError -> litchi_core::Error`) used to live in the +// umbrella crate's `error_ext.rs`, but the orphan rule forbids implementing +// `From for external` outside the crate that defines either side. +// This impl is local to `litchi-cfb` because `OleError` is defined here. +impl From for litchi_core::Error { + fn from(err: OleError) -> Self { + match err { + OleError::Io(e) => litchi_core::Error::Io(e), + OleError::InvalidFormat(s) => litchi_core::Error::InvalidFormat(s), + OleError::InvalidData(s) => litchi_core::Error::InvalidFormat(s), + OleError::NotOleFile => litchi_core::Error::NotOfficeFile, + OleError::CorruptedFile(s) => litchi_core::Error::CorruptedFile(s), + OleError::StreamNotFound => { + litchi_core::Error::ComponentNotFound("Stream not found".to_string()) + }, + } + } +} + impl OleFile { /// Open and parse an OLE file from a reader /// @@ -893,8 +914,8 @@ fn decode_utf16le(bytes: &[u8]) -> String { /// - **Pre-allocated buffer**: Exact capacity to avoid reallocation /// - **2-4x faster** than standard formatting on modern CPUs fn format_clsid(bytes: &[u8]) -> String { - use crate::common::simd::cmp::is_all_zero; - use crate::common::simd::fmt::hex_encode_to_string; + use litchi_core::simd::cmp::is_all_zero; + use litchi_core::simd::fmt::hex_encode_to_string; if bytes.len() != 16 { return String::new(); diff --git a/crates/litchi-cfb/src/lib.rs b/crates/litchi-cfb/src/lib.rs new file mode 100644 index 0000000..db93315 --- /dev/null +++ b/crates/litchi-cfb/src/lib.rs @@ -0,0 +1,18 @@ +//! Microsoft Compound File Binary (CFB / OLE2) container parser and writer. +//! +//! This crate provides the CFB substrate consumed by the legacy Office binary +//! format crates (`litchi-ole` for `.doc`/`.xls`/`.ppt`) and by encrypted +//! OOXML packages (`litchi-ooxml` under its `crypto` feature). +//! +//! 
See `[MS-CFB]: Compound File Binary File Format` for the format spec. + +#![allow(missing_docs)] + +pub mod consts; +mod file; +mod metadata; +pub mod writer; + +pub use file::{DirectoryEntry, OleError, OleFile, is_ole_file}; +pub use metadata::{OleMetadata, PropertyValue}; +pub use writer::OleWriter; diff --git a/crates/litchi-cfb/src/metadata.rs b/crates/litchi-cfb/src/metadata.rs new file mode 100644 index 0000000..f73ecc1 --- /dev/null +++ b/crates/litchi-cfb/src/metadata.rs @@ -0,0 +1,498 @@ +use super::consts::*; +use super::file::{OleError, OleFile}; +use chrono::{DateTime, Duration, Utc}; +use std::collections::HashMap; +use std::io::{Read, Seek}; +use zerocopy::{FromBytes, I16, I32, LE, U16, U32}; + +/// Metadata extracted from OLE property streams +/// +/// This struct contains standard properties from SummaryInformation +/// and DocumentSummaryInformation streams. +#[derive(Debug, Default)] +pub struct OleMetadata { + // SummaryInformation properties + pub codepage: Option, + pub title: Option, + pub subject: Option, + pub author: Option, + pub keywords: Option, + pub comments: Option, + pub template: Option, + pub last_saved_by: Option, + pub revision_number: Option, + pub edit_time: Option, + pub create_time: Option>, + pub last_printed_time: Option>, + pub last_saved_time: Option>, + pub num_pages: Option, + pub num_words: Option, + pub num_chars: Option, + pub creating_application: Option, + pub security: Option, + + // DocumentSummaryInformation properties + pub category: Option, + pub manager: Option, + pub company: Option, +} + +/// Property value types +#[derive(Debug, Clone)] +pub enum PropertyValue { + I2(i16), + I4(i32), + UI2(u16), + UI4(u32), + Bool(bool), + Lpstr(Vec), // Raw bytes for ANSI strings (need codepage to decode) + Lpwstr(String), // Already decoded UTF-16 + Filetime(u64), + Blob(Vec), + Empty, +} + +impl OleFile { + /// Parse metadata from standard property streams + /// + /// This method attempts to parse SummaryInformation and 
+ /// DocumentSummaryInformation streams to extract metadata. + pub fn get_metadata(&mut self) -> Result { + let mut metadata = OleMetadata::default(); + + // Try to parse SummaryInformation stream + if let Ok(data) = self.open_stream(&["\u{0005}SummaryInformation"]) + && let Ok(props) = parse_property_stream(&data) + { + extract_summary_info(&mut metadata, &props); + } + + // Try to parse DocumentSummaryInformation stream + if let Ok(data) = self.open_stream(&["\u{0005}DocumentSummaryInformation"]) + && let Ok(props) = parse_property_stream(&data) + { + extract_document_summary_info(&mut metadata, &props); + } + + Ok(metadata) + } +} + +/// Convert a FILETIME property value to Rust Date +/// +/// The FILETIME structure is a 64-bit value that represents the number of 100-nanosecond intervals +/// that have elapsed since January 1, 1601, Coordinated Universal Time (UTC). +#[inline] +fn filetime_to_date(filetime: u64) -> Option> { + // Number of 100-nanosecond intervals between 1601-01-01 and 1970-01-01 + const EPOCH_DIFF: i64 = 116_444_736_000_000_000; + let doc_epoch = i64::try_from(filetime).ok()?; + Some(DateTime::from_timestamp_nanos( + (doc_epoch - EPOCH_DIFF) * 100, + )) +} + +/// Convert a FILETIME property value to Rust duration +/// +/// It is like [filetime_to_date], but the result is a duration instead of a date. +#[inline] +fn filetime_to_duration(filetime: u64) -> Option { + let nanos = filetime * 100; + Some(Duration::nanoseconds(i64::try_from(nanos).ok()?)) +} + +/// Parse a property stream and return properties as a HashMap +/// +/// Property streams contain metadata in a structured format according +/// to [MS-OLEPS] specification. 
+fn parse_property_stream(data: &[u8]) -> Result, OleError> { + if data.len() < 48 { + return Err(OleError::InvalidFormat( + "Property stream too short".to_string(), + )); + } + + // Skip header (28 bytes) and format ID (20 bytes) + let section_offset = U32::::read_from_bytes(&data[44..48]) + .map(|v| v.get() as usize) + .unwrap_or(0); + + if section_offset + 8 > data.len() { + return Err(OleError::InvalidFormat( + "Invalid section offset".to_string(), + )); + } + + // Read property count (section size at offset 0 is not used) + let num_props = U32::::read_from_bytes(&data[section_offset + 4..section_offset + 8]) + .map(|v| v.get()) + .unwrap_or(0); + + // Limit properties to prevent DoS + let num_props = num_props.min(1000); + + // Create a HashMap with the estimated number of properties + let mut properties = HashMap::with_capacity(num_props as usize); + + // Parse each property + for i in 0..num_props { + let prop_offset = section_offset + 8 + (i as usize) * 8; + if prop_offset + 8 > data.len() { + break; + } + + // Property ID + let prop_id = U32::::read_from_bytes(&data[prop_offset..prop_offset + 4]) + .map(|v| v.get()) + .unwrap_or(0); + + // Offset to property value + let value_offset = section_offset + + U32::::read_from_bytes(&data[prop_offset + 4..prop_offset + 8]) + .map(|v| v.get() as usize) + .unwrap_or(0); + + if value_offset + 4 > data.len() { + continue; + } + + // Property type + let prop_type = U16::::read_from_bytes(&data[value_offset..value_offset + 2]) + .map(|v| v.get()) + .unwrap_or(0); + + // Parse property value based on type + if let Ok(value) = parse_property_value(data, value_offset + 4, prop_type) { + properties.insert(prop_id, value); + } + } + + Ok(properties) +} + +/// Parse a single property value based on its type +fn parse_property_value( + data: &[u8], + offset: usize, + prop_type: u16, +) -> Result { + match prop_type { + VT_I2 => { + // 16-bit signed integer + if offset + 2 > data.len() { + return 
Err(OleError::InvalidFormat("Buffer overflow".to_string())); + } + let value = I16::::read_from_bytes(&data[offset..offset + 2]) + .map(|v| v.get()) + .unwrap_or(0); + Ok(PropertyValue::I2(value)) + }, + VT_I4 | VT_INT | VT_ERROR => { + // 32-bit signed integer + if offset + 4 > data.len() { + return Err(OleError::InvalidFormat("Buffer overflow".to_string())); + } + let value = I32::::read_from_bytes(&data[offset..offset + 4]) + .map(|v| v.get()) + .unwrap_or(0); + Ok(PropertyValue::I4(value)) + }, + VT_UI2 => { + // 16-bit unsigned integer + if offset + 2 > data.len() { + return Err(OleError::InvalidFormat("Buffer overflow".to_string())); + } + let value = U16::::read_from_bytes(&data[offset..offset + 2]) + .map(|v| v.get()) + .unwrap_or(0); + Ok(PropertyValue::UI2(value)) + }, + VT_UI4 | VT_UINT => { + // 32-bit unsigned integer + if offset + 4 > data.len() { + return Err(OleError::InvalidFormat("Buffer overflow".to_string())); + } + let value = U32::::read_from_bytes(&data[offset..offset + 4]) + .map(|v| v.get()) + .unwrap_or(0); + Ok(PropertyValue::UI4(value)) + }, + VT_LPSTR | VT_BSTR => { + // Code page string + if offset + 4 > data.len() { + return Err(OleError::InvalidFormat("Buffer overflow".to_string())); + } + let str_len = U32::::read_from_bytes(&data[offset..offset + 4]) + .map(|v| v.get() as usize) + .unwrap_or(0); + + if offset + 4 + str_len > data.len() { + return Err(OleError::InvalidFormat("String overflow".to_string())); + } + + let str_bytes = &data[offset + 4..offset + 4 + str_len]; + // Store raw bytes - will be decoded later with proper codepage + let raw_bytes = str_bytes.to_vec(); + Ok(PropertyValue::Lpstr(raw_bytes)) + }, + VT_LPWSTR => { + // Unicode string (UTF-16LE) + if offset + 4 > data.len() { + return Err(OleError::InvalidFormat("Buffer overflow".to_string())); + } + let char_count = U32::::read_from_bytes(&data[offset..offset + 4]) + .map(|v| v.get() as usize) + .unwrap_or(0); + + let byte_len = char_count * 2; + if offset + 4 + 
byte_len > data.len() { + return Err(OleError::InvalidFormat("String overflow".to_string())); + } + + // Decode UTF-16LE + let mut utf16_chars = Vec::new(); + for i in 0..char_count { + let byte_offset = offset + 4 + i * 2; + let code_unit = U16::::read_from_bytes(&data[byte_offset..byte_offset + 2]) + .map(|v| v.get()) + .unwrap_or(0); + if code_unit == 0 { + break; + } + utf16_chars.push(code_unit); + } + + let s = String::from_utf16_lossy(&utf16_chars); + Ok(PropertyValue::Lpwstr(s)) + }, + VT_FILETIME => { + // 64-bit file time + if offset + 8 > data.len() { + return Err(OleError::InvalidFormat("Buffer overflow".to_string())); + } + let low = U32::::read_from_bytes(&data[offset..offset + 4]) + .map(|v| v.get() as u64) + .unwrap_or(0); + let high = U32::::read_from_bytes(&data[offset + 4..offset + 8]) + .map(|v| v.get() as u64) + .unwrap_or(0); + let filetime = low | (high << 32); + Ok(PropertyValue::Filetime(filetime)) + }, + VT_BOOL => { + // Boolean (16-bit) + if offset + 2 > data.len() { + return Err(OleError::InvalidFormat("Buffer overflow".to_string())); + } + let value = U16::::read_from_bytes(&data[offset..offset + 2]) + .map(|v| v.get()) + .unwrap_or(0); + Ok(PropertyValue::Bool(value != 0)) + }, + VT_BLOB => { + // Binary data + if offset + 4 > data.len() { + return Err(OleError::InvalidFormat("Buffer overflow".to_string())); + } + let blob_len = U32::::read_from_bytes(&data[offset..offset + 4]) + .map(|v| v.get() as usize) + .unwrap_or(0); + + if offset + 4 + blob_len > data.len() { + return Err(OleError::InvalidFormat("Blob overflow".to_string())); + } + + let blob = data[offset + 4..offset + 4 + blob_len].to_vec(); + Ok(PropertyValue::Blob(blob)) + }, + VT_EMPTY | VT_NULL => Ok(PropertyValue::Empty), + _ => { + // Unsupported type + Ok(PropertyValue::Empty) + }, + } +} + +/// Extract SummaryInformation properties into metadata +/// +/// See [this 
document](https://learn.microsoft.com/en-us/openspecs/windows_protocols/MS-OLEPS/f7933d28-2cc4-4b36-bc23-8861cbcd37c4) +/// for your information. +fn extract_summary_info(metadata: &mut OleMetadata, props: &HashMap) { + // Property IDs for SummaryInformation (start at 1) + // 1: CODEPAGE + let codepage = if let Some(PropertyValue::I2(v)) = props.get(&1) { + let cp = Some(*v as u32); + metadata.codepage = cp; + cp + } else { + None + }; + + // 2: TITLE + if let Some(v) = props.get(&2) { + metadata.title = extract_string(v, codepage); + } + + // 3: SUBJECT + if let Some(v) = props.get(&3) { + metadata.subject = extract_string(v, codepage); + } + + // 4: AUTHOR + if let Some(v) = props.get(&4) { + metadata.author = extract_string(v, codepage); + } + + // 5: KEYWORDS + if let Some(v) = props.get(&5) { + metadata.keywords = extract_string(v, codepage); + } + + // 6: COMMENTS + if let Some(v) = props.get(&6) { + metadata.comments = extract_string(v, codepage); + } + + // 7: TEMPLATE + if let Some(v) = props.get(&7) { + metadata.template = extract_string(v, codepage); + } + + // 8: LAST_SAVED_BY + if let Some(v) = props.get(&8) { + metadata.last_saved_by = extract_string(v, codepage); + } + + // 9: REVISION_NUMBER + if let Some(v) = props.get(&9) { + metadata.revision_number = extract_string(v, codepage); + } + + // 10: EDIT_TIME + if let Some(PropertyValue::Filetime(v)) = props.get(&10) { + metadata.edit_time = filetime_to_duration(*v); + } + + // 11: LAST_PRINTED_TIME + if let Some(PropertyValue::Filetime(v)) = props.get(&11) { + metadata.last_printed_time = filetime_to_date(*v); + } + + // 12: CREATE_TIME + if let Some(PropertyValue::Filetime(v)) = props.get(&12) { + metadata.create_time = filetime_to_date(*v); + } + + // 13: LAST_SAVED_TIME + if let Some(PropertyValue::Filetime(v)) = props.get(&13) { + metadata.last_saved_time = filetime_to_date(*v); + } + + // 14: NUM_PAGES + if let Some(PropertyValue::I4(v)) = props.get(&14) { + metadata.num_pages = Some(*v as u32); 
+ } + + // 15: NUM_WORDS + if let Some(PropertyValue::I4(v)) = props.get(&15) { + metadata.num_words = Some(*v as u32); + } + + // 16: NUM_CHARS + if let Some(PropertyValue::I4(v)) = props.get(&16) { + metadata.num_chars = Some(*v as u32); + } + + // 18: CREATING_APPLICATION + if let Some(v) = props.get(&18) { + metadata.creating_application = extract_string(v, codepage); + } + + // 19: SECURITY + if let Some(PropertyValue::I4(v)) = props.get(&19) { + metadata.security = Some(*v as u32); + } +} + +/// Extract DocumentSummaryInformation properties into metadata +/// +/// See [this document](https://learn.microsoft.com/en-us/windows/win32/stg/the-documentsummaryinformation-and-userdefined-property-sets) +/// for your information. +fn extract_document_summary_info(metadata: &mut OleMetadata, props: &HashMap) { + // Use the codepage that was set during SummaryInformation parsing + let codepage = metadata.codepage; + + // 2: CATEGORY + if let Some(v) = props.get(&2) { + metadata.category = extract_string(v, codepage); + } + + // 3. PRESFORMAT + // if let Some(v) = props.get(&3) { + // metadata.presentation_target = extract_string(v, codepage); + // } + + // 14: MANAGER + if let Some(v) = props.get(&14) { + metadata.manager = extract_string(v, codepage); + } + + // 15: COMPANY + if let Some(v) = props.get(&15) { + metadata.company = extract_string(v, codepage); + } +} + +/// Extract string from property value with proper encoding +fn extract_string(value: &PropertyValue, codepage: Option) -> Option { + match value { + PropertyValue::Lpstr(bytes) => { + if bytes.is_empty() { + None + } else { + litchi_core::encoding::decode_bytes(bytes, codepage) + } + }, + PropertyValue::Lpwstr(s) => { + if s.is_empty() { + None + } else { + Some(s.clone()) + } + }, + _ => None, + } +} + +// Convert CFB-substrate metadata into the unified `litchi_core::Metadata`. 
+// +// Used to live in the umbrella crate's `metadata_ext.rs`, but the orphan rule +// forbids implementing `From for external`. This impl is local to +// `litchi-cfb` because `OleMetadata` is defined here. +impl From for litchi_core::Metadata { + fn from(ole_metadata: OleMetadata) -> Self { + litchi_core::Metadata { + title: ole_metadata.title, + subject: ole_metadata.subject, + author: ole_metadata.author, + keywords: ole_metadata.keywords, + description: ole_metadata.comments, + template: ole_metadata.template, + last_modified_by: ole_metadata.last_saved_by, + revision: ole_metadata.revision_number, + created: ole_metadata.create_time, + modified: ole_metadata.last_saved_time, + page_count: ole_metadata.num_pages, + word_count: ole_metadata.num_words, + character_count: ole_metadata.num_chars, + application: ole_metadata.creating_application, + category: ole_metadata.category, + company: ole_metadata.company, + manager: ole_metadata.manager, + content_status: None, // OLE doesn't have this field + last_printed_time: ole_metadata.last_printed_time, + security: ole_metadata.security, + codepage: ole_metadata.codepage, + } + } +} diff --git a/crates/litchi-cfb/src/writer/core.rs b/crates/litchi-cfb/src/writer/core.rs new file mode 100644 index 0000000..721d01d --- /dev/null +++ b/crates/litchi-cfb/src/writer/core.rs @@ -0,0 +1,734 @@ +/// OLE file writer implementation +/// +/// This module provides functionality to create and modify OLE2 structured storage files. +/// It supports creating new files, adding/updating/deleting streams and storages, +/// and properly managing the FAT (File Allocation Table) and directory structure. +/// +/// # Architecture +/// +/// The writer uses a transactional approach where changes are accumulated in memory +/// and written atomically when `save()` is called. This ensures file integrity even +/// if the write operation fails partway through. 
+/// +/// # Stream Allocation vs Directory Ordering +/// +/// **IMPORTANT**: This is a critical distinction for Microsoft Office compatibility! +/// +/// 1. **Stream ALLOCATION order** determines which sector each stream is written to: +/// - Streams are allocated sectors in the order they are added via `create_stream()` +/// - For DOC files, `WordDocument` MUST be added first to get sector 0 +/// - This is enforced in the FAT allocation logic (see lines 345-358) +/// +/// 2. **Directory ENTRY order** determines how entries appear in the directory tree: +/// - Directory entries are sorted using Apache POI's PropertyComparator rules +/// - Entries are organized into a balanced binary search tree +/// - This happens during directory generation (see `DirectoryBuilder`) +/// +/// ## Example: DOC File Structure +/// +/// ```text +/// Stream creation order: +/// 1. create_stream(["WordDocument"], ...) → allocated to sector 0 +/// 2. create_stream(["1Table"], ...) → allocated to sector 8 +/// +/// Directory tree (after sorting by name length): +/// Root Entry (SID 0) +/// └─ WordDocument (SID 1, sector 0) [midpoint] +/// └─ 1Table (SID 2, sector 8) [left child] +/// ``` +/// +/// # Example +/// +/// ```rust,no_run +/// use litchi_cfb::writer::OleWriter; +/// +/// // Create a new OLE file +/// let mut writer = OleWriter::new(); +/// +/// // Add a stream +/// writer.create_stream(&["MyStream"], b"Hello, World!")?; +/// +/// // Create a storage and add a stream inside it +/// writer.create_storage(&["MyStorage"])?; +/// writer.create_stream(&["MyStorage", "NestedStream"], b"Nested content")?; +/// +/// // Save to file +/// writer.save("output.ole")?; +/// # Ok::<(), Box>(()) +/// ``` +use super::super::consts::*; +use super::super::file::OleError; +use super::difat::DifatBuilder; +use super::directory::DirectoryBuilder; +use super::fat::FatBuilder; +use super::header::HeaderBuilder; +use super::minifat::MiniFatBuilder; +use std::collections::HashMap; +use std::io::{Seek, 
SeekFrom, Write}; + +/// Represents a pending stream write operation +#[derive(Debug, Clone)] +#[allow(dead_code)] // Reserved for future implementation +struct StreamWrite { + /// Path to the stream + path: Vec, + /// Stream data + data: Vec, +} + +/// Represents a pending storage creation operation +#[derive(Debug, Clone)] +#[allow(dead_code)] // Reserved for future implementation +struct StorageCreate { + /// Path to the storage + path: Vec, +} + +/// Directory entry for writing +#[derive(Debug, Clone)] +#[allow(dead_code)] // Reserved for future implementation +struct WriteDirectoryEntry { + /// Entry name + name: String, + /// Entry type (stream, storage, root) + entry_type: u8, + /// Left sibling SID + sid_left: u32, + /// Right sibling SID + sid_right: u32, + /// Child SID + sid_child: u32, + /// CLSID (16 bytes) + clsid: [u8; 16], + /// State bits + state_bits: u32, + /// Creation time (FILETIME) + creation_time: u64, + /// Modified time (FILETIME) + modified_time: u64, + /// Starting sector + start_sector: u32, + /// Stream size + stream_size: u64, +} + +/// OLE file writer +/// +/// Provides methods to create and modify OLE2 structured storage files. +/// All operations are buffered in memory until `save()` is called. 
+pub struct OleWriter { + /// Sector size (512 or 4096 bytes) + sector_size: usize, + /// Mini sector size (typically 64 bytes) + mini_sector_size: usize, + /// Mini stream cutoff size (typically 4096 bytes) + mini_stream_cutoff: u32, + /// Directory entries + entries: Vec, + /// Stream data in insertion order (path, data) + /// Using Vec instead of HashMap to preserve insertion order for directory entries + streams: Vec<(Vec, Vec)>, + /// Storages indexed by path + storages: HashMap, ()>, +} + +impl OleWriter { + /// Create a new empty OLE writer with default settings (512-byte sectors) + /// + /// # Example + /// + /// ```rust + /// use litchi_cfb::writer::OleWriter; + /// + /// let writer = OleWriter::new(); + /// ``` + pub fn new() -> Self { + Self::with_sector_size(512) + } + + /// Create a new OLE writer with specified sector size + /// + /// # Arguments + /// + /// * `sector_size` - Sector size in bytes (512 or 4096) + /// + /// # Panics + /// + /// Panics if sector_size is not 512 or 4096 + pub fn with_sector_size(sector_size: usize) -> Self { + assert!( + sector_size == 512 || sector_size == 4096, + "Sector size must be 512 or 4096" + ); + + let mut writer = OleWriter { + sector_size, + mini_sector_size: 64, + mini_stream_cutoff: 4096, + entries: Vec::new(), + streams: Vec::new(), + storages: HashMap::new(), + }; + + // Initialize with root entry + writer.entries.push(WriteDirectoryEntry { + name: "Root Entry".to_string(), + entry_type: STGTY_ROOT, + sid_left: NOSTREAM, + sid_right: NOSTREAM, + sid_child: NOSTREAM, + clsid: [0; 16], + state_bits: 0, + creation_time: 0, + modified_time: 0, + start_sector: 0, // Will be updated when writing ministream + stream_size: 0, // Will be updated when writing ministream + }); + + writer + } + + /// Set the CLSID (Class ID) for the root entry + /// + /// This is required for Microsoft Office to recognize the document type. 
+ /// For Word 97-2003 documents, use: `{00020906-0000-0000-C000-000000000046}` + /// + /// # Arguments + /// + /// * `clsid` - 16-byte CLSID in little-endian format + /// + /// # Example + /// + /// ```rust,no_run + /// # use litchi_cfb::writer::OleWriter; + /// let mut writer = OleWriter::new(); + /// // Word 97-2003 Document CLSID: {00020906-0000-0000-C000-000000000046} + /// let word_clsid = [0x06, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + /// 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46]; + /// writer.set_root_clsid(word_clsid); + /// # Ok::<(), litchi_cfb::OleError>(()) + /// ``` + pub fn set_root_clsid(&mut self, clsid: [u8; 16]) { + // Update the root entry (always at index 0) + if !self.entries.is_empty() { + self.entries[0].clsid = clsid; + } + } + + /// Create a new stream at the specified path + /// + /// If a stream already exists at this path, it will be overwritten. + /// + /// # Arguments + /// + /// * `path` - Path components (e.g., `&["MyStorage", "MyStream"]`) + /// * `data` - Stream contents + /// + /// # Returns + /// + /// * `Result<(), OleError>` - Success or error + /// + /// # Example + /// + /// ```rust,no_run + /// # use litchi_cfb::writer::OleWriter; + /// let mut writer = OleWriter::new(); + /// writer.create_stream(&["MyStream"], b"Hello, World!")?; + /// # Ok::<(), litchi_cfb::OleError>(()) + /// ``` + pub fn create_stream(&mut self, path: &[&str], data: &[u8]) -> Result<(), OleError> { + if path.is_empty() { + return Err(OleError::InvalidData("Empty path".to_string())); + } + + // Convert path to owned strings + let owned_path: Vec = path.iter().map(|s| s.to_string()).collect(); + + // Check if stream already exists and update it + if let Some(pos) = self.streams.iter().position(|(p, _)| p == &owned_path) { + self.streams[pos].1 = data.to_vec(); + } else { + // Store stream data in insertion order + self.streams.push((owned_path, data.to_vec())); + } + + Ok(()) + } + + /// Update an existing stream + /// + /// This is an alias 
for `create_stream` since both create and update operations + /// have the same behavior (overwrite if exists). + /// + /// # Arguments + /// + /// * `path` - Path components + /// * `data` - New stream contents + pub fn update_stream(&mut self, path: &[&str], data: &[u8]) -> Result<(), OleError> { + self.create_stream(path, data) + } + + /// Delete a stream + /// + /// # Arguments + /// + /// * `path` - Path components + /// + /// # Returns + /// + /// * `Result<(), OleError>` - Success or error if stream doesn't exist + pub fn delete_stream(&mut self, path: &[&str]) -> Result<(), OleError> { + let owned_path: Vec = path.iter().map(|s| s.to_string()).collect(); + + if let Some(pos) = self.streams.iter().position(|(p, _)| p == &owned_path) { + self.streams.remove(pos); + Ok(()) + } else { + Err(OleError::StreamNotFound) + } + } + + /// Create a new storage (directory) at the specified path + /// + /// Parent storages are created automatically if they don't exist. + /// + /// # Arguments + /// + /// * `path` - Path components (e.g., `&["MyStorage"]`) + /// + /// # Example + /// + /// ```rust,no_run + /// # use litchi_cfb::writer::OleWriter; + /// let mut writer = OleWriter::new(); + /// writer.create_storage(&["MyStorage"])?; + /// writer.create_stream(&["MyStorage", "MyStream"], b"data")?; + /// # Ok::<(), litchi_cfb::OleError>(()) + /// ``` + pub fn create_storage(&mut self, path: &[&str]) -> Result<(), OleError> { + if path.is_empty() { + return Err(OleError::InvalidData("Empty path".to_string())); + } + + let owned_path: Vec = path.iter().map(|s| s.to_string()).collect(); + self.storages.insert(owned_path, ()); + + Ok(()) + } + + /// Delete a storage and all its contents + /// + /// # Arguments + /// + /// * `path` - Path components + /// + /// # Returns + /// + /// * `Result<(), OleError>` - Success or error if storage doesn't exist + /// + /// # Implementation Notes + /// + /// Currently only supports deleting empty storages. 
Recursive deletion + /// of child streams and storages is planned for a future enhancement + /// when nested storage support is added. + pub fn delete_storage(&mut self, path: &[&str]) -> Result<(), OleError> { + let owned_path: Vec = path.iter().map(|s| s.to_string()).collect(); + + if self.storages.remove(&owned_path).is_none() { + return Err(OleError::InvalidFormat("Storage not found".to_string())); + } + + // Note: Recursive deletion of child streams/storages will be implemented + // when nested storage support is added + + Ok(()) + } + + /// Save the OLE file to a writer + /// + /// This writes all buffered changes to the output writer in OLE2 format. + /// + /// # Arguments + /// + /// * `writer` - Output writer (must implement Write + Seek) + /// + /// # Returns + /// + /// * `Result<(), OleError>` - Success or error + /// + /// # Implementation Notes + /// + /// The write process follows these steps: + /// 1. Classify streams as small (< 4096 bytes) or large (>= 4096 bytes) + /// 2. Allocate mini sectors for small streams and build MiniFAT + /// 3. Allocate regular sectors for large streams and build FAT + /// 4. Build directory structure with proper sector references + /// 5. Generate and write header, FAT, MiniFAT, directory, and data sectors + /// + /// This is based on Apache POI's POIFSFileSystem.writeFilesystem() method. 
+ pub fn write_to(&mut self, writer: &mut W) -> Result<(), OleError> { + // Initialize builders + let mut fat = FatBuilder::new_with_size(self.sector_size); + let mut minifat = MiniFatBuilder::new(self.mini_sector_size); + + // Separate small and large streams + let mut small_streams: Vec<(Vec, Vec)> = Vec::new(); + let mut large_streams: Vec<(Vec, Vec)> = Vec::new(); + + for (path, data) in &self.streams { + if data.len() < self.mini_stream_cutoff as usize { + small_streams.push((path.clone(), data.clone())); + } else { + large_streams.push((path.clone(), data.clone())); + } + } + + // Allocate mini sectors for small streams and track their start sectors + let mut small_stream_sectors: Vec<(Vec, Vec, u32)> = Vec::new(); + for (path, data) in &small_streams { + let start_mini_sector = minifat.allocate_mini_chain(data); + small_stream_sectors.push((path.clone(), data.clone(), start_mini_sector)); + } + + // CRITICAL: Allocate large streams FIRST to ensure WordDocument gets sector 0 + // Microsoft Word requires WordDocument at sector 0! 
+ + // Add large streams to directory (using FAT) - BEFORE ministream + let mut large_stream_data: Vec<(u32, Vec, Vec)> = Vec::new(); + for (path, data) in &large_streams { + let start_sector = if data.is_empty() { + ENDOFCHAIN + } else { + fat.allocate_chain(data.len()) + }; + + large_stream_data.push((start_sector, data.clone(), path.clone())); + } + + // NOW allocate ministream (after large streams) + let (ministream_start, ministream_size) = if !minifat.is_empty() { + let ministream_data = minifat.ministream_data(); + let start = fat.allocate_chain(ministream_data.len()); + (start, minifat.ministream_size()) + } else { + (ENDOFCHAIN, 0u64) + }; + + // Initialize directory builder with ministream info + let mut directory = DirectoryBuilder::new(ministream_start, ministream_size); + + // Set root CLSID if specified (e.g., for Word documents) + if !self.entries.is_empty() && self.entries[0].clsid != [0u8; 16] { + directory.set_root_clsid(self.entries[0].clsid); + } + + // Pre-create storages declared explicitly by user + for storage_path in self.storages.keys() { + directory.add_storage_path(storage_path); + } + + // Add large streams to directory using full path + for (start_sector, data, path) in &large_stream_data { + let full: Vec = path.clone(); + let _sid = directory.add_stream_path(&full, *start_sector, data.len() as u64); + } + + // Add small streams to directory (using MiniFAT) with full path + for (path, data, start_mini_sector) in &small_stream_sectors { + let full: Vec = path.clone(); + let _sid = directory.add_stream_path(&full, *start_mini_sector, data.len() as u64); + } + + // Generate directory stream + let dir_stream = directory.generate_directory_stream(); + let dir_sector_count = (dir_stream.len().div_ceil(self.sector_size)) as u32; + let dir_start_sector = fat.allocate_chain(dir_stream.len()); + + // Generate MiniFAT sectors (if needed) + let (minifat_start_sector, num_minifat_sectors) = if !minifat.is_empty() { + let minifat_sectors = 
minifat.generate_minifat_sectors(self.sector_size); + let num_sectors = minifat_sectors.len() as u32; + + if num_sectors > 0 { + let start = fat.allocate_chain(num_sectors as usize * self.sector_size); + (start, num_sectors) + } else { + (ENDOFCHAIN, 0) + } + } else { + (ENDOFCHAIN, 0) + }; + + // === Compute FAT/DIFAT sectors requirement iteratively === + let entries_per_fat_sector = self.sector_size as u32 / 4; + let ids_per_difat_sector = entries_per_fat_sector - 1; // last u32 is next pointer + + let n_used = fat.total_sectors(); + let mut n_fat: u32 = 0; + let mut n_difat: u32 = 0; + for _ in 0..8 { + let total_entries = n_used + n_fat + n_difat; + let new_n_fat = total_entries.div_ceil(entries_per_fat_sector); + let new_n_difat = if new_n_fat > 109 { + let over = new_n_fat - 109; + over.div_ceil(ids_per_difat_sector) + } else { + 0 + }; + if new_n_fat == n_fat && new_n_difat == n_difat { + break; + } + n_fat = new_n_fat; + n_difat = new_n_difat; + } + + // Reserve DIFAT sectors then FAT sectors + let difat_start_sector = if n_difat > 0 { + fat.allocate_special(n_difat, DIFSECT) + } else { + ENDOFCHAIN + }; + let fat_start_sector = if n_fat > 0 { + fat.allocate_special(n_fat, FATSECT) + } else { + ENDOFCHAIN + }; + + // Prepare FAT sector data now that reservations are included + let fat_sectors_data = fat.generate_fat_sectors(); + let num_fat_sectors = n_fat; + + // Validate FAT + fat.validate() + .map_err(|e| OleError::InvalidData(format!("FAT validation failed: {}", e)))?; + + // Build header + let mut header_builder = HeaderBuilder::new(self.sector_size); + header_builder.set_first_dir_sector(dir_start_sector); + header_builder.set_num_dir_sectors(dir_sector_count); + header_builder.set_minifat(minifat_start_sector, num_minifat_sectors); + + // Handle DIFAT if needed (> 109 FAT sectors) + let fat_sector_ids: Vec = if num_fat_sectors > 0 { + (fat_start_sector..fat_start_sector + num_fat_sectors).collect() + } else { + Vec::new() + }; + + let 
(num_difat_sectors, difat_sectors) = if num_fat_sectors > 109 { + let mut difat = DifatBuilder::new(self.sector_size); + difat.set_fat_sectors(&fat_sector_ids); + let num_difat = difat.calculate_difat_sector_count(); + let sectors = if num_difat > 0 { + difat.generate_difat_sectors(difat_start_sector) + } else { + Vec::new() + }; + (num_difat, sectors) + } else { + (0, Vec::new()) + }; + + // Add first 109 FAT sector IDs to header + header_builder.add_fat_sectors(&fat_sector_ids); + + // Set DIFAT info in header + if num_difat_sectors > 0 { + header_builder.set_difat(difat_start_sector, num_difat_sectors); + } + + let header = header_builder.generate(); + + // === Write the file === + + // Write header (sector 0 position is offset 0, but actual sectors start at +1) + writer.write_all(&header)?; + + // Write ministream data (if any) + if !minifat.is_empty() && ministream_start != ENDOFCHAIN { + let position = ((ministream_start as u64) + 1) * (self.sector_size as u64); + writer.seek(SeekFrom::Start(position))?; + + let ministream_data = minifat.ministream_data(); + let padded_size = ministream_data.len().div_ceil(self.sector_size) * self.sector_size; + let mut padded_data = ministream_data.to_vec(); + padded_data.resize(padded_size, 0); + writer.write_all(&padded_data)?; + } + + // Write large stream data sectors + for (start_sector, data, _path) in &large_stream_data { + if *start_sector == ENDOFCHAIN { + continue; + } + + // Calculate file position for this sector + let position = ((*start_sector as u64) + 1) * (self.sector_size as u64); + writer.seek(SeekFrom::Start(position))?; + + // Write data (padded to sector boundaries) + let padded_size = data.len().div_ceil(self.sector_size) * self.sector_size; + let mut padded_data = data.clone(); + padded_data.resize(padded_size, 0); + writer.write_all(&padded_data)?; + } + + // Write directory stream + let dir_position = ((dir_start_sector as u64) + 1) * (self.sector_size as u64); + 
writer.seek(SeekFrom::Start(dir_position))?; + let dir_padded_size = dir_stream.len().div_ceil(self.sector_size) * self.sector_size; + let mut dir_padded = dir_stream; + dir_padded.resize(dir_padded_size, 0); + writer.write_all(&dir_padded)?; + + // Write MiniFAT sectors (if any) + if !minifat.is_empty() && minifat_start_sector != ENDOFCHAIN { + let minifat_sectors = minifat.generate_minifat_sectors(self.sector_size); + + for (current_sector, minifat_sector_data) in + (minifat_start_sector..).zip(minifat_sectors.iter()) + { + let position = ((current_sector as u64) + 1) * (self.sector_size as u64); + writer.seek(SeekFrom::Start(position))?; + writer.write_all(minifat_sector_data)?; + } + } + + // Write FAT sectors + for (i, fat_sector_data) in fat_sectors_data.iter().enumerate() { + let sector_id = fat_start_sector + i as u32; + let position = ((sector_id as u64) + 1) * (self.sector_size as u64); + writer.seek(SeekFrom::Start(position))?; + writer.write_all(fat_sector_data)?; + } + + // Write DIFAT sectors (if any) + if !difat_sectors.is_empty() { + for (current_sector, difat_sector_data) in + (difat_start_sector..).zip(difat_sectors.iter()) + { + let position = ((current_sector as u64) + 1) * (self.sector_size as u64); + writer.seek(SeekFrom::Start(position))?; + writer.write_all(difat_sector_data)?; + } + } + + writer.flush()?; + + Ok(()) + } + + /// Save the OLE file to a file path + /// + /// # Arguments + /// + /// * `path` - Output file path + /// + /// # Returns + /// + /// * `Result<(), OleError>` - Success or error + /// + /// # Example + /// + /// ```rust,no_run + /// # use litchi_cfb::writer::OleWriter; + /// let mut writer = OleWriter::new(); + /// writer.create_stream(&["Test"], b"Hello")?; + /// writer.save("output.ole")?; + /// # Ok::<(), litchi_cfb::OleError>(()) + /// ``` + pub fn save>(&mut self, path: P) -> Result<(), OleError> { + let file = std::fs::File::create(path)?; + let mut buffered = std::io::BufWriter::new(file); + self.write_to(&mut 
buffered)?; + buffered.flush()?; + Ok(()) + } +} + +impl Default for OleWriter { + fn default() -> Self { + Self::new() + } +} + +/// Encode a string to UTF-16LE bytes (padded to 64 bytes) +/// +/// This is used for directory entry names in OLE files. +/// +/// # Arguments +/// +/// * `name` - The string to encode (max 31 characters) +/// +/// # Returns +/// +/// * `[u8; 64]` - UTF-16LE encoded bytes with null terminator +/// +/// # Implementation Notes +/// +/// All core helper functions have been implemented: +/// - ✅ UTF-16LE encoding (this function) +/// - ✅ FAT chain building (FatBuilder) +/// - ✅ MiniFAT allocation (MiniFatBuilder) +/// - ✅ DIFAT handling (DifatBuilder) +/// - ✅ Directory tree building (DirectoryBuilder) +/// - Future: Balanced red-black tree (planned enhancement) +#[allow(dead_code)] // Reserved for future implementation +fn encode_name_utf16le(name: &str) -> [u8; 64] { + let mut result = [0u8; 64]; + let utf16: Vec = name.encode_utf16().collect(); + + // Copy UTF-16 data (max 31 characters + null terminator) + let max_chars = 31.min(utf16.len()); + for (i, &ch) in utf16.iter().take(max_chars).enumerate() { + let bytes = ch.to_le_bytes(); + result[i * 2] = bytes[0]; + result[i * 2 + 1] = bytes[1]; + } + + // Null terminator + if max_chars < 32 { + result[max_chars * 2] = 0; + result[max_chars * 2 + 1] = 0; + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_writer() { + let writer = OleWriter::new(); + assert_eq!(writer.sector_size, 512); + assert_eq!(writer.mini_sector_size, 64); + assert_eq!(writer.mini_stream_cutoff, 4096); + assert_eq!(writer.entries.len(), 1); // Root entry + } + + #[test] + fn test_create_stream() { + let mut writer = OleWriter::new(); + writer.create_stream(&["Test"], b"Hello").unwrap(); + assert_eq!(writer.streams.len(), 1); + } + + #[test] + fn test_create_storage() { + let mut writer = OleWriter::new(); + writer.create_storage(&["Storage"]).unwrap(); + 
assert_eq!(writer.storages.len(), 1); + } + + #[test] + fn test_encode_name() { + let encoded = encode_name_utf16le("Test"); + // Verify UTF-16LE encoding: 'T' = 0x0054, 'e' = 0x0065, etc. + assert_eq!(encoded[0], 0x54); // 'T' low byte + assert_eq!(encoded[1], 0x00); // 'T' high byte + assert_eq!(encoded[2], 0x65); // 'e' low byte + assert_eq!(encoded[3], 0x00); // 'e' high byte + } +} diff --git a/src/ole/writer/difat.rs b/crates/litchi-cfb/src/writer/difat.rs similarity index 100% rename from src/ole/writer/difat.rs rename to crates/litchi-cfb/src/writer/difat.rs diff --git a/src/ole/writer/directory.rs b/crates/litchi-cfb/src/writer/directory.rs similarity index 99% rename from src/ole/writer/directory.rs rename to crates/litchi-cfb/src/writer/directory.rs index 3b3753e..d43dd65 100644 --- a/src/ole/writer/directory.rs +++ b/crates/litchi-cfb/src/writer/directory.rs @@ -104,7 +104,7 @@ impl DirectoryEntryBuilder { /// # Example /// /// ```ignore - /// use litchi::ole::writer::DirectoryEntryBuilder; + /// use litchi_cfb::writer::DirectoryEntryBuilder; /// /// // Word 97-2003 Document CLSID: {00020906-0000-0000-C000-000000000046} /// let word_clsid = [0x06, 0x09, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/src/ole/writer/fat.rs b/crates/litchi-cfb/src/writer/fat.rs similarity index 100% rename from src/ole/writer/fat.rs rename to crates/litchi-cfb/src/writer/fat.rs diff --git a/src/ole/writer/header.rs b/crates/litchi-cfb/src/writer/header.rs similarity index 100% rename from src/ole/writer/header.rs rename to crates/litchi-cfb/src/writer/header.rs diff --git a/src/ole/writer/minifat.rs b/crates/litchi-cfb/src/writer/minifat.rs similarity index 100% rename from src/ole/writer/minifat.rs rename to crates/litchi-cfb/src/writer/minifat.rs diff --git a/src/ole/writer/mod.rs b/crates/litchi-cfb/src/writer/mod.rs similarity index 100% rename from src/ole/writer/mod.rs rename to crates/litchi-cfb/src/writer/mod.rs diff --git a/src/ole/writer/tests.rs 
b/crates/litchi-cfb/src/writer/tests.rs similarity index 100% rename from src/ole/writer/tests.rs rename to crates/litchi-cfb/src/writer/tests.rs diff --git a/crates/litchi-core/Cargo.toml b/crates/litchi-core/Cargo.toml new file mode 100644 index 0000000..0d75918 --- /dev/null +++ b/crates/litchi-core/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "litchi-core" +description = "Common types, traits, and utilities for the Litchi office-formats library." +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[features] +# These features mirror the umbrella's per-format features only because +# `src/common/encoding.rs` is feature-gated on `ole|rtf` today, and +# `src/common/detection/odf.rs` is feature-gated on `odf`. The umbrella +# enables these on `litchi-core` to preserve the existing gating. +default = [] +odf = [] +ole = ["dep:encoding_rs"] +rtf = ["dep:encoding_rs"] + +[dependencies] +aho-corasick = { workspace = true } +chrono = { workspace = true } +encoding_rs = { workspace = true, optional = true } +fast-float2 = { workspace = true } +once_cell = { workspace = true } +quick-xml = { workspace = true } +rand = { workspace = true } +serde = { workspace = true } +smallvec = { workspace = true } +soapberry-zip = { workspace = true } +thiserror = { workspace = true } +zerocopy = { workspace = true } diff --git a/crates/litchi-core/README.md b/crates/litchi-core/README.md new file mode 100644 index 0000000..4c72e4e --- /dev/null +++ b/crates/litchi-core/README.md @@ -0,0 +1,46 @@ +# litchi-core + +Shared types, traits, and utilities used by every format crate in the Litchi workspace. + +## Overview + +`litchi-core` is the foundation of the [Litchi](https://github.com/DevExzh/litchi) +office-formats library. 
It provides the unified `Error`/`Result` types, +file-format detection, BOM and encoding helpers, document metadata, and +length/style primitives shared across the OLE, OOXML, ODF, iWork, and RTF +format crates. + +Most users will pull this crate in transitively via `litchi` or one of the +format crates rather than depending on it directly. + +## Usage + +```toml +[dependencies] +litchi-core = "0.0.1" +``` + +```rust +use litchi_core::{FileFormat, Result}; +use litchi_core::bom::strip_bom; + +fn inspect(bytes: &[u8]) -> Result<()> { + let format = FileFormat::detect(bytes); + let body = strip_bom(bytes); + println!("format = {:?}, body = {} bytes", format, body.len()); + Ok(()) +} +``` + +## Features + +- Unified `Error` and `Result` types built on `thiserror` +- File-format detection via the `FileFormat` enum +- BOM detection and stripping for UTF-8/16/32 streams +- Document metadata, shape, and style/length primitives shared across formats +- SIMD-friendly helpers and zero-copy XML slice types + +## License + +Licensed under the Apache License, Version 2.0. Part of the +[Litchi](https://github.com/DevExzh/litchi) workspace. diff --git a/crates/litchi-core/examples/bom_demo.rs b/crates/litchi-core/examples/bom_demo.rs new file mode 100644 index 0000000..1dc9da3 --- /dev/null +++ b/crates/litchi-core/examples/bom_demo.rs @@ -0,0 +1,194 @@ +//! Demonstrates Byte Order Mark (BOM) handling in `litchi-core`. +//! +//! Walks through every variant of [`BomKind`], synthesises a small payload +//! with that BOM prefixed, then uses [`strip_bom`] to detect the BOM and +//! [`write_bom`] to emit one. For each case the program prints the BOM +//! detected, the body length after stripping, and a hex preview of the +//! first few bytes. +//! +//! Run with: +//! ```bash +//! cargo run -p litchi-core --example bom_demo +//! ``` +//! +//! No CLI arguments are required — the example is fully self-contained. 
+ +use litchi_core::{ + BomKind, UTF8_BOM, UTF16_BE_BOM, UTF16_LE_BOM, UTF32_BE_BOM, UTF32_LE_BOM, strip_bom, write_bom, +}; +use std::io::Cursor; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + println!("=== litchi-core BOM demo ===\n"); + + print_bom_constants(); + println!(); + + // Body fed alongside each BOM. ASCII for UTF-8; for UTF-16/UTF-32 we + // encode "Hi" in the matching width so the hex preview looks plausible. + // The detector itself only inspects the prefix, so any opaque trailer + // works. + let body_ascii = b"Hello, BOM!"; + let body_utf16_le = encode_utf16_le("Hi"); + let body_utf16_be = encode_utf16_be("Hi"); + let body_utf32_le = encode_utf32_le("Hi"); + let body_utf32_be = encode_utf32_be("Hi"); + + demo_round_trip("UTF-8", BomKind::Utf8, body_ascii)?; + demo_round_trip("UTF-16 LE", BomKind::Utf16Le, &body_utf16_le)?; + demo_round_trip("UTF-16 BE", BomKind::Utf16Be, &body_utf16_be)?; + demo_round_trip("UTF-32 LE", BomKind::Utf32Le, &body_utf32_le)?; + demo_round_trip("UTF-32 BE", BomKind::Utf32Be, &body_utf32_be)?; + + println!(); + demo_no_bom(b"no-bom-here, just plain ASCII")?; + + Ok(()) +} + +/// Print the raw constants exposed at the crate root, plus each variant's +/// `as_bytes()` and `len()` accessors. +fn print_bom_constants() { + println!("Public BOM constants:"); + println!(" UTF8_BOM = {}", hex(&UTF8_BOM)); + println!(" UTF16_LE_BOM = {}", hex(&UTF16_LE_BOM)); + println!(" UTF16_BE_BOM = {}", hex(&UTF16_BE_BOM)); + println!(" UTF32_LE_BOM = {}", hex(&UTF32_LE_BOM)); + println!(" UTF32_BE_BOM = {}", hex(&UTF32_BE_BOM)); + + println!("\nBomKind accessors:"); + for kind in [ + BomKind::Utf8, + BomKind::Utf16Le, + BomKind::Utf16Be, + BomKind::Utf32Le, + BomKind::Utf32Be, + ] { + println!( + " {:?}: {} bytes, prefix = {}", + kind, + kind.len(), + hex(kind.as_bytes()) + ); + } +} + +/// Build `[BOM | body]`, write it via `write_bom`, then read it back via +/// `strip_bom` and report what was detected.
+fn demo_round_trip( + label: &str, + kind: BomKind, + body: &[u8], +) -> Result<(), Box<dyn std::error::Error>> { + // 1. Build the payload using `write_bom`. + let mut payload: Vec<u8> = Vec::with_capacity(kind.len() + body.len()); + write_bom(&mut payload, kind)?; + payload.extend_from_slice(body); + + // 2. Detect it back via `strip_bom`, which seeks past the BOM on success. + let mut cursor = Cursor::new(&payload); + let detected = strip_bom(&mut cursor)?; + + // 3. Whatever remains is the body. + let mut remaining = Vec::new(); + std::io::Read::read_to_end(&mut cursor, &mut remaining)?; + + println!("--- {} ---", label); + println!( + " full payload : {} bytes preview = {}", + payload.len(), + hex_preview(&payload, 12) + ); + match detected { + Some((found, consumed)) => { + println!( + " detected BOM : {:?} ({} bytes consumed)", + found, consumed + ); + assert_eq!(found, kind, "round trip mismatch for {:?}", kind); + }, + None => println!(" detected BOM : <none>"), + } + println!( + " body after strip : {} bytes preview = {}", + remaining.len(), + hex_preview(&remaining, 12) + ); + println!(); + Ok(()) +} + +/// Show that `strip_bom` returns `Ok(None)` and does NOT advance the cursor +/// when the input lacks a BOM.
+fn demo_no_bom(body: &[u8]) -> Result<(), Box<dyn std::error::Error>> { + let mut cursor = Cursor::new(body); + let detected = strip_bom(&mut cursor)?; + let pos_after = std::io::Seek::stream_position(&mut cursor)?; + + println!("--- no BOM ---"); + println!( + " input : {} bytes preview = {}", + body.len(), + hex_preview(body, 12) + ); + println!(" detected BOM : {:?}", detected); + println!( + " cursor position : {} (should be 0 — strip_bom rewinds on miss)", + pos_after + ); + Ok(()) +} + +// --- helpers ----------------------------------------------------------------- + +fn hex(bytes: &[u8]) -> String { + let mut s = String::with_capacity(bytes.len() * 3); + for (i, b) in bytes.iter().enumerate() { + if i > 0 { + s.push(' '); + } + s.push_str(&format!("{:02X}", b)); + } + s +} + +fn hex_preview(bytes: &[u8], max: usize) -> String { + let take = bytes.len().min(max); + let mut out = hex(&bytes[..take]); + if bytes.len() > take { + out.push_str(" ..."); + } + out +} + +fn encode_utf16_le(s: &str) -> Vec<u8> { + let mut out = Vec::with_capacity(s.len() * 2); + for unit in s.encode_utf16() { + out.extend_from_slice(&unit.to_le_bytes()); + } + out +} + +fn encode_utf16_be(s: &str) -> Vec<u8> { + let mut out = Vec::with_capacity(s.len() * 2); + for unit in s.encode_utf16() { + out.extend_from_slice(&unit.to_be_bytes()); + } + out +} + +fn encode_utf32_le(s: &str) -> Vec<u8> { + let mut out = Vec::with_capacity(s.len() * 4); + for ch in s.chars() { + out.extend_from_slice(&(ch as u32).to_le_bytes()); + } + out +} + +fn encode_utf32_be(s: &str) -> Vec<u8> { + let mut out = Vec::with_capacity(s.len() * 4); + for ch in s.chars() { + out.extend_from_slice(&(ch as u32).to_be_bytes()); + } + out +} diff --git a/crates/litchi-core/examples/detect_format.rs b/crates/litchi-core/examples/detect_format.rs new file mode 100644 index 0000000..1d342dc --- /dev/null +++ b/crates/litchi-core/examples/detect_format.rs @@ -0,0 +1,138 @@ +//! Demonstrates signature-based file format detection in `litchi-core`. +//! +//!
Reads up to the first 512 bytes of a file (or every file in a directory) +//! and runs the public detection helpers from `litchi_core::detection` to +//! classify the contents. +//! +//! Run with: +//! ```bash +//! cargo run -p litchi-core --example detect_format --all-features -- <path> +//! cargo run -p litchi-core --example detect_format --all-features -- test-data/ooxml/docx +//! ``` +//! +//! Without `--all-features`, ODF detection is stubbed and returns `None` when +//! the `odf` feature is not enabled. + +use litchi_core::FileFormat; +use litchi_core::detection::simd_utils::check_office_signatures; +use std::fs; +use std::io::Read; +use std::path::Path; + +const HEAD_BYTES: usize = 512; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let mut args = std::env::args().skip(1); + let target = match args.next() { + Some(p) => p, + None => { + eprintln!("usage: detect_format <path>"); + std::process::exit(2); + }, + }; + + let path = Path::new(&target); + if path.is_dir() { + walk_dir(path)?; + } else if path.is_file() { + let label = describe(path)?; + println!("{}: {}", path.display(), label); + } else { + eprintln!("path does not exist or is not accessible: {}", target); + std::process::exit(1); + } + + Ok(()) +} + +/// Walk the immediate children of `dir` (non-recursive) and detect each file. +fn walk_dir(dir: &Path) -> Result<(), Box<dyn std::error::Error>> { + let mut entries: Vec<_> = fs::read_dir(dir)?.filter_map(Result::ok).collect(); + entries.sort_by_key(|e| e.file_name()); + + println!("Scanning {} ...", dir.display()); + for entry in entries { + let p = entry.path(); + if !p.is_file() { + continue; + } + let name = p.file_name().unwrap().to_string_lossy().into_owned(); + match describe(&p) { + Ok(label) => println!(" {:<40} {}", name, label), + Err(e) => println!(" {:<40} <error: {}>", name, e), + } + } + Ok(()) +} + +/// Read the first `HEAD_BYTES` bytes of `path` and classify them.
+fn describe(path: &Path) -> Result<String, Box<dyn std::error::Error>> { + let mut file = fs::File::open(path)?; + let mut buf = vec![0u8; HEAD_BYTES]; + let read = read_up_to(&mut file, &mut buf)?; + buf.truncate(read); + + Ok(format_label(&buf, path)) +} + +/// Read until either the buffer is full or EOF. +fn read_up_to<R: Read>(reader: &mut R, buf: &mut [u8]) -> std::io::Result<usize> { + let mut filled = 0; + while filled < buf.len() { + match reader.read(&mut buf[filled..])? { + 0 => break, + n => filled += n, + } + } + Ok(filled) +} + +/// Run the available signature checks against `bytes` and return a friendly +/// label. +/// +/// `litchi-core` exposes signature-level helpers; the umbrella `litchi` +/// crate layers ZIP-content inspection on top to disambiguate +/// .docx vs .xlsx vs .pptx vs iWork. We surface the underlying signature +/// here so callers can see exactly what the byte-level detection tells us. +fn format_label(bytes: &[u8], path: &Path) -> String { + // RTF: matches `{\rtf` prefix (always available regardless of feature + // flags — the helper itself has no `cfg` gate). + if let Some(fmt) = litchi_core::detection::rtf::detect_rtf_format(bytes) { + return describe_format(fmt); + } + + // ODF: requires reading the ZIP `mimetype` member. The `odf` feature + // gates the real implementation; the stub returns `None`. + if let Some(fmt) = litchi_core::detection::odf::detect_odf_format(bytes) { + return describe_format(fmt); + } + + // Lower-level signature mask for the remaining categories. + let mask = check_office_signatures(bytes); + if mask.is_ole2() { + return format!( + "OLE2 container (legacy Office: .doc/.xls/.ppt) — {}", + suffix(path) + ); + } + if mask.is_zip() { + return format!("ZIP container (OOXML/ODF/iWork) — {}", suffix(path)); + } + if mask.is_rtf() { + // Defensive — should already be caught above.
+ return describe_format(FileFormat::Rtf); + } + + "unknown / unrecognised signature".to_string() +} + +fn describe_format(fmt: FileFormat) -> String { + format!("{:?}", fmt) +} + +fn suffix(path: &Path) -> String { + path.extension() + .and_then(|s| s.to_str()) + .map(|e| format!("extension: .{}", e)) + .unwrap_or_else(|| "no extension".to_string()) +} diff --git a/crates/litchi-core/fuzz/.gitignore b/crates/litchi-core/fuzz/.gitignore new file mode 100644 index 0000000..1a45eee --- /dev/null +++ b/crates/litchi-core/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/crates/litchi-core/fuzz/Cargo.toml b/crates/litchi-core/fuzz/Cargo.toml new file mode 100644 index 0000000..113e096 --- /dev/null +++ b/crates/litchi-core/fuzz/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "litchi-core-fuzz" +version = "0.0.0" +edition = "2024" +publish = false +authors = ["Ryker Zhu "] +license = "Apache-2.0" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +litchi-core = { path = "..", features = ["odf"] } + +[[bin]] +name = "detect_format" +path = "fuzz_targets/detect_format.rs" +test = false +doc = false +bench = false + +[profile.release] +debug = 1 +codegen-units = 1 +lto = "thin" + +[workspace] diff --git a/crates/litchi-core/fuzz/fuzz_targets/detect_format.rs b/crates/litchi-core/fuzz/fuzz_targets/detect_format.rs new file mode 100644 index 0000000..87a5d63 --- /dev/null +++ b/crates/litchi-core/fuzz/fuzz_targets/detect_format.rs @@ -0,0 +1,10 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; + +// Drives raw bytes through litchi-core's format detection. +// Errors are expected on malformed input; we only want to ensure +// the detector does not panic, OOM, or hit UB on arbitrary bytes. 
+fuzz_target!(|data: &[u8]| { + let _ = litchi_core::detection::odf::detect_odf_format(data); +}); diff --git a/src/common/binary.rs b/crates/litchi-core/src/binary.rs similarity index 95% rename from src/common/binary.rs rename to crates/litchi-core/src/binary.rs index 5365fc0..6182285 100644 --- a/src/common/binary.rs +++ b/crates/litchi-core/src/binary.rs @@ -42,7 +42,7 @@ pub type BinaryResult = Result; /// # Examples /// /// ``` -/// use litchi::common::binary::read_u16_le; +/// use litchi_core::binary::read_u16_le; /// let data = [0x34, 0x12, 0x78, 0x56]; /// assert_eq!(read_u16_le(&data, 0).unwrap(), 0x1234); /// assert_eq!(read_u16_le(&data, 2).unwrap(), 0x5678); @@ -71,7 +71,7 @@ pub fn read_u16_le_at(data: &[u8], offset: usize) -> BinaryResult { /// # Examples /// /// ``` -/// use litchi::common::binary::read_i16_le; +/// use litchi_core::binary::read_i16_le; /// let data = [0xFF, 0xFF]; /// assert_eq!(read_i16_le(&data, 0).unwrap(), -1i16); /// ``` @@ -93,7 +93,7 @@ pub fn read_i16_le(data: &[u8], offset: usize) -> BinaryResult { /// # Examples /// /// ``` -/// use litchi::common::binary::read_u32_le; +/// use litchi_core::binary::read_u32_le; /// let data = [0x78, 0x56, 0x34, 0x12]; /// assert_eq!(read_u32_le(&data, 0).unwrap(), 0x12345678); /// ``` @@ -121,7 +121,7 @@ pub fn read_u32_le_at(data: &[u8], offset: usize) -> BinaryResult { /// # Examples /// /// ``` -/// use litchi::common::binary::read_i32_le; +/// use litchi_core::binary::read_i32_le; /// let data = [0xFF, 0xFF, 0xFF, 0xFF]; /// assert_eq!(read_i32_le(&data, 0).unwrap(), -1i32); /// ``` @@ -143,7 +143,7 @@ pub fn read_i32_le(data: &[u8], offset: usize) -> BinaryResult { /// # Examples /// /// ``` -/// use litchi::common::binary::read_f64_le; +/// use litchi_core::binary::read_f64_le; /// let data = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F]; /// assert!((read_f64_le(&data, 0).unwrap() - 1.0).abs() < f64::EPSILON); /// ``` @@ -174,7 +174,7 @@ pub fn read_f64_le_at(data: &[u8], 
offset: usize) -> BinaryResult { /// # Examples /// /// ``` -/// use litchi::common::binary::parse_utf16le_string; +/// use litchi_core::binary::parse_utf16le_string; /// let data = vec![0x48, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00, 0x00, 0x00]; /// assert_eq!(parse_utf16le_string(&data), "Hello"); /// ``` @@ -215,7 +215,7 @@ pub fn parse_utf16le_string(data: &[u8]) -> String { /// # Examples /// /// ``` -/// use litchi::common::binary::parse_utf16le_string_len; +/// use litchi_core::binary::parse_utf16le_string_len; /// let data = vec![0x48, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00]; /// assert_eq!(parse_utf16le_string_len(&data, 0, 5), "Hello"); /// assert_eq!(parse_utf16le_string_len(&data, 0, 3), "Hel"); @@ -251,7 +251,7 @@ pub fn parse_utf16le_string_len(data: &[u8], offset: usize, char_count: usize) - /// # Examples /// /// ``` -/// use litchi::common::binary::parse_windows1252_string; +/// use litchi_core::binary::parse_windows1252_string; /// let data = b"Hello\x93World\x94\0"; /// let result = parse_windows1252_string(data); /// assert!(result.starts_with("Hello")); @@ -268,7 +268,7 @@ pub fn parse_windows1252_string(data: &[u8]) -> String { /// # Examples /// /// ``` -/// use litchi::common::binary::parse_windows1252_string_len; +/// use litchi_core::binary::parse_windows1252_string_len; /// let data = b"Hello World"; /// assert_eq!(parse_windows1252_string_len(data, 0, 5), "Hello"); /// ``` diff --git a/src/common/bom.rs b/crates/litchi-core/src/bom.rs similarity index 99% rename from src/common/bom.rs rename to crates/litchi-core/src/bom.rs index 104f9b3..8e59b23 100644 --- a/src/common/bom.rs +++ b/crates/litchi-core/src/bom.rs @@ -3,7 +3,7 @@ //! Provides detection, stripping, and writing helpers for common Unicode //! encodings used in text-based formats. -use crate::common::Result; +use crate::Result; use std::io::{Read, Seek, SeekFrom, Write}; /// Supported BOM encodings. 
diff --git a/crates/litchi-core/src/detection/mod.rs b/crates/litchi-core/src/detection/mod.rs new file mode 100644 index 0000000..fea9a61 --- /dev/null +++ b/crates/litchi-core/src/detection/mod.rs @@ -0,0 +1,13 @@ +//! File format detection utilities. +//! +//! This module provides fast, safe, and memory-efficient signature-based +//! file format detection. The "smart" detection that opens files via +//! per-format crates lives in the umbrella `litchi` crate (see `litchi::detection_smart`). + +pub mod odf; +pub mod rtf; +pub mod simd_utils; +pub mod types; +pub mod utils; + +pub use types::FileFormat; diff --git a/src/common/detection/odf.rs b/crates/litchi-core/src/detection/odf.rs similarity index 95% rename from src/common/detection/odf.rs rename to crates/litchi-core/src/detection/odf.rs index f1b7821..3779f83 100644 --- a/src/common/detection/odf.rs +++ b/crates/litchi-core/src/detection/odf.rs @@ -8,11 +8,11 @@ //! //! Uses SIMD-accelerated signature matching for improved performance. -use crate::common::detection::FileFormat; +use crate::detection::FileFormat; use std::io::{Read, Seek}; #[cfg(feature = "odf")] -use crate::common::detection::simd_utils::signature_matches; +use crate::detection::simd_utils::signature_matches; /// Standard ODF MIME types for supported document types. 
const ODT_MIME: &str = "application/vnd.oasis.opendocument.text"; @@ -39,8 +39,8 @@ const ODP_TEMPLATE_MIME: &str = "application/vnd.oasis.opendocument.presentation /// # Examples /// /// ```rust -/// use litchi::common::detection::odf::detect_odf_format_from_mimetype; -/// use litchi::common::detection::FileFormat; +/// use litchi_core::detection::odf::detect_odf_format_from_mimetype; +/// use litchi_core::detection::FileFormat; /// /// let mime = b"application/vnd.oasis.opendocument.text"; /// assert_eq!(detect_odf_format_from_mimetype(mime), Some(FileFormat::Odt)); @@ -82,8 +82,7 @@ pub fn detect_odf_format_from_mimetype(mimetype: &[u8]) -> Option { #[cfg(feature = "odf")] pub fn detect_odf_format(bytes: &[u8]) -> Option { // Quick validation: check ZIP signature using SIMD - if bytes.len() < 4 || !signature_matches(bytes, crate::common::detection::utils::ZIP_SIGNATURE) - { + if bytes.len() < 4 || !signature_matches(bytes, crate::detection::utils::ZIP_SIGNATURE) { return None; } diff --git a/src/common/detection/rtf.rs b/crates/litchi-core/src/detection/rtf.rs similarity index 95% rename from src/common/detection/rtf.rs rename to crates/litchi-core/src/detection/rtf.rs index 13e67bf..0dcdd8b 100644 --- a/src/common/detection/rtf.rs +++ b/crates/litchi-core/src/detection/rtf.rs @@ -5,8 +5,8 @@ //! //! Uses SIMD-accelerated signature matching for improved performance. -use crate::common::detection::FileFormat; -use crate::common::detection::simd_utils::signature_matches; +use crate::detection::FileFormat; +use crate::detection::simd_utils::signature_matches; use std::io::{Read, Seek}; /// RTF signature patterns. 
@@ -37,7 +37,7 @@ const RTF_SIGNATURE_LEN: usize = 5; /// # Examples /// /// ```rust -/// use litchi::common::detection::rtf::detect_rtf_format; +/// use litchi_core::detection::rtf::detect_rtf_format; /// /// let rtf_data = b"{\\rtf1\\ansi\\deff0 Hello World}"; /// assert!(detect_rtf_format(rtf_data).is_some()); diff --git a/src/common/detection/simd_utils.rs b/crates/litchi-core/src/detection/simd_utils.rs similarity index 97% rename from src/common/detection/simd_utils.rs rename to crates/litchi-core/src/detection/simd_utils.rs index 8a59bc5..2cfc7fd 100644 --- a/src/common/detection/simd_utils.rs +++ b/crates/litchi-core/src/detection/simd_utils.rs @@ -5,7 +5,7 @@ //! //! Uses `smallvec` to avoid heap allocations for common small result sets. -use crate::common::simd::cmp::simd_eq_u8; +use crate::simd::cmp::simd_eq_u8; use smallvec::SmallVec; /// Check if a byte slice starts with a given signature using SIMD acceleration. @@ -26,7 +26,7 @@ use smallvec::SmallVec; /// # Examples /// /// ```rust -/// use litchi::common::detection::simd_utils::signature_matches; +/// use litchi_core::detection::simd_utils::signature_matches; /// /// let ole2_sig = &[0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; /// let data = &[0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, 0x00, 0x00]; @@ -71,7 +71,7 @@ pub fn signature_matches(data: &[u8], signature: &[u8]) -> bool { /// # Examples /// /// ```rust -/// use litchi::common::detection::simd_utils::signature_matches_any; +/// use litchi_core::detection::simd_utils::signature_matches_any; /// /// let data = &[0x50, 0x4B, 0x03, 0x04]; // ZIP signature /// let signatures = [ @@ -120,7 +120,7 @@ pub fn signature_matches_any(data: &[u8], signatures: &[&[u8]]) -> Option /// # Examples /// /// ```rust -/// use litchi::common::detection::simd_utils::parallel_signature_check; +/// use litchi_core::detection::simd_utils::parallel_signature_check; /// /// let data = &[0x50, 0x4B, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00]; /// let signatures = [ 
@@ -180,7 +180,7 @@ pub fn parallel_signature_check(data: &[u8], signatures: &[&[u8]]) -> SmallVec<[ /// # Examples /// /// ```rust -/// use litchi::common::detection::simd_utils::{check_office_signatures, FormatSignatureMask}; +/// use litchi_core::detection::simd_utils::{check_office_signatures, FormatSignatureMask}; /// /// let zip_data = &[0x50, 0x4B, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00]; /// let mask = check_office_signatures(zip_data); @@ -197,12 +197,12 @@ pub fn check_office_signatures(data: &[u8]) -> FormatSignatureMask { let mut mask = FormatSignatureMask::empty(); // Check OLE2 signature (8 bytes) - if signature_matches(&data[0..8], crate::common::detection::utils::OLE2_SIGNATURE) { + if signature_matches(&data[0..8], crate::detection::utils::OLE2_SIGNATURE) { mask |= FormatSignatureMask::OLE2; } // Check ZIP signature (4 bytes) - if signature_matches(&data[0..4], crate::common::detection::utils::ZIP_SIGNATURE) { + if signature_matches(&data[0..4], crate::detection::utils::ZIP_SIGNATURE) { mask |= FormatSignatureMask::ZIP; } @@ -305,7 +305,7 @@ impl std::ops::BitOrAssign for FormatSignatureMask { /// # Examples /// /// ```rust -/// use litchi::common::detection::simd_utils::find_pattern; +/// use litchi_core::detection::simd_utils::find_pattern; /// /// let content = b"application/vnd.openxmlformats-officedocument.wordprocessingml.document"; /// let pattern = b"wordprocessingml"; diff --git a/crates/litchi-core/src/detection/types.rs b/crates/litchi-core/src/detection/types.rs new file mode 100644 index 0000000..4c195c4 --- /dev/null +++ b/crates/litchi-core/src/detection/types.rs @@ -0,0 +1,35 @@ +//! File format type enumeration. + +/// Supported file formats that can be detected. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum FileFormat { + /// Microsoft Word Document (OLE2 format, .doc) + Doc, + /// Microsoft Word Document (OOXML format, .docx) + Docx, + /// Microsoft PowerPoint Presentation (OLE2 format, .ppt) + Ppt, + /// Microsoft PowerPoint Presentation (OOXML format, .pptx) + Pptx, + /// Microsoft Excel Spreadsheet (OLE2 format, .xls) + Xls, + /// Microsoft Excel Spreadsheet (OOXML format, .xlsx) + Xlsx, + /// Microsoft Excel Spreadsheet (Binary OOXML format, .xlsb) + Xlsb, + /// Rich Text Format Document (.rtf) + Rtf, + /// Apple Pages Document (.pages) + Pages, + /// Apple Keynote Presentation (.key) + Keynote, + /// Apple Numbers Spreadsheet (.numbers) + Numbers, + /// OpenDocument Text (.odt) + Odt, + /// OpenDocument Spreadsheet (.ods) + Ods, + /// OpenDocument Presentation (.odp) + Odp, +} diff --git a/src/common/detection/utils.rs b/crates/litchi-core/src/detection/utils.rs similarity index 100% rename from src/common/detection/utils.rs rename to crates/litchi-core/src/detection/utils.rs diff --git a/src/common/encoding.rs b/crates/litchi-core/src/encoding.rs similarity index 99% rename from src/common/encoding.rs rename to crates/litchi-core/src/encoding.rs index dcc2c9d..d5e33a2 100644 --- a/src/common/encoding.rs +++ b/crates/litchi-core/src/encoding.rs @@ -4,7 +4,7 @@ //! formats that use codepage-based text encoding (OLE, RTF, etc.). It supports common Windows //! codepages and provides efficient conversion to UTF-8. -use crate::common::{Error, Result}; +use crate::{Error, Result}; use encoding_rs::Encoding; /// Map Windows codepage identifier to encoding_rs Encoding. 
@@ -25,7 +25,7 @@ use encoding_rs::Encoding; /// /// # Examples /// ``` -/// use litchi::common::encoding::codepage_to_encoding; +/// use litchi_core::encoding::codepage_to_encoding; /// /// let encoding = codepage_to_encoding(936).unwrap(); /// assert_eq!(encoding.name(), "GBK"); @@ -112,7 +112,7 @@ pub fn codepage_to_encoding(codepage: u32) -> Option<&'static Encoding> { /// /// # Examples /// ``` -/// use litchi::common::encoding::decode_utf16le; +/// use litchi_core::encoding::decode_utf16le; /// /// let bytes = b"H\x00e\x00l\x00l\x00o\x00"; /// let text = decode_utf16le(bytes); @@ -168,7 +168,7 @@ pub fn strip_null_terminators(bytes: &[u8]) -> &[u8] { /// # Examples /// /// ``` -/// use litchi::common::encoding::decode_bytes; +/// use litchi_core::encoding::decode_bytes; /// /// // Decode Windows-1252 (Western European) text /// let bytes = b"Hello, World!"; @@ -225,7 +225,7 @@ pub fn decode_bytes(bytes: &[u8], codepage: Option) -> Option { /// # Examples /// /// ``` -/// use litchi::common::encoding::decode_hex_data; +/// use litchi_core::encoding::decode_hex_data; /// /// let hex = "48656C6C6F"; // "Hello" /// let decoded = decode_hex_data(hex).unwrap(); @@ -251,7 +251,7 @@ pub fn decode_hex_data(hex_str: &str) -> Result> { let hex_clean: String = hex_str.chars().filter(|c| !c.is_whitespace()).collect(); // Validate even length - if !hex_clean.len().is_multiple_of(2) { + if hex_clean.len() % 2 != 0 { return Err(Error::ParseError( "Hex data must have even number of characters".to_string(), )); diff --git a/crates/litchi-core/src/error/conversions.rs b/crates/litchi-core/src/error/conversions.rs new file mode 100644 index 0000000..a8adc38 --- /dev/null +++ b/crates/litchi-core/src/error/conversions.rs @@ -0,0 +1,21 @@ +//! Error conversion implementations. +//! +//! This module contains `From` trait implementations for error types whose +//! source crates are reachable from `litchi-core` (quick-xml, soapberry-zip). +//! 
Conversions for per-format error types (`crate::ole::*`, `crate::ooxml::*`) +//! live in the umbrella crate at `src/error_ext.rs` because their source +//! types are not visible to `litchi-core`. + +use super::types::Error; + +impl From for Error { + fn from(err: quick_xml::Error) -> Self { + Error::XmlError(err.to_string()) + } +} + +impl From for Error { + fn from(err: soapberry_zip::Error) -> Self { + Error::ZipError(err.to_string()) + } +} diff --git a/src/common/error/mod.rs b/crates/litchi-core/src/error/mod.rs similarity index 100% rename from src/common/error/mod.rs rename to crates/litchi-core/src/error/mod.rs diff --git a/crates/litchi-core/src/error/types.rs b/crates/litchi-core/src/error/types.rs new file mode 100644 index 0000000..7090ac5 --- /dev/null +++ b/crates/litchi-core/src/error/types.rs @@ -0,0 +1,68 @@ +//! Unified error types for Litchi library. +//! +//! This module provides a unified error type that encompasses errors from both +//! OLE2 and OOXML parsing, presenting a consistent API to users. +use crate::binary::BinaryError; +use thiserror::Error; + +/// Main error type for Litchi operations. 
+#[derive(Error, Debug)] +#[non_exhaustive] +pub enum Error { + /// IO error + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + /// Parse error occurred + #[error("Parse error: {0}")] + ParseError(String), + + /// Invalid file format + #[error("Invalid format: {0}")] + InvalidFormat(String), + + /// File is not a recognized Office format + #[error("Not a valid Office file")] + NotOfficeFile, + + /// Corrupted or malformed file + #[error("Corrupted file: {0}")] + CorruptedFile(String), + + /// Stream or part not found + #[error("Component not found: {0}")] + ComponentNotFound(String), + + /// XML parsing error + #[error("XML error: {0}")] + XmlError(String), + + /// Invalid content type + #[error("Invalid content type: expected {expected}, got {got}")] + InvalidContentType { expected: String, got: String }, + + /// ZIP archive error + #[error("ZIP error: {0}")] + ZipError(String), + + /// Unsupported feature + #[error("Unsupported feature: {0}")] + Unsupported(String), + + /// Feature disabled at compile time + #[error("Feature '{0}' is disabled. Enable it with --features {0}")] + FeatureDisabled(String), + + /// Generic error + #[error("{0}")] + Other(String), +} + +impl From for Error { + fn from(err: BinaryError) -> Self { + Error::ParseError(format!("Binary parsing error: {}", err)) + } +} + +/// Result type for Litchi operations. 
+pub type Result = std::result::Result; diff --git a/src/common/id.rs b/crates/litchi-core/src/id.rs similarity index 97% rename from src/common/id.rs rename to crates/litchi-core/src/id.rs index 431edf0..cb761bf 100644 --- a/src/common/id.rs +++ b/crates/litchi-core/src/id.rs @@ -1,4 +1,4 @@ -use crate::common::simd::fmt::hex_encode_to_string; +use crate::simd::fmt::hex_encode_to_string; use rand::RngExt; /// Generate a random RFC4122 v4 GUID as raw 16 bytes diff --git a/crates/litchi-core/src/lib.rs b/crates/litchi-core/src/lib.rs new file mode 100644 index 0000000..163179b --- /dev/null +++ b/crates/litchi-core/src/lib.rs @@ -0,0 +1,49 @@ +// Many items in this crate were previously private inside the umbrella +// `litchi` crate and never had public doc coverage. Tighten this once a +// docs pass lands. +#![allow(missing_docs)] +//! Common types, traits, and utilities shared across formats. +//! +//! This crate provides unified types and traits used by both OLE2 (legacy) +//! and OOXML (modern) implementations, ensuring a consistent API for users.
+ +// Submodule declarations +pub mod binary; +pub mod bom; +pub mod detection; +#[cfg(any(feature = "ole", feature = "rtf"))] +pub mod encoding; +pub mod error; +pub mod metadata; +pub mod shapes; +pub mod sheet; +pub mod simd; +pub mod style; +/// Common unit conversion utilities (length units used across all formats) +pub mod unit; +/// XML utilities +pub mod xml; +/// Shared byte slice for zero-copy element storage across formats +pub mod xml_slice; +// ID generation utilities +pub mod id; + +#[cfg(test)] +mod tests; + +// Re-exports for convenience +pub use bom::{ + BomKind, UTF8_BOM, UTF16_BE_BOM, UTF16_LE_BOM, UTF32_BE_BOM, UTF32_LE_BOM, strip_bom, write_bom, +}; +pub use detection::FileFormat; +pub use error::{Error, Result}; +pub use metadata::Metadata; +pub use shapes::{PlaceholderType, ShapeType}; +pub use style::{Length, RGBColor, VerticalPosition}; +// Unit conversions +pub use unit::LengthUnit; +// Shared slice types — kept `pub` (not `pub(crate)` per spec) because the +// umbrella's docx code uses `XmlSlice` in public type signatures across the +// crate boundary. `#[doc(hidden)]` suppresses public docs surface. +#[doc(hidden)] +pub use xml_slice::{XmlArenaBuilder, XmlSlice}; diff --git a/crates/litchi-core/src/metadata.rs b/crates/litchi-core/src/metadata.rs new file mode 100644 index 0000000..0907b20 --- /dev/null +++ b/crates/litchi-core/src/metadata.rs @@ -0,0 +1,126 @@ +//! Unified metadata representation for Word documents. +//! +//! This module provides a unified interface for document metadata +//! that works with both OLE (.doc) and OOXML (.docx) formats. +//! +//! Format-specific conversions (e.g. `From`) and +//! YAML front-matter serialization (which depends on `serde_saphyr`, a +//! workspace dep not present in `litchi-core`) live in the umbrella crate +//! at `src/metadata_ext.rs`. +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +/// Unified document metadata structure. 
+/// +/// Contains standard document properties that can be extracted from +/// both OLE and OOXML document formats. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Metadata { + /// Document title + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, + /// Document subject + #[serde(skip_serializing_if = "Option::is_none")] + pub subject: Option, + /// Document author/creator + #[serde(skip_serializing_if = "Option::is_none")] + pub author: Option, + /// Keywords associated with the document + #[serde(skip_serializing_if = "Option::is_none")] + pub keywords: Option, + /// Document description/comments + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Template used to create the document + #[serde(skip_serializing_if = "Option::is_none")] + pub template: Option, + /// Last person to modify the document + #[serde(skip_serializing_if = "Option::is_none")] + pub last_modified_by: Option, + /// Revision number + #[serde(skip_serializing_if = "Option::is_none")] + pub revision: Option, + /// Creation date and time (UTC) + #[serde(skip_serializing_if = "Option::is_none")] + pub created: Option>, + /// Last modification date and time (UTC) + #[serde(skip_serializing_if = "Option::is_none")] + pub modified: Option>, + /// Number of pages + #[serde(skip_serializing_if = "Option::is_none")] + pub page_count: Option, + /// Number of words + #[serde(skip_serializing_if = "Option::is_none")] + pub word_count: Option, + /// Number of characters + #[serde(skip_serializing_if = "Option::is_none")] + pub character_count: Option, + /// Application that created the document + #[serde(skip_serializing_if = "Option::is_none")] + pub application: Option, + /// Document category + #[serde(skip_serializing_if = "Option::is_none")] + pub category: Option, + /// Company/organization + #[serde(skip_serializing_if = "Option::is_none")] + pub company: Option, + /// Manager name + #[serde(skip_serializing_if =
"Option::is_none")] + pub manager: Option, + /// Content status (draft, final, etc.) + #[serde(skip_serializing_if = "Option::is_none")] + pub content_status: Option, + /// Last printed time + #[serde(skip_serializing_if = "Option::is_none")] + pub last_printed_time: Option>, + /// Security level + #[serde(skip_serializing_if = "Option::is_none")] + pub security: Option, + /// Codepage for text encoding + #[serde(skip_serializing_if = "Option::is_none")] + pub codepage: Option, +} + +impl Metadata { + /// Check if the metadata contains any actual data. + /// + /// Returns true if at least one field (of any kind) is populated. + pub fn has_data(&self) -> bool { + self.title.is_some() + || self.subject.is_some() + || self.author.is_some() + || self.keywords.is_some() + || self.description.is_some() + || self.template.is_some() + || self.last_modified_by.is_some() + || self.revision.is_some() + || self.created.is_some() + || self.modified.is_some() + || self.page_count.is_some() + || self.word_count.is_some() + || self.character_count.is_some() + || self.application.is_some() + || self.category.is_some() + || self.company.is_some() + || self.manager.is_some() || self.content_status.is_some() + || self.security.is_some() || self.last_printed_time.is_some() + || self.codepage.is_some() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_metadata_has_data() { + let empty_metadata = Metadata::default(); + assert!(!empty_metadata.has_data()); + + let metadata_with_title = Metadata { + title: Some("Test Document".to_string()), + ..Default::default() + }; + assert!(metadata_with_title.has_data()); + } +} diff --git a/src/common/shapes/mod.rs b/crates/litchi-core/src/shapes/mod.rs similarity index 100% rename from src/common/shapes/mod.rs rename to crates/litchi-core/src/shapes/mod.rs diff --git a/crates/litchi-core/src/shapes/types.rs b/crates/litchi-core/src/shapes/types.rs new file mode 100644 index 0000000..f7e6dda --- /dev/null +++ b/crates/litchi-core/src/shapes/types.rs @@ -0,0 +1,114 @@ +//! Common shape types and enumerations.
+//! +//! This module provides unified shape types used by both legacy (.ppt) and +//! modern (.pptx) presentation formats. +use std::fmt; + +/// Types of shapes in presentations. +/// +/// This enumeration is used for both legacy .ppt and modern .pptx formats, +/// providing a unified interface for shape type identification. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum ShapeType { + /// Text box or text shape + TextBox, + /// Placeholder shape (title, content, footer, etc.) + Placeholder, + /// Auto shape (rectangle, oval, arrow, etc.) + AutoShape, + /// Picture/image shape + Picture, + /// Group shape (container for other shapes) + Group, + /// Line shape + Line, + /// Connector shape + Connector, + /// Table shape + Table, + /// Graphic frame (chart, SmartArt, etc.) + GraphicFrame, + /// Unknown or unsupported shape type + Unknown, +} + +impl fmt::Display for ShapeType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ShapeType::TextBox => write!(f, "TextBox"), + ShapeType::Placeholder => write!(f, "Placeholder"), + ShapeType::AutoShape => write!(f, "AutoShape"), + ShapeType::Picture => write!(f, "Picture"), + ShapeType::Group => write!(f, "Group"), + ShapeType::Line => write!(f, "Line"), + ShapeType::Connector => write!(f, "Connector"), + ShapeType::Table => write!(f, "Table"), + ShapeType::GraphicFrame => write!(f, "GraphicFrame"), + ShapeType::Unknown => write!(f, "Unknown"), + } + } +} + +/// Placeholder types in presentations. +/// +/// Defines the semantic role of a placeholder shape. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub enum PlaceholderType { + /// Title placeholder + Title, + /// Body/content placeholder + Body, + /// Center title placeholder + CenteredTitle, + /// Subtitle placeholder + Subtitle, + /// Date placeholder + Date, + /// Slide number placeholder + SlideNumber, + /// Footer placeholder + Footer, + /// Header placeholder + Header, + /// Object placeholder (chart, table, etc.) + Object, + /// Chart placeholder + Chart, + /// Table placeholder + Table, + /// Clip art placeholder + ClipArt, + /// Diagram/organization chart placeholder + Diagram, + /// Media placeholder (audio, video) + Media, + /// Picture placeholder + Picture, + /// Unknown placeholder type + Unknown, +} + +impl fmt::Display for PlaceholderType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PlaceholderType::Title => write!(f, "Title"), + PlaceholderType::Body => write!(f, "Body"), + PlaceholderType::CenteredTitle => write!(f, "CenteredTitle"), + PlaceholderType::Subtitle => write!(f, "Subtitle"), + PlaceholderType::Date => write!(f, "Date"), + PlaceholderType::SlideNumber => write!(f, "SlideNumber"), + PlaceholderType::Footer => write!(f, "Footer"), + PlaceholderType::Header => write!(f, "Header"), + PlaceholderType::Object => write!(f, "Object"), + PlaceholderType::Chart => write!(f, "Chart"), + PlaceholderType::Table => write!(f, "Table"), + PlaceholderType::ClipArt => write!(f, "ClipArt"), + PlaceholderType::Diagram => write!(f, "Diagram"), + PlaceholderType::Media => write!(f, "Media"), + PlaceholderType::Picture => write!(f, "Picture"), + PlaceholderType::Unknown => write!(f, "Unknown"), + } + } +} diff --git a/crates/litchi-core/src/sheet/mod.rs b/crates/litchi-core/src/sheet/mod.rs new file mode 100644 index 0000000..ac91b47 --- /dev/null +++ b/crates/litchi-core/src/sheet/mod.rs @@ -0,0 +1,6 @@ +//! Spreadsheet abstraction traits and value types shared across formats. 
+pub mod traits; +pub mod types; + +pub use traits::{Cell, CellIterator, RowIterator, WorkbookTrait, Worksheet, WorksheetIterator}; +pub use types::{CellValue, Result}; diff --git a/src/sheet/traits.rs b/crates/litchi-core/src/sheet/traits.rs similarity index 100% rename from src/sheet/traits.rs rename to crates/litchi-core/src/sheet/traits.rs diff --git a/src/sheet/types.rs b/crates/litchi-core/src/sheet/types.rs similarity index 100% rename from src/sheet/types.rs rename to crates/litchi-core/src/sheet/types.rs diff --git a/src/common/simd/cmp.rs b/crates/litchi-core/src/simd/cmp.rs similarity index 99% rename from src/common/simd/cmp.rs rename to crates/litchi-core/src/simd/cmp.rs index a748280..67ba995 100644 --- a/src/common/simd/cmp.rs +++ b/crates/litchi-core/src/simd/cmp.rs @@ -20,7 +20,7 @@ //! # Examples //! //! ```rust -//! use litchi::common::simd::cmp::{simd_eq_u8, simd_ne_u8}; +//! use litchi_core::simd::cmp::{simd_eq_u8, simd_ne_u8}; //! //! let a = vec![1u8, 2, 3, 4, 5, 6, 7, 8]; //! let b = vec![1u8, 2, 0, 4, 5, 0, 7, 8]; @@ -2049,7 +2049,7 @@ pub use aarch64_impl::*; /// # Examples /// /// ```rust -/// use litchi::common::simd::cmp::simd_eq_u8; +/// use litchi_core::simd::cmp::simd_eq_u8; /// /// let a = vec![1u8, 2, 3, 4]; /// let b = vec![1u8, 2, 0, 4]; @@ -2106,7 +2106,7 @@ pub fn simd_ne_u8(a: &[u8], b: &[u8], result: &mut [u8]) { /// # Examples /// /// ```rust -/// use litchi::common::simd::cmp::is_all_zero; +/// use litchi_core::simd::cmp::is_all_zero; /// /// let zeros = [0u8; 16]; /// assert!(is_all_zero(&zeros)); diff --git a/src/common/simd/fmt.rs b/crates/litchi-core/src/simd/fmt.rs similarity index 98% rename from src/common/simd/fmt.rs rename to crates/litchi-core/src/simd/fmt.rs index 3d9d3d4..ff95766 100644 --- a/src/common/simd/fmt.rs +++ b/crates/litchi-core/src/simd/fmt.rs @@ -38,7 +38,7 @@ //! # Examples //! //! ```rust -//! use litchi::common::simd::fmt::hex_encode; +//! use litchi_core::simd::fmt::hex_encode; //! //! 
let data = b"\x01\x23\x45\x67\x89\xAB\xCD\xEF"; //! let hex = hex_encode(data); @@ -67,7 +67,7 @@ const HEX_CHARS_LOWER: &[u8; 16] = b"0123456789abcdef"; /// # Examples /// /// ``` -/// use litchi::common::simd::fmt::hex_encode; +/// use litchi_core::simd::fmt::hex_encode; /// /// let data = b"\xDE\xAD\xBE\xEF"; /// assert_eq!(hex_encode(data), "DEADBEEF"); @@ -84,7 +84,7 @@ pub fn hex_encode(bytes: &[u8]) -> String { /// # Examples /// /// ``` -/// use litchi::common::simd::fmt::hex_encode_lower; +/// use litchi_core::simd::fmt::hex_encode_lower; /// /// let data = b"\xDE\xAD\xBE\xEF"; /// assert_eq!(hex_encode_lower(data), "deadbeef"); @@ -562,7 +562,7 @@ unsafe fn hex_encode_sve2(bytes: &[u8], output: &mut String, lowercase: bool) { /// # Examples /// /// ``` -/// use litchi::common::simd::fmt::format_hex_with_separator; +/// use litchi_core::simd::fmt::format_hex_with_separator; /// /// let data = b"\x01\x23\x45\x67"; /// assert_eq!(format_hex_with_separator(data, ":"), "01:23:45:67"); @@ -606,7 +606,7 @@ pub fn format_hex_with_separator(bytes: &[u8], separator: &str) -> String { /// # Examples /// /// ``` -/// use litchi::hex_fmt; +/// use litchi_core::hex_fmt; /// /// let hex = hex_fmt!(b"\xDE\xAD\xBE\xEF"); /// assert_eq!(hex, "DEADBEEF"); @@ -614,7 +614,7 @@ pub fn format_hex_with_separator(bytes: &[u8], separator: &str) -> String { #[macro_export] macro_rules! hex_fmt { ($bytes:expr) => { - $crate::common::simd::fmt::hex_encode($bytes) + $crate::simd::fmt::hex_encode($bytes) }; } @@ -623,7 +623,7 @@ macro_rules! hex_fmt { /// # Examples /// /// ``` -/// use litchi::hex_fmt_lower; +/// use litchi_core::hex_fmt_lower; /// /// let hex = hex_fmt_lower!(b"\xDE\xAD\xBE\xEF"); /// assert_eq!(hex, "deadbeef"); @@ -631,7 +631,7 @@ macro_rules! hex_fmt { #[macro_export] macro_rules! 
hex_fmt_lower { ($bytes:expr) => { - $crate::common::simd::fmt::hex_encode_lower($bytes) + $crate::simd::fmt::hex_encode_lower($bytes) }; } diff --git a/crates/litchi-core/src/simd/mod.rs b/crates/litchi-core/src/simd/mod.rs new file mode 100644 index 0000000..4cbcb69 --- /dev/null +++ b/crates/litchi-core/src/simd/mod.rs @@ -0,0 +1,101 @@ +//! Common SIMD operations +//! +//! This module provides high-performance SIMD (Single Instruction, Multiple Data) operations +//! optimized for various CPU architectures and instruction sets. +//! +//! # Supported Architectures +//! +//! ## x86_64 +//! - **SSE** (Streaming SIMD Extensions): 128-bit vectors +//! - **SSE2**: Enhanced 128-bit integer operations +//! - **SSE3**: Additional 128-bit operations +//! - **SSSE3**: Supplemental 128-bit operations +//! - **SSE4.1**: 128-bit operations with additional instructions +//! - **SSE4.2**: 128-bit operations with string/text processing +//! - **AVX** (Advanced Vector Extensions): 256-bit floating-point operations +//! - **AVX2**: 256-bit integer operations +//! - **AVX-512**: 512-bit operations (F, BW, DQ, VL extensions) +//! +//! ## aarch64 (ARM) +//! - **NEON**: Fixed 128-bit SIMD operations (always available on aarch64) +//! - **SVE** (Scalable Vector Extension): Variable-length vectors (128-2048 bits) +//! - **SVE2**: Enhanced SVE with additional DSP and multimedia operations +//! +//! # Modules +//! +//! - [`cmp`]: Vector comparison operations (equal, not equal, greater than, less than, etc.) +//! - [`fmt`]: SIMD-optimized formatting operations (hex encoding, GUID/CLSID formatting) +//! +//! # Performance Considerations +//! +//! This module is designed with performance as the top priority: +//! +//! - **Runtime Feature Detection**: Automatically selects the best available instruction set +//! - **Zero-Copy Operations**: Leverages Rust's ownership system to avoid unnecessary allocations +//! 
- **Inline Functions**: All hot-path functions are marked `#[inline]` for optimal performance +//! - **Cache-Friendly**: Operations are designed to maximize CPU cache utilization +//! - **Minimal Overhead**: Direct mapping to hardware instructions where possible +//! +//! # Examples +//! +//! ```rust +//! use litchi_core::simd::cmp::simd_eq_u8; +//! +//! // Compare two byte arrays for equality +//! let a = vec![1u8, 2, 3, 4, 5, 6, 7, 8]; +//! let b = vec![1u8, 2, 0, 4, 5, 0, 7, 8]; +//! let mut result = vec![0u8; 8]; +//! +//! simd_eq_u8(&a, &b, &mut result); +//! // result[i] is 0xFF where a[i] == b[i], 0x00 otherwise +//! ``` +//! +//! # SVE/SVE2 Features +//! +//! ARM's Scalable Vector Extension (SVE) and SVE2 provide unique capabilities: +//! +//! ## SVE Key Features +//! +//! - **Scalable**: Vector length determined at runtime (128-2048 bits in 128-bit increments) +//! - **Predicated**: All operations use predicate masks for efficient conditional execution +//! - **Loop-friendly**: `svwhilelt` and similar instructions simplify vectorized loops +//! - **Future-proof**: Same code automatically leverages larger vectors on newer hardware +//! +//! ## SVE2 Additions +//! +//! SVE2 extends SVE with instructions for: +//! - DSP operations (saturating arithmetic, complex numbers) +//! - Multimedia processing (polynomial math, CRC) +//! - Bit manipulation (population count, bit permutations) +//! - Table operations and histogram processing +//! +//! ## Availability +//! +//! - **SVE**: Available on some ARMv8.2-A+ processors (e.g., Fujitsu A64FX, AWS Graviton3) +//! - **SVE2**: Available on many ARMv9-A processors (e.g., AWS Graviton4, NVIDIA Grace) +//! - **Detection**: Compile with `+sve` or `+sve2` target features, or check HWCAP at runtime +//! +//! # Safety +//! +//! Functions using SIMD intrinsics are marked as `unsafe` when they require specific CPU features. +//! High-level API functions perform runtime feature detection to ensure safety across different CPUs. +//! +//!
When using low-level intrinsics directly, ensure the target CPU supports the required features +//! either through: +//! - Runtime detection with `is_x86_feature_detected!()` on x86_64 +//! - Compile-time target features: `#[target_feature(enable = "avx2")]` or `#[target_feature(enable = "sve")]` +//! - Compiler flags: `RUSTFLAGS="-C target-feature=+avx2"` or `RUSTFLAGS="-C target-feature=+sve"` +//! +//! ## Example: Compiling with SVE Support +//! +//! ```bash +//! # For SVE +//! RUSTFLAGS="-C target-cpu=native -C target-feature=+sve" cargo build --release +//! +//! # For SVE2 +//! RUSTFLAGS="-C target-cpu=native -C target-feature=+sve2" cargo build --release +//! ``` + +pub mod cmp; +pub mod fmt; +pub mod xor; diff --git a/src/common/simd/xor.rs b/crates/litchi-core/src/simd/xor.rs similarity index 100% rename from src/common/simd/xor.rs rename to crates/litchi-core/src/simd/xor.rs diff --git a/src/common/style/color.rs b/crates/litchi-core/src/style/color.rs similarity index 97% rename from src/common/style/color.rs rename to crates/litchi-core/src/style/color.rs index 0e3c4e5..5d195cc 100644 --- a/src/common/style/color.rs +++ b/crates/litchi-core/src/style/color.rs @@ -10,7 +10,7 @@ use std::str::FromStr; /// # Examples /// /// ```rust -/// use litchi::common::RGBColor; +/// use litchi_core::RGBColor; /// /// // Create a red color /// let red = RGBColor::new(255, 0, 0); @@ -44,7 +44,7 @@ impl RGBColor { /// # Examples /// /// ```rust - /// use litchi::common::RGBColor; + /// use litchi_core::RGBColor; /// /// let color = RGBColor::new(255, 128, 0); // Orange /// ``` @@ -64,7 +64,7 @@ impl RGBColor { /// # Examples /// /// ```rust - /// use litchi::common::RGBColor; + /// use litchi_core::RGBColor; /// /// let red = RGBColor::from_hex("FF0000").unwrap(); /// let blue = RGBColor::from_hex("#0000FF").unwrap(); @@ -113,7 +113,7 @@ impl RGBColor { /// # Examples /// /// ```rust - /// use litchi::common::RGBColor; + /// use litchi_core::RGBColor; /// /// let red = 
RGBColor::from_name("red").unwrap(); /// let blue = RGBColor::from_name("blue").unwrap(); @@ -160,7 +160,7 @@ impl RGBColor { /// # Examples /// /// ```rust - /// use litchi::common::RGBColor; + /// use litchi_core::RGBColor; /// /// let color = RGBColor::new(255, 0, 0); /// assert_eq!(color.to_hex(), "FF0000"); @@ -174,7 +174,7 @@ impl RGBColor { /// # Examples /// /// ```rust - /// use litchi::common::RGBColor; + /// use litchi_core::RGBColor; /// /// let color = RGBColor::new(255, 0, 0); /// assert_eq!(color.to_hex_string(), "#ff0000"); @@ -237,7 +237,7 @@ impl FromStr for RGBColor { /// # Examples /// /// ```rust - /// use litchi::common::RGBColor; + /// use litchi_core::RGBColor; /// /// let color: RGBColor = "#ff0000".parse().unwrap(); /// let color2: RGBColor = "red".parse().unwrap(); diff --git a/src/common/style/len.rs b/crates/litchi-core/src/style/len.rs similarity index 92% rename from src/common/style/len.rs rename to crates/litchi-core/src/style/len.rs index 66b787a..d42eefd 100644 --- a/src/common/style/len.rs +++ b/crates/litchi-core/src/style/len.rs @@ -1,6 +1,6 @@ use std::fmt; -use crate::common::unit::{EMUS_PER_CM, EMUS_PER_INCH}; +use crate::unit::{EMUS_PER_CM, EMUS_PER_INCH}; /// Length measurement with units. 
/// @@ -10,7 +10,7 @@ use crate::common::unit::{EMUS_PER_CM, EMUS_PER_INCH}; /// # Examples /// /// ```rust -/// use litchi::common::Length; +/// use litchi_core::Length; /// /// // Create from EMUs /// let length = Length::from_emus(914400); // 1 inch @@ -37,7 +37,7 @@ impl Length { /// # Examples /// /// ```rust - /// use litchi::common::Length; + /// use litchi_core::Length; /// /// let length = Length::from_emus(914400); // 1 inch /// ``` @@ -51,7 +51,7 @@ impl Length { /// # Examples /// /// ```rust - /// use litchi::common::Length; + /// use litchi_core::Length; /// /// let length = Length::from_inches(1.0); /// ``` @@ -67,7 +67,7 @@ impl Length { /// # Examples /// /// ```rust - /// use litchi::common::Length; + /// use litchi_core::Length; /// /// let length = Length::from_cm(2.54); // ~1 inch /// ``` diff --git a/src/common/style/mod.rs b/crates/litchi-core/src/style/mod.rs similarity index 100% rename from src/common/style/mod.rs rename to crates/litchi-core/src/style/mod.rs diff --git a/src/common/style/text/mod.rs b/crates/litchi-core/src/style/text/mod.rs similarity index 100% rename from src/common/style/text/mod.rs rename to crates/litchi-core/src/style/text/mod.rs diff --git a/src/common/style/text/pos.rs b/crates/litchi-core/src/style/text/pos.rs similarity index 100% rename from src/common/style/text/pos.rs rename to crates/litchi-core/src/style/text/pos.rs diff --git a/src/common/unit.rs b/crates/litchi-core/src/unit.rs similarity index 98% rename from src/common/unit.rs rename to crates/litchi-core/src/unit.rs index 8c5980b..4b7ebf2 100644 --- a/src/common/unit.rs +++ b/crates/litchi-core/src/unit.rs @@ -165,7 +165,7 @@ impl fmt::Display for LengthUnit { /// # Examples /// /// ``` -/// use litchi::common::unit::{Length, LengthUnit}; +/// use litchi_core::unit::{Length, LengthUnit}; /// /// // Parse from string /// let length = "2.5cm".parse::().unwrap(); @@ -223,7 +223,7 @@ impl Length { /// # Examples /// /// ``` - /// use 
litchi::common::unit::{Length, LengthUnit}; + /// use litchi_core::unit::{Length, LengthUnit}; /// /// let inch = Length::new(1.0, LengthUnit::Inch); /// let pixels = inch.to_pixels(96).unwrap(); @@ -257,7 +257,7 @@ impl Length { /// # Examples /// /// ``` - /// use litchi::common::unit::{Length, LengthUnit}; + /// use litchi_core::unit::{Length, LengthUnit}; /// /// let cm = Length::new(2.54, LengthUnit::Centimeter); /// let inches = cm.to_inches().unwrap(); @@ -289,7 +289,7 @@ impl Length { /// # Examples /// /// ``` - /// use litchi::common::unit::{Length, LengthUnit}; + /// use litchi_core::unit::{Length, LengthUnit}; /// /// let inch = Length::new(1.0, LengthUnit::Inch); /// let cm = inch.to_centimeters().unwrap(); @@ -325,7 +325,7 @@ impl FromStr for Length { /// # Examples /// /// ``` - /// use litchi::common::unit::{Length, LengthUnit}; + /// use litchi_core::unit::{Length, LengthUnit}; /// /// let length = "2.5cm".parse::().unwrap(); /// assert_eq!(length.value(), 2.5); diff --git a/src/common/xml/escape.rs b/crates/litchi-core/src/xml/escape.rs similarity index 95% rename from src/common/xml/escape.rs rename to crates/litchi-core/src/xml/escape.rs index 46cbf71..ae02948 100644 --- a/src/common/xml/escape.rs +++ b/crates/litchi-core/src/xml/escape.rs @@ -21,7 +21,7 @@ static XML_UNESCAPER: Lazy = Lazy::new(|| { /// # Examples /// /// ``` -/// use litchi::common::xml::escape_xml; +/// use litchi_core::xml::escape_xml; /// assert_eq!(escape_xml("a & b"), "a & b"); /// assert_eq!(escape_xml("\"hello\""), "<tag>"hello"</tag>"); /// ``` @@ -38,7 +38,7 @@ pub fn escape_xml(s: &str) -> String { /// # Examples /// /// ``` -/// use litchi::common::xml::unescape_xml; +/// use litchi_core::xml::unescape_xml; /// assert_eq!(unescape_xml("<a & b>"), ""); /// assert_eq!(unescape_xml(""hello'"), "\"hello'"); /// assert_eq!(unescape_xml("&lt;"), "<"); // & is matched first diff --git a/src/common/xml/mod.rs b/crates/litchi-core/src/xml/mod.rs similarity index 100% rename 
from src/common/xml/mod.rs rename to crates/litchi-core/src/xml/mod.rs diff --git a/src/common/xml_slice.rs b/crates/litchi-core/src/xml_slice.rs similarity index 99% rename from src/common/xml_slice.rs rename to crates/litchi-core/src/xml_slice.rs index 1d674d1..803fc9b 100644 --- a/src/common/xml_slice.rs +++ b/crates/litchi-core/src/xml_slice.rs @@ -20,6 +20,7 @@ use std::sync::Arc; /// - No per-element heap allocation - all data lives in the shared arena /// - Memory-efficient: 24 bytes per slice (Arc pointer + start + len) #[derive(Debug, Clone)] +#[doc(hidden)] pub struct XmlSlice { /// Shared reference to the arena buffer containing all data arena: Arc>, @@ -74,6 +75,7 @@ impl XmlSlice { /// This collects all data into a contiguous buffer during parsing, /// then converts to a shared Arc for creating slices. #[derive(Debug)] +#[doc(hidden)] pub struct XmlArenaBuilder { /// The buffer being built buffer: Vec, diff --git a/crates/litchi-eval/Cargo.toml b/crates/litchi-eval/Cargo.toml new file mode 100644 index 0000000..65c66e9 --- /dev/null +++ b/crates/litchi-eval/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "litchi-eval" +description = "Formula evaluator engine for Litchi spreadsheets." 
+version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[features] +default = [] +web_functions = ["dep:urlencoding", "dep:reqwest", "dep:sxd-document", "dep:sxd-xpath"] + +[dependencies] +chrono = { workspace = true } +litchi-core = { workspace = true } +num-complex = { workspace = true } +once_cell = { workspace = true } +phf = { workspace = true } +rand = { workspace = true } +reqwest = { workspace = true, optional = true } +statrs = { workspace = true } +sxd-document = { workspace = true, optional = true } +sxd-xpath = { workspace = true, optional = true } +tokio = { workspace = true } +urlencoding = { workspace = true, optional = true } + +[dev-dependencies] +tempfile = { workspace = true } diff --git a/crates/litchi-eval/README.md b/crates/litchi-eval/README.md new file mode 100644 index 0000000..fe0e33c --- /dev/null +++ b/crates/litchi-eval/README.md @@ -0,0 +1,37 @@ +# litchi-eval + +Spreadsheet formula evaluation engine for the Litchi office-formats library. + +## Overview + +`litchi-eval` is the engine behind `=SUM(A1:A10)` and friends in `.xlsx`, `.xlsb`, `.ods`, and `.numbers` workbooks parsed by Litchi. It operates on top of `litchi-core`'s `WorkbookTrait`, prefers cached results embedded in files when present, and falls back to evaluating the formula text. It is used through the `eval_engine` feature of the `litchi` umbrella crate. + +## Usage + +```toml +[dependencies] +litchi-eval = "0.0.1" +``` + +```rust +use litchi_eval::FormulaEvaluator; +use litchi_core::sheet::WorkbookTrait; + +async fn sum_a1(workbook: &impl WorkbookTrait) -> litchi_core::sheet::Result<()> { + let evaluator = FormulaEvaluator::new(workbook); + let value = evaluator.evaluate_cell("Sheet1", 1, 1).await?; + println!("A1 = {:?}", value); + Ok(()) +} +``` + +## Features + +- `FormulaEvaluator::evaluate_cell` and `evaluate_sheet` for per-cell or whole-sheet evaluation. 
+- Defined names and Excel-style structured table references via `define_name`, `define_name_local`, `define_table`. +- Circular-reference detection and result caching. +- Optional `web_functions` feature gates network-bound functions (uses `reqwest`). + +## License + +Licensed under the Apache License, Version 2.0. Part of the [Litchi](https://github.com/DevExzh/litchi) workspace. diff --git a/crates/litchi-eval/examples/evaluate_simple.rs b/crates/litchi-eval/examples/evaluate_simple.rs new file mode 100644 index 0000000..ce4cdfe --- /dev/null +++ b/crates/litchi-eval/examples/evaluate_simple.rs @@ -0,0 +1,354 @@ +//! Simple in-memory formula evaluation example for `litchi-eval`. +//! +//! This example shows how to plug a minimal `WorkbookTrait` implementation +//! into [`FormulaEvaluator`], evaluate individual cells (including formulas +//! that rely on cached results), and register a global named range with +//! [`FormulaEvaluator::define_name`]. +//! +//! Run with: +//! +//! ```sh +//! cargo run -p litchi-eval --example evaluate_simple --all-features +//! ``` + +use std::borrow::Cow; +use std::collections::HashMap; + +use litchi_core::sheet::{ + Cell, CellIterator, CellValue, Result, RowIterator, WorkbookTrait, Worksheet, WorksheetIterator, +}; +use litchi_eval::FormulaEvaluator; + +// --------------------------------------------------------------------------- +// Minimal in-memory workbook implementation +// --------------------------------------------------------------------------- + +/// Cell coordinate keyed as `(row, col)`, both 1-based. 
+type Coord = (u32, u32); + +#[derive(Debug)] +struct MemSheet { + name: String, + cells: HashMap, + dimensions: Option<(u32, u32, u32, u32)>, +} + +impl MemSheet { + fn new(name: impl Into) -> Self { + Self { + name: name.into(), + cells: HashMap::new(), + dimensions: None, + } + } + + fn set(&mut self, row: u32, col: u32, value: CellValue) { + self.cells.insert((row, col), value); + self.dimensions = Some(match self.dimensions { + None => (row, col, row, col), + Some((min_r, min_c, max_r, max_c)) => ( + min_r.min(row), + min_c.min(col), + max_r.max(row), + max_c.max(col), + ), + }); + } +} + +#[derive(Debug)] +struct MemWorkbook { + sheets: Vec, + sheet_names: Vec, +} + +impl MemWorkbook { + fn new() -> Self { + Self { + sheets: Vec::new(), + sheet_names: Vec::new(), + } + } + + fn add_sheet(&mut self, sheet: MemSheet) { + self.sheet_names.push(sheet.name.clone()); + self.sheets.push(sheet); + } +} + +// --- Cell impl -------------------------------------------------------------- + +struct MemCell<'a> { + row: u32, + column: u32, + value: &'a CellValue, +} + +impl<'a> Cell for MemCell<'a> { + fn row(&self) -> u32 { + self.row + } + + fn column(&self) -> u32 { + self.column + } + + fn coordinate(&self) -> String { + format!("{}{}", column_letter(self.column), self.row) + } + + fn value(&self) -> &CellValue { + self.value + } + + fn is_formula(&self) -> bool { + matches!(self.value, CellValue::Formula { .. 
}) + } +} + +fn column_letter(mut col: u32) -> String { + let mut buf = Vec::new(); + while col > 0 { + let rem = (col - 1) % 26; + buf.push(b'A' + rem as u8); + col = (col - 1) / 26; + } + buf.reverse(); + String::from_utf8(buf).unwrap_or_default() +} + +// --- empty iterators ------------------------------------------------------- + +struct EmptyCellIter; +impl<'a> CellIterator<'a> for EmptyCellIter { + fn next(&mut self) -> Option>> { + None + } +} + +struct EmptyRowIter; +impl<'a> RowIterator<'a> for EmptyRowIter { + fn next(&mut self) -> Option>> { + None + } +} + +struct EmptyWorksheetIter; +impl<'a> WorksheetIterator<'a> for EmptyWorksheetIter { + fn next(&mut self) -> Option>> { + None + } +} + +// --- Worksheet impl -------------------------------------------------------- + +impl Worksheet for MemSheet { + fn name(&self) -> &str { + &self.name + } + + fn row_count(&self) -> usize { + self.dimensions.map(|(_, _, r, _)| r as usize).unwrap_or(0) + } + + fn column_count(&self) -> usize { + self.dimensions.map(|(_, _, _, c)| c as usize).unwrap_or(0) + } + + fn dimensions(&self) -> Option<(u32, u32, u32, u32)> { + self.dimensions + } + + fn cell(&self, row: u32, column: u32) -> Result> { + let value = self.cells.get(&(row, column)).unwrap_or(CellValue::EMPTY); + Ok(Box::new(MemCell { row, column, value })) + } + + fn cell_by_coordinate(&self, _coordinate: &str) -> Result> { + Err("cell_by_coordinate is not implemented for MemSheet".into()) + } + + fn cells(&self) -> Box + '_> { + Box::new(EmptyCellIter) + } + + fn rows(&self) -> Box + '_> { + Box::new(EmptyRowIter) + } + + fn row(&self, _row_idx: usize) -> Result> { + Ok(Cow::Owned(Vec::new())) + } + + fn cell_value(&self, row: u32, column: u32) -> Result> { + Ok(self + .cells + .get(&(row, column)) + .map(Cow::Borrowed) + .unwrap_or(Cow::Borrowed(CellValue::EMPTY))) + } +} + +// --- Workbook impl --------------------------------------------------------- + +impl WorkbookTrait for MemWorkbook { + fn 
active_worksheet(&self) -> Result> { + self.worksheet_by_index(0) + } + + fn worksheet_names(&self) -> &[String] { + &self.sheet_names + } + + fn worksheet_by_name(&self, name: &str) -> Result> { + let sheet = self + .sheets + .iter() + .find(|s| s.name == name) + .ok_or_else(|| format!("worksheet '{name}' not found"))?; + Ok(Box::new(SheetRef { inner: sheet })) + } + + fn worksheet_by_index(&self, index: usize) -> Result> { + let sheet = self + .sheets + .get(index) + .ok_or_else(|| format!("worksheet index {index} out of range"))?; + Ok(Box::new(SheetRef { inner: sheet })) + } + + fn worksheets(&self) -> Box + '_> { + Box::new(EmptyWorksheetIter) + } + + fn worksheet_count(&self) -> usize { + self.sheets.len() + } + + fn active_sheet_index(&self) -> usize { + 0 + } +} + +/// Borrowed wrapper so we can return `Box` without consuming +/// the underlying `MemSheet`. +struct SheetRef<'a> { + inner: &'a MemSheet, +} + +impl<'a> Worksheet for SheetRef<'a> { + fn name(&self) -> &str { + self.inner.name() + } + + fn row_count(&self) -> usize { + self.inner.row_count() + } + + fn column_count(&self) -> usize { + self.inner.column_count() + } + + fn dimensions(&self) -> Option<(u32, u32, u32, u32)> { + self.inner.dimensions() + } + + fn cell(&self, row: u32, column: u32) -> Result> { + self.inner.cell(row, column) + } + + fn cell_by_coordinate(&self, coordinate: &str) -> Result> { + self.inner.cell_by_coordinate(coordinate) + } + + fn cells(&self) -> Box + '_> { + self.inner.cells() + } + + fn rows(&self) -> Box + '_> { + self.inner.rows() + } + + fn row(&self, row_idx: usize) -> Result> { + self.inner.row(row_idx) + } + + fn cell_value(&self, row: u32, column: u32) -> Result> { + self.inner.cell_value(row, column) + } +} + +// --------------------------------------------------------------------------- +// Demo +// --------------------------------------------------------------------------- + +#[tokio::main] +async fn main() -> std::result::Result<(), Box> { + // Build a 
workbook with a single sheet: + // A1 = 10 (Int) + // A2 = 20 (Int) + // A3 = =SUM(A1:A2) with cached value 30 + // B1 = "hello" (string) + // B2 = 45292.5 (DateTime serial: 2024-01-01 12:00) + let mut sheet = MemSheet::new("Sheet1"); + sheet.set(1, 1, CellValue::Int(10)); + sheet.set(2, 1, CellValue::Int(20)); + sheet.set( + 3, + 1, + CellValue::Formula { + formula: "SUM(A1:A2)".to_string(), + cached_value: Some(Box::new(CellValue::Int(30))), + is_array: false, + array_range: None, + }, + ); + sheet.set(1, 2, CellValue::String("hello".to_string())); + sheet.set(2, 2, CellValue::DateTime(45292.5)); + + // A formula referring to a named range we will register on the evaluator. + sheet.set( + 4, + 1, + CellValue::Formula { + formula: "SUM(MyRange)".to_string(), + // No cached value: forces actual evaluation through `define_name`. + cached_value: None, + is_array: false, + array_range: None, + }, + ); + + let mut workbook = MemWorkbook::new(); + workbook.add_sheet(sheet); + + // Build the evaluator and register a workbook-scoped name. + let mut evaluator = FormulaEvaluator::new(&workbook); + evaluator.define_name("MyRange", "Sheet1!A1:A2"); + + // Evaluate a few specific cells. + println!("== Single cell evaluations =="); + let a1 = evaluator.evaluate_cell("Sheet1", 1, 1).await?; + println!("A1 (literal int) = {:?}", a1); + + let a3 = evaluator.evaluate_cell("Sheet1", 3, 1).await?; + println!("A3 (=SUM(A1:A2)) = {:?}", a3); + + let a4 = evaluator.evaluate_cell("Sheet1", 4, 1).await?; + println!("A4 (=SUM(MyRange)) = {:?}", a4); + + let b1 = evaluator.evaluate_cell("Sheet1", 1, 2).await?; + println!("B1 (string) = {:?}", b1); + + let b2 = evaluator.evaluate_cell("Sheet1", 2, 2).await?; + println!("B2 (datetime serial) = {:?}", b2); + + // Evaluate the entire sheet as a dense grid.
+ println!("\n== Full-sheet evaluation =="); + let grid = evaluator.evaluate_sheet("Sheet1").await?; + for (i, row) in grid.iter().enumerate() { + println!("row {}: {:?}", i + 1, row); + } + + Ok(()) +} diff --git a/crates/litchi-eval/examples/evaluate_with_table.rs b/crates/litchi-eval/examples/evaluate_with_table.rs new file mode 100644 index 0000000..b385d7c --- /dev/null +++ b/crates/litchi-eval/examples/evaluate_with_table.rs @@ -0,0 +1,349 @@ +//! Structured-table reference example for `litchi-eval`. +//! +//! Demonstrates [`FormulaEvaluator::define_table`] together with a +//! [`TableConfig`] so structured references (e.g. `Sales[Qty]`) resolve +//! against a region of an in-memory workbook. +//! +//! Run with: +//! +//! ```sh +//! cargo run -p litchi-eval --example evaluate_with_table --all-features +//! ``` + +use std::borrow::Cow; +use std::collections::HashMap; + +use litchi_core::sheet::{ + Cell, CellIterator, CellValue, Result, RowIterator, WorkbookTrait, Worksheet, WorksheetIterator, +}; +use litchi_eval::{FormulaEvaluator, TableConfig}; + +// --------------------------------------------------------------------------- +// Minimal in-memory workbook (same shape as the simple example) +// --------------------------------------------------------------------------- + +type Coord = (u32, u32); + +#[derive(Debug)] +struct MemSheet { + name: String, + cells: HashMap, + dimensions: Option<(u32, u32, u32, u32)>, +} + +impl MemSheet { + fn new(name: impl Into) -> Self { + Self { + name: name.into(), + cells: HashMap::new(), + dimensions: None, + } + } + + fn set(&mut self, row: u32, col: u32, value: CellValue) { + self.cells.insert((row, col), value); + self.dimensions = Some(match self.dimensions { + None => (row, col, row, col), + Some((min_r, min_c, max_r, max_c)) => ( + min_r.min(row), + min_c.min(col), + max_r.max(row), + max_c.max(col), + ), + }); + } +} + +#[derive(Debug)] +struct MemWorkbook { + sheets: Vec, + sheet_names: Vec, +} + +impl MemWorkbook { 
+ fn new() -> Self { + Self { + sheets: Vec::new(), + sheet_names: Vec::new(), + } + } + + fn add_sheet(&mut self, sheet: MemSheet) { + self.sheet_names.push(sheet.name.clone()); + self.sheets.push(sheet); + } +} + +struct MemCell<'a> { + row: u32, + column: u32, + value: &'a CellValue, +} + +impl<'a> Cell for MemCell<'a> { + fn row(&self) -> u32 { + self.row + } + + fn column(&self) -> u32 { + self.column + } + + fn coordinate(&self) -> String { + format!("R{}C{}", self.row, self.column) + } + + fn value(&self) -> &CellValue { + self.value + } + + fn is_formula(&self) -> bool { + matches!(self.value, CellValue::Formula { .. }) + } +} + +struct EmptyCellIter; +impl<'a> CellIterator<'a> for EmptyCellIter { + fn next(&mut self) -> Option>> { + None + } +} + +struct EmptyRowIter; +impl<'a> RowIterator<'a> for EmptyRowIter { + fn next(&mut self) -> Option>> { + None + } +} + +struct EmptyWorksheetIter; +impl<'a> WorksheetIterator<'a> for EmptyWorksheetIter { + fn next(&mut self) -> Option>> { + None + } +} + +impl Worksheet for MemSheet { + fn name(&self) -> &str { + &self.name + } + + fn row_count(&self) -> usize { + self.dimensions.map(|(_, _, r, _)| r as usize).unwrap_or(0) + } + + fn column_count(&self) -> usize { + self.dimensions.map(|(_, _, _, c)| c as usize).unwrap_or(0) + } + + fn dimensions(&self) -> Option<(u32, u32, u32, u32)> { + self.dimensions + } + + fn cell(&self, row: u32, column: u32) -> Result> { + let value = self.cells.get(&(row, column)).unwrap_or(CellValue::EMPTY); + Ok(Box::new(MemCell { row, column, value })) + } + + fn cell_by_coordinate(&self, _coordinate: &str) -> Result> { + Err("cell_by_coordinate is not implemented for MemSheet".into()) + } + + fn cells(&self) -> Box + '_> { + Box::new(EmptyCellIter) + } + + fn rows(&self) -> Box + '_> { + Box::new(EmptyRowIter) + } + + fn row(&self, _row_idx: usize) -> Result> { + Ok(Cow::Owned(Vec::new())) + } + + fn cell_value(&self, row: u32, column: u32) -> Result> { + Ok(self + .cells + .get(&(row, 
column)) + .map(Cow::Borrowed) + .unwrap_or(Cow::Borrowed(CellValue::EMPTY))) + } +} + +impl WorkbookTrait for MemWorkbook { + fn active_worksheet(&self) -> Result> { + self.worksheet_by_index(0) + } + + fn worksheet_names(&self) -> &[String] { + &self.sheet_names + } + + fn worksheet_by_name(&self, name: &str) -> Result> { + let sheet = self + .sheets + .iter() + .find(|s| s.name == name) + .ok_or_else(|| format!("worksheet '{name}' not found"))?; + Ok(Box::new(SheetRef { inner: sheet })) + } + + fn worksheet_by_index(&self, index: usize) -> Result> { + let sheet = self + .sheets + .get(index) + .ok_or_else(|| format!("worksheet index {index} out of range"))?; + Ok(Box::new(SheetRef { inner: sheet })) + } + + fn worksheets(&self) -> Box + '_> { + Box::new(EmptyWorksheetIter) + } + + fn worksheet_count(&self) -> usize { + self.sheets.len() + } + + fn active_sheet_index(&self) -> usize { + 0 + } +} + +struct SheetRef<'a> { + inner: &'a MemSheet, +} + +impl<'a> Worksheet for SheetRef<'a> { + fn name(&self) -> &str { + self.inner.name() + } + + fn row_count(&self) -> usize { + self.inner.row_count() + } + + fn column_count(&self) -> usize { + self.inner.column_count() + } + + fn dimensions(&self) -> Option<(u32, u32, u32, u32)> { + self.inner.dimensions() + } + + fn cell(&self, row: u32, column: u32) -> Result> { + self.inner.cell(row, column) + } + + fn cell_by_coordinate(&self, coordinate: &str) -> Result> { + self.inner.cell_by_coordinate(coordinate) + } + + fn cells(&self) -> Box + '_> { + self.inner.cells() + } + + fn rows(&self) -> Box + '_> { + self.inner.rows() + } + + fn row(&self, row_idx: usize) -> Result> { + self.inner.row(row_idx) + } + + fn cell_value(&self, row: u32, column: u32) -> Result> { + self.inner.cell_value(row, column) + } +} + +// --------------------------------------------------------------------------- +// Demo +// --------------------------------------------------------------------------- + +#[tokio::main] +async fn main() -> 
std::result::Result<(), Box> { + // Layout for "Sheet1": + // + // A B C + // 1 "Item" "Qty" "Price" <- table header row + // 2 "Apple" 3 1.50 <- data row 1 + // 3 "Bread" 1 4.20 <- data row 2 + // 5 =SUM(Sales[Qty]) <- structured-reference formula + // 6 =SUM(Sales[Price]) <- structured-reference formula + // + // The table "Sales" spans A1:C3 (header row at row 1, data rows 2..=3). + let mut sheet = MemSheet::new("Sheet1"); + + // Header row (row 1) + sheet.set(1, 1, CellValue::String("Item".to_string())); + sheet.set(1, 2, CellValue::String("Qty".to_string())); + sheet.set(1, 3, CellValue::String("Price".to_string())); + + // Data row 1 (row 2) + sheet.set(2, 1, CellValue::String("Apple".to_string())); + sheet.set(2, 2, CellValue::Int(3)); + sheet.set(2, 3, CellValue::Float(1.50)); + + // Data row 2 (row 3) + sheet.set(3, 1, CellValue::String("Bread".to_string())); + sheet.set(3, 2, CellValue::Int(1)); + sheet.set(3, 3, CellValue::Float(4.20)); + + // Formula cells using structured references. + sheet.set( + 5, + 1, + CellValue::Formula { + formula: "SUM(Sales[Qty])".to_string(), + cached_value: None, + is_array: false, + array_range: None, + }, + ); + sheet.set( + 6, + 1, + CellValue::Formula { + formula: "SUM(Sales[Price])".to_string(), + cached_value: None, + is_array: false, + array_range: None, + }, + ); + + let mut workbook = MemWorkbook::new(); + workbook.add_sheet(sheet); + + // Build the evaluator and register the "Sales" table on it. + let mut evaluator = FormulaEvaluator::new(&workbook); + let headers = vec!["Item".to_string(), "Qty".to_string(), "Price".to_string()]; + evaluator.define_table(TableConfig { + name: "Sales", + sheet_name: "Sheet1", + start_row: 1, + start_col: 1, + end_row: 3, + end_col: 3, + headers: &headers, + }); + + println!("== Structured-reference evaluations =="); + + // Evaluate the SUM over the [Qty] column. 
+ let qty_total = evaluator.evaluate_cell("Sheet1", 5, 1).await?; + println!("A5 =SUM(Sales[Qty]) -> {:?}", qty_total); + + // Evaluate the SUM over the [Price] column. + let price_total = evaluator.evaluate_cell("Sheet1", 6, 1).await?; + println!("A6 =SUM(Sales[Price]) -> {:?}", price_total); + + // Inspect the raw header / data cells too. + println!("\n== Raw table cells =="); + for row in 1..=3 { + let item = evaluator.evaluate_cell("Sheet1", row, 1).await?; + let qty = evaluator.evaluate_cell("Sheet1", row, 2).await?; + let price = evaluator.evaluate_cell("Sheet1", row, 3).await?; + println!("row {row}: {:?} | {:?} | {:?}", item, qty, price); + } + + Ok(()) +} diff --git a/src/sheet/eval/engine.rs b/crates/litchi-eval/src/engine.rs similarity index 98% rename from src/sheet/eval/engine.rs rename to crates/litchi-eval/src/engine.rs index a890f70..45c738a 100644 --- a/src/sheet/eval/engine.rs +++ b/crates/litchi-eval/src/engine.rs @@ -6,7 +6,7 @@ //! The initial implementation is intentionally conservative and supports //! only scalar arithmetic over numeric literals and single-cell references. 
-use crate::sheet::{CellValue, Result}; +use litchi_core::sheet::{CellValue, Result}; pub(crate) use super::EngineCtx; use super::parser::Expr; @@ -43,14 +43,11 @@ mod statistical; mod text; mod web; -#[cfg(test)] -mod tests; - #[cfg(test)] pub(crate) mod test_helpers { use super::*; - use crate::sheet::Result; - use crate::sheet::eval::BoxFuture; + use crate::BoxFuture; + use litchi_core::sheet::Result; use std::collections::HashMap; use std::sync::{Arc, RwLock}; @@ -150,7 +147,7 @@ pub(crate) mod test_helpers { false } - #[cfg(feature = "eval_engine_web_functions")] + #[cfg(feature = "web_functions")] fn http_client(&self) -> &reqwest::Client { panic!("TestEngine does not support HTTP client") } diff --git a/src/sheet/eval/engine/aggregate/average.rs b/crates/litchi-eval/src/engine/aggregate/average.rs similarity index 94% rename from src/sheet/eval/engine/aggregate/average.rs rename to crates/litchi-eval/src/engine/aggregate/average.rs index 8c9ed96..a248562 100644 --- a/src/sheet/eval/engine/aggregate/average.rs +++ b/crates/litchi-eval/src/engine/aggregate/average.rs @@ -1,5 +1,5 @@ -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::super::{EvalCtx, for_each_value_in_expr, to_number}; @@ -132,8 +132,8 @@ pub(crate) async fn eval_avedev( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -150,7 +150,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(10)); engine.set_cell("Sheet1", 1, 0, CellValue::Int(20)); engine.set_cell("Sheet1", 2, 0, CellValue::Int(30)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 
0, @@ -181,7 +181,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(10)); engine.set_cell("Sheet1", 1, 0, CellValue::String("text".to_string())); engine.set_cell("Sheet1", 2, 0, CellValue::Int(30)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -203,7 +203,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(10)); engine.set_cell("Sheet1", 1, 0, CellValue::Bool(true)); engine.set_cell("Sheet1", 2, 0, CellValue::String("text".to_string())); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -225,7 +225,7 @@ mod tests { let ctx = engine.ctx(); engine.set_cell("Sheet1", 0, 0, CellValue::Bool(false)); engine.set_cell("Sheet1", 1, 0, CellValue::Int(10)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -250,7 +250,7 @@ mod tests { engine.set_cell("Sheet1", 2, 0, CellValue::Int(6)); engine.set_cell("Sheet1", 3, 0, CellValue::Int(7)); engine.set_cell("Sheet1", 4, 0, CellValue::Int(5)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, diff --git a/src/sheet/eval/engine/aggregate/count.rs b/crates/litchi-eval/src/engine/aggregate/count.rs similarity index 90% rename from src/sheet/eval/engine/aggregate/count.rs rename to crates/litchi-eval/src/engine/aggregate/count.rs index b039ac6..9ff2b1f 100644 --- a/src/sheet/eval/engine/aggregate/count.rs +++ b/crates/litchi-eval/src/engine/aggregate/count.rs @@ -1,5 +1,5 @@ -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use 
litchi_core::sheet::{CellValue, Result}; use super::super::{EvalCtx, for_each_value_in_expr, is_blank}; @@ -63,8 +63,8 @@ pub(crate) async fn eval_counta( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; #[tokio::test] async fn test_eval_count_basic() { @@ -73,7 +73,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(10)); engine.set_cell("Sheet1", 1, 0, CellValue::Float(20.5)); engine.set_cell("Sheet1", 2, 0, CellValue::String("text".to_string())); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -102,7 +102,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(10)); engine.set_cell("Sheet1", 1, 0, CellValue::String("text".to_string())); engine.set_cell("Sheet1", 2, 0, CellValue::Empty); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -122,7 +122,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(10)); engine.set_cell("Sheet1", 1, 0, CellValue::Empty); engine.set_cell("Sheet1", 2, 0, CellValue::Empty); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, diff --git a/src/sheet/eval/engine/aggregate/extrema.rs b/crates/litchi-eval/src/engine/aggregate/extrema.rs similarity index 94% rename from src/sheet/eval/engine/aggregate/extrema.rs rename to crates/litchi-eval/src/engine/aggregate/extrema.rs index d9268c4..357bc3a 100644 --- a/src/sheet/eval/engine/aggregate/extrema.rs +++ b/crates/litchi-eval/src/engine/aggregate/extrema.rs @@ -1,7 +1,7 @@ use std::result::Result as StdResult; -use 
crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::super::{EvalCtx, for_each_value_in_expr, to_number}; @@ -152,8 +152,8 @@ fn coerce_for_mina_maxa(value: &CellValue) -> StdResult, String> { #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -170,7 +170,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(10)); engine.set_cell("Sheet1", 1, 0, CellValue::Int(5)); engine.set_cell("Sheet1", 2, 0, CellValue::Int(20)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -205,7 +205,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(10)); engine.set_cell("Sheet1", 1, 0, CellValue::Int(5)); engine.set_cell("Sheet1", 2, 0, CellValue::Int(20)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -227,7 +227,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Bool(true)); // 1.0 engine.set_cell("Sheet1", 1, 0, CellValue::Int(0)); engine.set_cell("Sheet1", 2, 0, CellValue::String("text".to_string())); // 0.0 - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -250,7 +250,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Bool(true)); // 1.0 engine.set_cell("Sheet1", 1, 0, CellValue::Int(0)); engine.set_cell("Sheet1", 2, 0, CellValue::String("text".to_string())); // 0.0 - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = 
Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, diff --git a/src/sheet/eval/engine/aggregate/mod.rs b/crates/litchi-eval/src/engine/aggregate/mod.rs similarity index 100% rename from src/sheet/eval/engine/aggregate/mod.rs rename to crates/litchi-eval/src/engine/aggregate/mod.rs diff --git a/src/sheet/eval/engine/aggregate/sum.rs b/crates/litchi-eval/src/engine/aggregate/sum.rs similarity index 91% rename from src/sheet/eval/engine/aggregate/sum.rs rename to crates/litchi-eval/src/engine/aggregate/sum.rs index cc2b468..116e307 100644 --- a/src/sheet/eval/engine/aggregate/sum.rs +++ b/crates/litchi-eval/src/engine/aggregate/sum.rs @@ -1,5 +1,5 @@ -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::super::{EvalCtx, flatten_range_expr, for_each_value_in_expr, to_number}; @@ -87,8 +87,8 @@ pub(crate) async fn eval_sumproduct( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -105,7 +105,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(10)); engine.set_cell("Sheet1", 1, 0, CellValue::Int(20)); engine.set_cell("Sheet1", 2, 0, CellValue::Int(30)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -139,7 +139,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(2)); engine.set_cell("Sheet1", 1, 0, CellValue::Int(3)); engine.set_cell("Sheet1", 2, 0, CellValue::Int(4)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -161,7 +161,7 @@ 
mod tests { let ctx = engine.ctx(); engine.set_cell("Sheet1", 0, 0, CellValue::String("text".to_string())); engine.set_cell("Sheet1", 1, 0, CellValue::Empty); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -189,14 +189,14 @@ mod tests { engine.set_cell("Sheet1", 0, 1, CellValue::Int(4)); engine.set_cell("Sheet1", 1, 1, CellValue::Int(5)); engine.set_cell("Sheet1", 2, 1, CellValue::Int(6)); - let range1 = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range1 = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, end_row: 2, end_col: 0, }); - let range2 = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range2 = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 1, @@ -223,14 +223,14 @@ mod tests { // Array 2: 4, 5 (2 rows) engine.set_cell("Sheet1", 0, 1, CellValue::Int(4)); engine.set_cell("Sheet1", 1, 1, CellValue::Int(5)); - let range1 = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range1 = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, end_row: 2, end_col: 0, }); - let range2 = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range2 = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 1, diff --git a/src/sheet/eval/engine/bin_op.rs b/crates/litchi-eval/src/engine/bin_op.rs similarity index 98% rename from src/sheet/eval/engine/bin_op.rs rename to crates/litchi-eval/src/engine/bin_op.rs index a2d7659..f0a5484 100644 --- a/src/sheet/eval/engine/bin_op.rs +++ b/crates/litchi-eval/src/engine/bin_op.rs @@ -1,4 +1,4 @@ -use crate::sheet::CellValue; +use litchi_core::sheet::CellValue; use super::super::parser::BinaryOp; use super::{to_number, to_text}; @@ -84,8 +84,8 @@ fn eval_comparison(op: BinaryOp, left: CellValue, right: CellValue) 
-> CellValue #[cfg(test)] mod tests { use super::*; - use crate::sheet::CellValue; - use crate::sheet::eval::parser::BinaryOp; + use crate::parser::BinaryOp; + use litchi_core::sheet::CellValue; #[test] fn test_eval_binary_op_add() { diff --git a/src/sheet/eval/engine/criteria.rs b/crates/litchi-eval/src/engine/criteria.rs similarity index 99% rename from src/sheet/eval/engine/criteria.rs rename to crates/litchi-eval/src/engine/criteria.rs index 87151f4..863258a 100644 --- a/src/sheet/eval/engine/criteria.rs +++ b/crates/litchi-eval/src/engine/criteria.rs @@ -1,4 +1,4 @@ -use crate::sheet::CellValue; +use litchi_core::sheet::CellValue; use super::{to_number, to_text}; diff --git a/src/sheet/eval/engine/criteria_aggs.rs b/crates/litchi-eval/src/engine/criteria_aggs.rs similarity index 98% rename from src/sheet/eval/engine/criteria_aggs.rs rename to crates/litchi-eval/src/engine/criteria_aggs.rs index cf6297e..b98274a 100644 --- a/src/sheet/eval/engine/criteria_aggs.rs +++ b/crates/litchi-eval/src/engine/criteria_aggs.rs @@ -1,4 +1,4 @@ -use crate::sheet::{CellValue, Result}; +use litchi_core::sheet::{CellValue, Result}; use super::super::parser::Expr; use super::criteria::{Criteria, matches_criteria, parse_criteria}; @@ -17,7 +17,7 @@ async fn minmax_ifs( func_name: &str, extremum: Extremum, ) -> Result { - if args.len() < 3 || args.len().is_multiple_of(2) { + if args.len() < 3 || args.len() % 2 == 0 { return Ok(CellValue::Error(format!( "{func_name} expects 3 or more arguments ({func_name_lower} range, criteria_range1, criteria1, ...)", func_name_lower = func_name.to_lowercase() @@ -232,7 +232,7 @@ pub(crate) async fn eval_sumifs( current_sheet: &str, args: &[Expr], ) -> Result { - if args.len() < 3 || args.len().is_multiple_of(2) { + if args.len() < 3 || args.len() % 2 == 0 { return Ok(CellValue::Error( "SUMIFS expects 3 or more arguments (sum_range, criteria_range1, criteria1, ...)" .to_string(), @@ -287,7 +287,7 @@ pub(crate) async fn eval_countifs( 
current_sheet: &str, args: &[Expr], ) -> Result { - if args.len() < 2 || !args.len().is_multiple_of(2) { + if args.len() < 2 || args.len() % 2 != 0 { return Ok(CellValue::Error( "COUNTIFS expects an even number of arguments (criteria_range1, criteria1, ...)" .to_string(), @@ -345,7 +345,7 @@ pub(crate) async fn eval_averageifs( current_sheet: &str, args: &[Expr], ) -> Result { - if args.len() < 3 || args.len().is_multiple_of(2) { + if args.len() < 3 || args.len() % 2 == 0 { return Ok(CellValue::Error( "AVERAGEIFS expects 3 or more arguments (average_range, criteria_range1, criteria1, ...)".to_string(), )); @@ -405,9 +405,9 @@ pub(crate) async fn eval_averageifs( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; - use crate::sheet::eval::parser::ast::RangeRef; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + use crate::parser::ast::RangeRef; fn num_expr(n: f64) -> Expr { if n == n.floor() { diff --git a/crates/litchi-eval/src/engine/database/mod.rs b/crates/litchi-eval/src/engine/database/mod.rs new file mode 100644 index 0000000..b1bc46e --- /dev/null +++ b/crates/litchi-eval/src/engine/database/mod.rs @@ -0,0 +1,908 @@ +use super::{ + EvalCtx, FlatRange, + criteria::{matches_criteria, parse_criteria}, + evaluate_expression, flatten_range_expr, is_blank, to_number, to_text, +}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; + +pub(crate) async fn eval_dget( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 3 { + return Ok(CellValue::Error( + "DGET expects 3 arguments (database, field, criteria)".to_string(), + )); + } + + let database = flatten_range_expr(ctx, current_sheet, &args[0]).await?; + let rows = database.rows; + let cols = database.cols; + + if rows < 2 || cols == 0 { + return Ok(CellValue::Error( + "DGET database must include header row and at least one 
record".to_string(), + )); + } + + let header_values = &database.values[..cols]; + let header_texts: Vec = header_values.iter().map(to_text).collect(); + + let field_arg = evaluate_expression(ctx, current_sheet, &args[1]).await?; + let field_index = match field_to_index(&field_arg, &header_texts) { + Some(idx) => idx, + None => { + return Ok(CellValue::Error( + "DGET field must be a valid column header or 1-based index".to_string(), + )); + }, + }; + + let criteria_range = flatten_range_expr(ctx, current_sheet, &args[2]).await?; + if criteria_range.rows < 2 || criteria_range.cols == 0 { + return Ok(CellValue::Error( + "DGET criteria must include header row and at least one criteria row".to_string(), + )); + } + + let criteria_columns = match build_criteria_columns(&criteria_range, &header_texts) { + Ok(cols) => cols, + Err(err) => return Ok(err), + }; + + let matches = matching_records(&database, &criteria_range, &criteria_columns); + match matches.len() { + 0 => Ok(CellValue::Error( + "DGET found no rows matching criteria".to_string(), + )), + 1 => Ok(matches[0][field_index].clone()), + _ => Ok(CellValue::Error( + "DGET found multiple rows matching criteria".to_string(), + )), + } +} + +pub(crate) async fn eval_dmax( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat(ctx, current_sheet, args, "DMAX", DatabaseStat::Max).await +} + +pub(crate) async fn eval_dmin( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat(ctx, current_sheet, args, "DMIN", DatabaseStat::Min).await +} + +pub(crate) async fn eval_dcount( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat( + ctx, + current_sheet, + args, + "DCOUNT", + DatabaseStat::CountNumeric, + ) + .await +} + +pub(crate) async fn eval_dcounta( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat(ctx, current_sheet, args, "DCOUNTA", 
DatabaseStat::CountAll).await +} + +pub(crate) async fn eval_dsum( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat(ctx, current_sheet, args, "DSUM", DatabaseStat::Sum).await +} + +pub(crate) async fn eval_dproduct( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat(ctx, current_sheet, args, "DPRODUCT", DatabaseStat::Product).await +} + +pub(crate) async fn eval_daverage( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat(ctx, current_sheet, args, "DAVERAGE", DatabaseStat::Average).await +} + +pub(crate) async fn eval_dstdev( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat(ctx, current_sheet, args, "DSTDEV", DatabaseStat::StdSample).await +} + +pub(crate) async fn eval_dstdevp( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat( + ctx, + current_sheet, + args, + "DSTDEVP", + DatabaseStat::StdPopulation, + ) + .await +} + +pub(crate) async fn eval_dvar( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat(ctx, current_sheet, args, "DVAR", DatabaseStat::VarSample).await +} + +pub(crate) async fn eval_dvarp( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + eval_database_stat( + ctx, + current_sheet, + args, + "DVARP", + DatabaseStat::VarPopulation, + ) + .await +} + +enum DatabaseStat { + Max, + Min, + CountNumeric, + CountAll, + Sum, + Product, + Average, + StdSample, + StdPopulation, + VarSample, + VarPopulation, +} + +async fn eval_database_stat( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], + name: &str, + mode: DatabaseStat, +) -> Result { + if args.len() != 3 { + return Ok(CellValue::Error(format!( + "{name} expects 3 arguments (database, field, criteria)" + ))); + } + + let database = flatten_range_expr(ctx, current_sheet, &args[0]).await?; + if 
database.rows < 2 || database.cols == 0 { + return Ok(CellValue::Error(format!( + "{name} database must include header row and at least one record" + ))); + } + + let header_values = &database.values[..database.cols]; + let header_texts: Vec = header_values.iter().map(to_text).collect(); + + let field_arg = evaluate_expression(ctx, current_sheet, &args[1]).await?; + let field_index = match field_to_index(&field_arg, &header_texts) { + Some(idx) => idx, + None => { + return Ok(CellValue::Error(format!( + "{name} field must be a valid column header or 1-based index" + ))); + }, + }; + + let criteria_range = flatten_range_expr(ctx, current_sheet, &args[2]).await?; + if criteria_range.rows < 2 || criteria_range.cols == 0 { + return Ok(CellValue::Error(format!( + "{name} criteria must include header row and at least one criteria row" + ))); + } + + let criteria_columns = match build_criteria_columns(&criteria_range, &header_texts) { + Ok(cols) => cols, + Err(err) => return Ok(err), + }; + + let matches = matching_records(&database, &criteria_range, &criteria_columns); + if matches.is_empty() { + return Ok(CellValue::Error(format!( + "{name} found no rows matching criteria" + ))); + } + + let mut numbers = Vec::new(); + let mut best_numeric: Option = None; + let mut best_value: Option = None; + let mut sum_acc = 0.0f64; + let mut sum_found = false; + let mut product_acc = 1.0f64; + let mut product_found = false; + let mut count_numeric = 0i64; + let mut count_all = 0i64; + let mut average_sum = 0.0f64; + let mut average_count = 0i64; + + for row in matches { + let value = &row[field_index]; + let numeric = to_number(value); + + match mode { + DatabaseStat::Max => { + if let Some(n) = numeric + && best_numeric.is_none_or(|current| n > current) + { + best_numeric = Some(n); + best_value = Some(value.clone()); + } + }, + DatabaseStat::Min => { + if let Some(n) = numeric + && best_numeric.is_none_or(|current| n < current) + { + best_numeric = Some(n); + best_value = 
Some(value.clone()); + } + }, + DatabaseStat::CountNumeric => { + if numeric.is_some() { + count_numeric += 1; + } + }, + DatabaseStat::CountAll => { + if !is_blank(value) { + count_all += 1; + } + }, + DatabaseStat::Sum => { + if let Some(n) = numeric { + sum_acc += n; + sum_found = true; + } + }, + DatabaseStat::Average => { + if let Some(n) = numeric { + average_sum += n; + average_count += 1; + } + }, + DatabaseStat::Product => { + if let Some(n) = numeric { + product_acc *= n; + product_found = true; + } + }, + DatabaseStat::StdSample + | DatabaseStat::StdPopulation + | DatabaseStat::VarSample + | DatabaseStat::VarPopulation => { + if let Some(n) = numeric { + numbers.push(n); + } + }, + } + } + + match mode { + DatabaseStat::Max => match best_value { + Some(v) => Ok(v), + None => Ok(CellValue::Error(format!( + "{name} found no numeric values in the specified field" + ))), + }, + DatabaseStat::Min => match best_value { + Some(v) => Ok(v), + None => Ok(CellValue::Error(format!( + "{name} found no numeric values in the specified field" + ))), + }, + DatabaseStat::CountNumeric => Ok(CellValue::Int(count_numeric)), + DatabaseStat::CountAll => Ok(CellValue::Int(count_all)), + DatabaseStat::Sum => { + if !sum_found { + return Ok(CellValue::Float(0.0)); + } + Ok(CellValue::Float(sum_acc)) + }, + DatabaseStat::Average => { + if average_count == 0 { + return Ok(CellValue::Error(format!( + "{name} found no numeric values in the specified field" + ))); + } + Ok(CellValue::Float(average_sum / average_count as f64)) + }, + DatabaseStat::Product => { + if !product_found { + return Ok(CellValue::Float(0.0)); + } + Ok(CellValue::Float(product_acc)) + }, + DatabaseStat::StdSample => { + if numbers.len() < 2 { + return Ok(CellValue::Error(format!( + "{name} requires at least two numeric records" + ))); + } + let mean = numbers.iter().sum::() / numbers.len() as f64; + let variance = numbers.iter().map(|v| (v - mean).powi(2)).sum::() + / (numbers.len() as f64 - 1.0); + 
Ok(CellValue::Float(variance.sqrt())) + }, + DatabaseStat::StdPopulation => { + if numbers.is_empty() { + return Ok(CellValue::Error(format!( + "{name} requires at least one numeric record" + ))); + } + let mean = numbers.iter().sum::() / numbers.len() as f64; + let variance = + numbers.iter().map(|v| (v - mean).powi(2)).sum::() / numbers.len() as f64; + Ok(CellValue::Float(variance.sqrt())) + }, + DatabaseStat::VarSample => { + if numbers.len() < 2 { + return Ok(CellValue::Error(format!( + "{name} requires at least two numeric records" + ))); + } + let mean = numbers.iter().sum::() / numbers.len() as f64; + let variance = numbers.iter().map(|v| (v - mean).powi(2)).sum::() + / (numbers.len() as f64 - 1.0); + Ok(CellValue::Float(variance)) + }, + DatabaseStat::VarPopulation => { + if numbers.is_empty() { + return Ok(CellValue::Error(format!( + "{name} requires at least one numeric record" + ))); + } + let mean = numbers.iter().sum::() / numbers.len() as f64; + let variance = + numbers.iter().map(|v| (v - mean).powi(2)).sum::() / numbers.len() as f64; + Ok(CellValue::Float(variance)) + }, + } +} + +fn field_to_index(field: &CellValue, headers: &[String]) -> Option { + match field { + CellValue::Int(i) => { + if *i <= 0 { + return None; + } + let idx = (*i - 1) as usize; + (idx < headers.len()).then_some(idx) + }, + _ => { + let name = to_text(field); + if name.is_empty() { + None + } else { + headers.iter().position(|h| h == &name) + } + }, + } +} + +fn build_criteria_columns( + criteria: &FlatRange, + headers: &[String], +) -> std::result::Result>, CellValue> { + let mut columns = Vec::with_capacity(criteria.cols); + for c in 0..criteria.cols { + let label = to_text(&criteria.values[c]); + if label.is_empty() { + columns.push(None); + continue; + } + match headers.iter().position(|h| h == &label) { + Some(idx) => columns.push(Some(idx)), + None => { + return Err(CellValue::Error(format!( + "DGET criteria column '{}' not found in database headers", + label + ))); + 
}, + } + } + Ok(columns) +} + +fn record_matches( + record: &[CellValue], + criteria_range: &FlatRange, + column_map: &[Option], +) -> bool { + let criteria_cols = criteria_range.cols; + for r in 1..criteria_range.rows { + let mut row_ok = true; + let mut has_condition = false; + for (c, column_index_opt) in column_map.iter().enumerate() { + let crit_value = &criteria_range.values[r * criteria_cols + c]; + if is_blank(crit_value) { + continue; + } + + let column_index = match column_index_opt { + Some(idx) => *idx, + None => { + row_ok = false; + break; + }, + }; + + has_condition = true; + let crit_text = to_text(crit_value); + let criteria = match parse_criteria(&crit_text) { + Some(c) => c, + None => { + row_ok = false; + break; + }, + }; + + if column_index >= record.len() || !matches_criteria(&record[column_index], &criteria) { + row_ok = false; + break; + } + } + + if row_ok && has_condition { + return true; + } + } + + false +} + +fn matching_records<'a>( + database: &'a FlatRange, + criteria_range: &FlatRange, + column_map: &[Option], +) -> Vec<&'a [CellValue]> { + let mut rows = Vec::new(); + let cols = database.cols; + for r in 1..database.rows { + let row_slice = &database.values[r * cols..(r + 1) * cols]; + if record_matches(row_slice, criteria_range, column_map) { + rows.push(row_slice); + } + } + rows +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::engine::test_helpers::TestEngine; + use crate::parser::{Expr, RangeRef}; + + fn create_database_range() -> Expr { + // Create a database with headers: Name, Age, Score + // Records: Alice(25, 85), Bob(30, 90), Carol(25, 78), Dave(35, 92) + Expr::Range(RangeRef { + sheet: "Sheet1".to_string(), + start_col: 0, + start_row: 0, + end_col: 2, + end_row: 4, + }) + } + + fn setup_database(engine: &TestEngine) { + // Header row + engine.set_cell("Sheet1", 0, 0, CellValue::String("Name".to_string())); + engine.set_cell("Sheet1", 0, 1, CellValue::String("Age".to_string())); + engine.set_cell("Sheet1", 0, 
2, CellValue::String("Score".to_string())); + // Data rows + engine.set_cell("Sheet1", 1, 0, CellValue::String("Alice".to_string())); + engine.set_cell("Sheet1", 1, 1, CellValue::Int(25)); + engine.set_cell("Sheet1", 1, 2, CellValue::Int(85)); + engine.set_cell("Sheet1", 2, 0, CellValue::String("Bob".to_string())); + engine.set_cell("Sheet1", 2, 1, CellValue::Int(30)); + engine.set_cell("Sheet1", 2, 2, CellValue::Int(90)); + engine.set_cell("Sheet1", 3, 0, CellValue::String("Carol".to_string())); + engine.set_cell("Sheet1", 3, 1, CellValue::Int(25)); + engine.set_cell("Sheet1", 3, 2, CellValue::Int(78)); + engine.set_cell("Sheet1", 4, 0, CellValue::String("Dave".to_string())); + engine.set_cell("Sheet1", 4, 1, CellValue::Int(35)); + engine.set_cell("Sheet1", 4, 2, CellValue::Int(92)); + } + + fn create_criteria_range(criteria_col: u32, criteria_row: u32) -> Expr { + Expr::Range(RangeRef { + sheet: "Sheet1".to_string(), + start_col: criteria_col, + start_row: criteria_row, + end_col: criteria_col + 2, + end_row: criteria_row + 1, + }) + } + + fn setup_age_criteria( + engine: &TestEngine, + criteria_col: u32, + criteria_row: u32, + age_value: i64, + ) { + // Criteria header + engine.set_cell( + "Sheet1", + criteria_row, + criteria_col, + CellValue::String("Age".to_string()), + ); + // Criteria value + engine.set_cell( + "Sheet1", + criteria_row + 1, + criteria_col, + CellValue::Int(age_value), + ); + } + + #[tokio::test] + async fn test_dget_single_match() { + let engine = TestEngine::new(); + setup_database(&engine); + setup_age_criteria(&engine, 10, 0, 30); // Age = 30 + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dget(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(90)); // Bob's score + } + + #[tokio::test] + async fn 
test_dget_no_match() { + let engine = TestEngine::new(); + setup_database(&engine); + setup_age_criteria(&engine, 10, 0, 99); // Age = 99 (no match) + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dget(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("found no rows")), + _ => panic!("Expected Error result, got {:?}", result), + } + } + + #[tokio::test] + async fn test_dget_multiple_matches() { + let engine = TestEngine::new(); + setup_database(&engine); + setup_age_criteria(&engine, 10, 0, 25); // Age = 25 (Alice and Carol) + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dget(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("multiple rows")), + _ => panic!("Expected Error result, got {:?}", result), + } + } + + #[tokio::test] + async fn test_dget_field_by_index() { + let engine = TestEngine::new(); + setup_database(&engine); + setup_age_criteria(&engine, 10, 0, 30); + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::Int(3)); // 3rd column (Score) + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dget(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(90)); + } + + #[tokio::test] + async fn test_dsum() { + let engine = TestEngine::new(); + setup_database(&engine); + setup_age_criteria(&engine, 10, 0, 25); // Age = 25 (Alice=85, Carol=78) + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = 
Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dsum(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Float(163.0)); // 85 + 78 + } + + #[tokio::test] + async fn test_daverage() { + let engine = TestEngine::new(); + setup_database(&engine); + setup_age_criteria(&engine, 10, 0, 25); // Age = 25 + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_daverage(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Float(81.5)); // (85 + 78) / 2 + } + + #[tokio::test] + async fn test_dcount() { + let engine = TestEngine::new(); + setup_database(&engine); + setup_age_criteria(&engine, 10, 0, 25); + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dcount(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(2)); // 2 numeric scores + } + + #[tokio::test] + async fn test_dcounta() { + let engine = TestEngine::new(); + setup_database(&engine); + // Add criteria to match all rows (using Score > 0) + engine.set_cell("Sheet1", 0, 10, CellValue::String("Score".to_string())); + engine.set_cell("Sheet1", 1, 10, CellValue::String(">0".to_string())); + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Name".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dcounta(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(4)); // 4 names match Score > 0 + 
} + + #[tokio::test] + async fn test_dmax() { + let engine = TestEngine::new(); + setup_database(&engine); + // Match all by using criteria with blank value + engine.set_cell("Sheet1", 0, 10, CellValue::String("Score".to_string())); + engine.set_cell("Sheet1", 1, 10, CellValue::String(">0".to_string())); + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dmax(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(92)); // Dave's score + } + + #[tokio::test] + async fn test_dmin() { + let engine = TestEngine::new(); + setup_database(&engine); + engine.set_cell("Sheet1", 0, 10, CellValue::String("Score".to_string())); + engine.set_cell("Sheet1", 1, 10, CellValue::String(">0".to_string())); + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dmin(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(78)); // Carol's score + } + + #[tokio::test] + async fn test_dproduct() { + let engine = TestEngine::new(); + setup_database(&engine); + setup_age_criteria(&engine, 10, 0, 25); // Age = 25 (scores 85, 78) + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dproduct(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Float(6630.0)); // 85 * 78 + } + + #[tokio::test] + async fn test_dstddev() { + let engine = TestEngine::new(); + setup_database(&engine); + engine.set_cell("Sheet1", 0, 10, 
CellValue::String("Score".to_string())); + engine.set_cell("Sheet1", 1, 10, CellValue::String(">0".to_string())); + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dstdev(ctx, "Sheet1", &args).await.unwrap(); + // Sample standard deviation of [85, 90, 78, 92] + // mean = 86.25, variance = 38.916667, stddev = 6.238322 + match result { + CellValue::Float(v) => { + let expected = 6.23832242407; + assert!( + (v - expected).abs() < 0.001, + "Expected ~{}, got {}", + expected, + v + ); + }, + _ => panic!("Expected Float result, got {:?}", result), + } + } + + #[tokio::test] + async fn test_dstddevp() { + let engine = TestEngine::new(); + setup_database(&engine); + engine.set_cell("Sheet1", 0, 10, CellValue::String("Score".to_string())); + engine.set_cell("Sheet1", 1, 10, CellValue::String(">0".to_string())); + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dstdevp(ctx, "Sheet1", &args).await.unwrap(); + // Population standard deviation of [85, 90, 78, 92] + // mean = 86.25, variance = 29.1875, stddev = 5.402546 + match result { + CellValue::Float(v) => { + let expected = 5.40254569624; + assert!( + (v - expected).abs() < 0.001, + "Expected ~{}, got {}", + expected, + v + ); + }, + _ => panic!("Expected Float result, got {:?}", result), + } + } + + #[tokio::test] + async fn test_dvar() { + let engine = TestEngine::new(); + setup_database(&engine); + engine.set_cell("Sheet1", 0, 10, CellValue::String("Score".to_string())); + engine.set_cell("Sheet1", 1, 10, CellValue::String(">0".to_string())); + + let ctx = engine.ctx(); + let database = create_database_range(); + let 
field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dvar(ctx, "Sheet1", &args).await.unwrap(); + // Sample variance of [85, 90, 78, 92] = 38.916667 + match result { + CellValue::Float(v) => { + let expected = 38.9166666667; + assert!( + (v - expected).abs() < 0.001, + "Expected ~{}, got {}", + expected, + v + ); + }, + _ => panic!("Expected Float result, got {:?}", result), + } + } + + #[tokio::test] + async fn test_dvarp() { + let engine = TestEngine::new(); + setup_database(&engine); + engine.set_cell("Sheet1", 0, 10, CellValue::String("Score".to_string())); + engine.set_cell("Sheet1", 1, 10, CellValue::String(">0".to_string())); + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("Score".to_string())); + let criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dvarp(ctx, "Sheet1", &args).await.unwrap(); + // Population variance of [85, 90, 78, 92] = 29.1875 + match result { + CellValue::Float(v) => { + let expected = 29.1875; + assert!( + (v - expected).abs() < 0.001, + "Expected ~{}, got {}", + expected, + v + ); + }, + _ => panic!("Expected Float result, got {:?}", result), + } + } + + #[tokio::test] + async fn test_dget_wrong_args() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![]; + let result = eval_dget(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 3 arguments")), + _ => panic!("Expected Error result"), + } + } + + #[tokio::test] + async fn test_dget_invalid_field() { + let engine = TestEngine::new(); + setup_database(&engine); + setup_age_criteria(&engine, 10, 0, 30); + + let ctx = engine.ctx(); + let database = create_database_range(); + let field = Expr::Literal(CellValue::String("InvalidField".to_string())); + let 
criteria = create_criteria_range(10, 0); + + let args = vec![database, field, criteria]; + let result = eval_dget(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("valid column header")), + _ => panic!("Expected Error result, got {:?}", result), + } + } +} diff --git a/crates/litchi-eval/src/engine/date_time/components.rs b/crates/litchi-eval/src/engine/date_time/components.rs new file mode 100644 index 0000000..278cf2d --- /dev/null +++ b/crates/litchi-eval/src/engine/date_time/components.rs @@ -0,0 +1,248 @@ +use crate::engine::{EvalCtx, evaluate_expression}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; + +use chrono::Datelike; + +use super::helpers::{SECONDS_PER_DAY, coerce_date_value, coerce_time_fraction}; + +pub(crate) async fn eval_year( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("YEAR expects 1 argument".to_string())); + } + let value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + match coerce_date_value(&value) { + Some(date) => Ok(CellValue::Int(date.year() as i64)), + None => Ok(CellValue::Error( + "YEAR expects a valid date serial or text".to_string(), + )), + } +} + +pub(crate) async fn eval_month( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("MONTH expects 1 argument".to_string())); + } + let value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + match coerce_date_value(&value) { + Some(date) => Ok(CellValue::Int(date.month() as i64)), + None => Ok(CellValue::Error( + "MONTH expects a valid date serial or text".to_string(), + )), + } +} + +pub(crate) async fn eval_day( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("DAY expects 1 argument".to_string())); + } + let value = evaluate_expression(ctx, 
current_sheet, &args[0]).await?; + match coerce_date_value(&value) { + Some(date) => Ok(CellValue::Int(date.day() as i64)), + None => Ok(CellValue::Error( + "DAY expects a valid date serial or text".to_string(), + )), + } +} + +pub(crate) async fn eval_hour( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("HOUR expects 1 argument".to_string())); + } + let value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + match coerce_time_fraction(&value) { + Some(frac) => { + let seconds = (frac * SECONDS_PER_DAY).rem_euclid(SECONDS_PER_DAY); + Ok(CellValue::Int((seconds / 3600.0).floor() as i64)) + }, + None => Ok(CellValue::Error( + "HOUR expects a valid time serial or text".to_string(), + )), + } +} + +pub(crate) async fn eval_minute( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("MINUTE expects 1 argument".to_string())); + } + let value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + match coerce_time_fraction(&value) { + Some(frac) => { + let seconds = (frac * SECONDS_PER_DAY).rem_euclid(SECONDS_PER_DAY); + let minutes = (seconds / 60.0).floor() as i64 % 60; + Ok(CellValue::Int(minutes)) + }, + None => Ok(CellValue::Error( + "MINUTE expects a valid time serial or text".to_string(), + )), + } +} + +pub(crate) async fn eval_second( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("SECOND expects 1 argument".to_string())); + } + let value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + match coerce_time_fraction(&value) { + Some(frac) => { + let seconds = (frac * SECONDS_PER_DAY).rem_euclid(SECONDS_PER_DAY); + Ok(CellValue::Int((seconds % 60.0).round() as i64)) + }, + None => Ok(CellValue::Error( + "SECOND expects a valid time serial or text".to_string(), + )), + } +} + +#[cfg(test)] +mod tests { + 
use super::*; + use crate::parser::Expr; + + fn num_expr(n: f64) -> Expr { + if n == n.floor() { + Expr::Literal(CellValue::Int(n as i64)) + } else { + Expr::Literal(CellValue::Float(n)) + } + } + + #[tokio::test] + async fn test_eval_year_from_serial() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // Excel serial for 2024-03-15 is approximately 45366 + let args = vec![num_expr(45366.0)]; + let result = eval_year(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(v) => assert_eq!(v, 2024), + _ => panic!("Expected Int(2024)"), + } + } + + #[tokio::test] + async fn test_eval_month_from_serial() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // Excel serial for 2024-03-15 + let args = vec![num_expr(45366.0)]; + let result = eval_month(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(v) => assert_eq!(v, 3), + _ => panic!("Expected Int(3)"), + } + } + + #[tokio::test] + async fn test_eval_day_from_serial() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // Excel serial for 2024-03-15 + let args = vec![num_expr(45366.0)]; + let result = eval_day(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(v) => assert_eq!(v, 15), + _ => panic!("Expected Int(15)"), + } + } + + #[tokio::test] + async fn test_eval_hour() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // 0.5 is 12:00:00 (noon) + let args = vec![num_expr(0.5)]; + let result = eval_hour(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(v) => assert_eq!(v, 12), + _ => panic!("Expected Int(12)"), + } + } + + #[tokio::test] + async fn test_eval_hour_with_date() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // 45366.75 is 2024-03-15 18:00:00 + let args = vec![num_expr(45366.75)]; + let result = eval_hour(ctx, 
"Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(v) => assert_eq!(v, 18), + _ => panic!("Expected Int(18)"), + } + } + + #[tokio::test] + async fn test_eval_minute() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // 12:30:00 = (12 * 3600 + 30 * 60) / 86400 = 45000 / 86400 = 0.520833333333... + let args = vec![num_expr(45000.0 / 86400.0)]; + let result = eval_minute(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(v) => assert_eq!(v, 30), + _ => panic!("Expected Int(30)"), + } + } + + #[tokio::test] + async fn test_eval_second() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // 0.000011574 is approximately 1 second (1/86400) + let args = vec![num_expr(0.000011574)]; + let result = eval_second(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(v) => assert_eq!(v, 1), + _ => panic!("Expected Int(1)"), + } + } + + #[tokio::test] + async fn test_eval_year_wrong_args() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![]; + let result = eval_year(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 1 argument")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_hour_invalid() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String("not a time".to_string()))]; + let result = eval_hour(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects a valid time")), + _ => panic!("Expected Error"), + } + } +} diff --git a/src/sheet/eval/engine/date_time/constructors.rs b/crates/litchi-eval/src/engine/date_time/constructors.rs similarity index 90% rename from src/sheet/eval/engine/date_time/constructors.rs rename to 
crates/litchi-eval/src/engine/date_time/constructors.rs index 157e84e..fa22e01 100644 --- a/src/sheet/eval/engine/date_time/constructors.rs +++ b/crates/litchi-eval/src/engine/date_time/constructors.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use chrono::Timelike; @@ -170,7 +170,7 @@ pub(crate) async fn eval_timevalue( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -186,7 +186,7 @@ mod tests { #[tokio::test] async fn test_eval_date() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(2024.0), num_expr(3.0), num_expr(15.0)]; let result = eval_date(ctx, "Sheet1", &args).await.unwrap(); @@ -198,7 +198,7 @@ mod tests { #[tokio::test] async fn test_eval_date_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(2024.0), num_expr(3.0)]; let result = eval_date(ctx, "Sheet1", &args).await.unwrap(); @@ -210,7 +210,7 @@ mod tests { #[tokio::test] async fn test_eval_time() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(12.0), num_expr(30.0), num_expr(0.0)]; let result = eval_time(ctx, "Sheet1", &args).await.unwrap(); @@ -225,7 +225,7 @@ mod tests { #[tokio::test] async fn test_eval_time_out_of_range() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let 
engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(25.0), num_expr(0.0), num_expr(0.0)]; let result = eval_time(ctx, "Sheet1", &args).await.unwrap(); @@ -237,7 +237,7 @@ mod tests { #[tokio::test] async fn test_eval_datevalue() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("2024-03-15")]; let result = eval_datevalue(ctx, "Sheet1", &args).await.unwrap(); @@ -249,7 +249,7 @@ mod tests { #[tokio::test] async fn test_eval_datevalue_numeric() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(45000.75)]; let result = eval_datevalue(ctx, "Sheet1", &args).await.unwrap(); @@ -261,7 +261,7 @@ mod tests { #[tokio::test] async fn test_eval_datevalue_invalid() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("not a date")]; let result = eval_datevalue(ctx, "Sheet1", &args).await.unwrap(); @@ -273,7 +273,7 @@ mod tests { #[tokio::test] async fn test_eval_timevalue() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("12:00:00")]; let result = eval_timevalue(ctx, "Sheet1", &args).await.unwrap(); @@ -288,7 +288,7 @@ mod tests { #[tokio::test] async fn test_eval_timevalue_numeric() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(45000.75)]; let result = eval_timevalue(ctx, "Sheet1", &args).await.unwrap(); diff --git 
a/src/sheet/eval/engine/date_time/current.rs b/crates/litchi-eval/src/engine/date_time/current.rs similarity index 84% rename from src/sheet/eval/engine/date_time/current.rs rename to crates/litchi-eval/src/engine/date_time/current.rs index 2562d91..f7067e4 100644 --- a/src/sheet/eval/engine/date_time/current.rs +++ b/crates/litchi-eval/src/engine/date_time/current.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::EvalCtx; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::EvalCtx; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use chrono::Utc; @@ -37,11 +37,11 @@ pub(crate) async fn eval_now( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; #[tokio::test] async fn test_eval_today() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_today(ctx, "Sheet1", &args).await.unwrap(); @@ -58,7 +58,7 @@ mod tests { #[tokio::test] async fn test_eval_today_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![Expr::Literal(CellValue::Int(1))]; let result = eval_today(ctx, "Sheet1", &args).await.unwrap(); @@ -70,7 +70,7 @@ mod tests { #[tokio::test] async fn test_eval_now() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_now(ctx, "Sheet1", &args).await.unwrap(); @@ -85,7 +85,7 @@ mod tests { #[tokio::test] async fn test_eval_now_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = 
engine.ctx(); let args = vec![Expr::Literal(CellValue::Int(1))]; let result = eval_now(ctx, "Sheet1", &args).await.unwrap(); diff --git a/src/sheet/eval/engine/date_time/differences.rs b/crates/litchi-eval/src/engine/date_time/differences.rs similarity index 98% rename from src/sheet/eval/engine/date_time/differences.rs rename to crates/litchi-eval/src/engine/date_time/differences.rs index 000cdd3..c5d1bfa 100644 --- a/src/sheet/eval/engine/date_time/differences.rs +++ b/crates/litchi-eval/src/engine/date_time/differences.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use chrono::{Datelike, NaiveDate}; @@ -446,8 +446,8 @@ fn is_leap_year(year: i32) -> bool { #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { diff --git a/crates/litchi-eval/src/engine/date_time/helpers.rs b/crates/litchi-eval/src/engine/date_time/helpers.rs new file mode 100644 index 0000000..94a2fd3 --- /dev/null +++ b/crates/litchi-eval/src/engine/date_time/helpers.rs @@ -0,0 +1,304 @@ +use crate::engine::{EvalCtx, evaluate_expression, flatten_range_expr, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; + +use chrono::{Datelike, Duration, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Weekday}; + +pub(crate) const SECONDS_PER_DAY: f64 = 86_400.0; + +pub(super) async fn number_arg( + ctx: EvalCtx<'_>, + current_sheet: &str, + expr: &Expr, +) -> Result> { + let v = evaluate_expression(ctx, current_sheet, expr).await?; + Ok(to_number(&v)) +} + +pub(super) fn date_to_excel_serial_1900(date: NaiveDate) 
-> f64 { + let base = NaiveDate::from_ymd_opt(1899, 12, 30).expect("Invalid Excel 1900 base date"); + let days = (date - base).num_days(); + days as f64 +} + +pub(super) fn datetime_to_excel_serial_1900(dt: NaiveDateTime) -> f64 { + let date_serial = date_to_excel_serial_1900(dt.date()); + let seconds = dt.time().num_seconds_from_midnight() as f64; + date_serial + seconds / SECONDS_PER_DAY +} + +pub(super) fn serial_to_excel_date_1900(serial: f64) -> Option { + let base = NaiveDate::from_ymd_opt(1899, 12, 30)?; + let days = serial.floor() as i64; + base.checked_add_signed(Duration::days(days)) +} + +pub(super) fn make_date_serial_1900(year: f64, month: f64, day: f64) -> Option { + let y = year.trunc() as i32; + let m = month.trunc() as i32; + let d = day.trunc() as i64; + + // Excel-style month normalization: months can be outside 1..=12. + let mut ym = (y, m - 1); // month 0-based + ym.0 += ym.1.div_euclid(12); + ym.1 = ym.1.rem_euclid(12); + let norm_year = ym.0; + let norm_month = (ym.1 + 1) as u32; + + let first_of_month = NaiveDate::from_ymd_opt(norm_year, norm_month, 1)?; + let target = first_of_month.checked_add_signed(Duration::days(d - 1))?; + Some(date_to_excel_serial_1900(target)) +} + +pub(super) fn parse_date_string(s: &str) -> Option { + if let Ok(d) = NaiveDate::parse_from_str(s, "%Y-%m-%d") { + return Some(d); + } + if let Ok(d) = NaiveDate::parse_from_str(s, "%m/%d/%Y") { + return Some(d); + } + None +} + +pub(super) fn parse_time_string(s: &str) -> Option { + if let Ok(t) = NaiveTime::parse_from_str(s, "%H:%M:%S") { + return Some(t); + } + if let Ok(t) = NaiveTime::parse_from_str(s, "%H:%M") { + return Some(t); + } + None +} + +pub(super) fn parse_datetime_string(s: &str) -> Option { + const FORMATS: &[&str] = &[ + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%d %H:%M", + "%m/%d/%Y %H:%M:%S", + "%m/%d/%Y %H:%M", + ]; + for fmt in FORMATS { + if let Ok(dt) = NaiveDateTime::parse_from_str(s, fmt) { + return Some(dt); + } + } + None +} + +pub(super) fn 
coerce_date_value(value: &CellValue) -> Option { + if let Some(n) = to_number(value) + && let Some(date) = serial_to_excel_date_1900(n) + { + return Some(date); + } + if let CellValue::String(s) = value { + let trimmed = s.trim(); + if let Some(dt) = parse_datetime_string(trimmed) { + return Some(dt.date()); + } + if let Some(d) = parse_date_string(trimmed) { + return Some(d); + } + } + None +} + +pub(super) fn coerce_time_fraction(value: &CellValue) -> Option { + if let Some(n) = to_number(value) { + let frac = n.fract(); + return Some(if frac < 0.0 { (frac + 1.0) % 1.0 } else { frac }); + } + if let CellValue::String(s) = value { + let trimmed = s.trim(); + if let Some(dt) = parse_datetime_string(trimmed) { + return Some(dt.time().num_seconds_from_midnight() as f64 / SECONDS_PER_DAY); + } + if let Some(t) = parse_time_string(trimmed) { + return Some(t.num_seconds_from_midnight() as f64 / SECONDS_PER_DAY); + } + } + None +} + +pub(super) fn weekday_number(weekday: Weekday, return_type: i32) -> Option { + let num = match return_type { + 1 => weekday.num_days_from_sunday() as i64 + 1, + 2 => weekday.num_days_from_monday() as i64 + 1, + 3 => weekday.num_days_from_monday() as i64, + _ => return None, + }; + Some(num) +} + +pub(super) fn weeknum_value(date: NaiveDate, return_type: i32) -> Option { + let start_weekday = match return_type { + 1 => Weekday::Sun, + 2 => Weekday::Mon, + _ => return None, + }; + let first_day = NaiveDate::from_ymd_opt(date.year(), 1, 1)?; + let mut week_start = first_day; + while week_start.weekday() != start_weekday { + week_start = week_start.checked_sub_signed(Duration::days(1))?; + } + let days = (date - week_start).num_days(); + Some(days / 7 + 1) +} + +pub(super) fn last_day_of_month(date: NaiveDate) -> Option { + let year = date.year(); + let month = date.month(); + let first_next_month = if month == 12 { + NaiveDate::from_ymd_opt(year + 1, 1, 1)? + } else { + NaiveDate::from_ymd_opt(year, month + 1, 1)? 
+ }; + first_next_month.checked_sub_signed(Duration::days(1)) +} + +pub(super) fn add_months(date: NaiveDate, months: i32) -> Option { + let mut year = date.year(); + let mut month_index = date.month0() as i32 + months; // 0-based + + year += month_index.div_euclid(12); + month_index = month_index.rem_euclid(12); + let month = (month_index + 1) as u32; + + let day = date.day(); + let mut result = NaiveDate::from_ymd_opt(year, month, day); + if result.is_none() { + // Clamp to last valid day of target month. + let last = last_day_of_month(NaiveDate::from_ymd_opt(year, month, 1)?)?; + result = Some(last); + } + result +} + +pub(super) async fn collect_holiday_dates( + ctx: EvalCtx<'_>, + current_sheet: &str, + expr: &Expr, +) -> Result> { + let range = flatten_range_expr(ctx, current_sheet, expr).await?; + let mut out = Vec::new(); + for v in &range.values { + if let Some(n) = to_number(v) + && let Some(d) = serial_to_excel_date_1900(n) + { + out.push(d); + } + } + Ok(out) +} + +pub(super) fn is_business_day( + date: NaiveDate, + holidays: &[NaiveDate], + weekend: &WeekendConfig, +) -> bool { + if weekend.is_weekend(date.weekday()) { + return false; + } + !holidays.contains(&date) +} + +#[derive(Debug, Clone)] +pub(super) enum WeekendConfig { + Code(i32), + Pattern(String), +} + +impl Default for WeekendConfig { + fn default() -> Self { + WeekendConfig::Code(1) + } +} + +impl WeekendConfig { + pub(super) fn is_weekend(&self, weekday: Weekday) -> bool { + match self { + WeekendConfig::Code(c) => match c { + 1 => matches!(weekday, Weekday::Sat | Weekday::Sun), + 2 => matches!(weekday, Weekday::Sun | Weekday::Mon), + 3 => matches!(weekday, Weekday::Mon | Weekday::Tue), + 4 => matches!(weekday, Weekday::Tue | Weekday::Wed), + 5 => matches!(weekday, Weekday::Wed | Weekday::Thu), + 6 => matches!(weekday, Weekday::Thu | Weekday::Fri), + 7 => matches!(weekday, Weekday::Fri | Weekday::Sat), + 11 => matches!(weekday, Weekday::Sun), + 12 => matches!(weekday, Weekday::Mon), + 13 
=> matches!(weekday, Weekday::Tue), + 14 => matches!(weekday, Weekday::Wed), + 15 => matches!(weekday, Weekday::Thu), + 16 => matches!(weekday, Weekday::Fri), + 17 => matches!(weekday, Weekday::Sat), + _ => matches!(weekday, Weekday::Sat | Weekday::Sun), // Default + }, + WeekendConfig::Pattern(p) => { + if p.len() != 7 { + return matches!(weekday, Weekday::Sat | Weekday::Sun); + } + let idx = match weekday { + Weekday::Mon => 0, + Weekday::Tue => 1, + Weekday::Wed => 2, + Weekday::Thu => 3, + Weekday::Fri => 4, + Weekday::Sat => 5, + Weekday::Sun => 6, + }; + p.as_bytes().get(idx) == Some(&b'1') + }, + } + } +} + +pub(super) fn workday_core( + start: NaiveDate, + days: i64, + holidays: &[NaiveDate], + weekend: &WeekendConfig, +) -> Option { + if days == 0 { + return Some(start); + } + + let step = if days > 0 { 1 } else { -1 }; + let mut remaining = days.abs(); + let mut date = start; + + while remaining > 0 { + date = date.checked_add_signed(Duration::days(step))?; + if is_business_day(date, holidays, weekend) { + remaining -= 1; + } + } + + Some(date) +} + +pub(super) fn networkdays_core( + start: NaiveDate, + end: NaiveDate, + holidays: &[NaiveDate], + weekend: &WeekendConfig, +) -> i64 { + let (mut from, to, sign) = if start <= end { + (start, end, 1) + } else { + (end, start, -1) + }; + + let mut count = 0i64; + while from <= to { + if is_business_day(from, holidays, weekend) { + count += 1; + } + from = match from.checked_add_signed(Duration::days(1)) { + Some(d) => d, + None => break, + }; + } + + count * sign +} diff --git a/src/sheet/eval/engine/date_time/mod.rs b/crates/litchi-eval/src/engine/date_time/mod.rs similarity index 100% rename from src/sheet/eval/engine/date_time/mod.rs rename to crates/litchi-eval/src/engine/date_time/mod.rs diff --git a/src/sheet/eval/engine/date_time/offsets.rs b/crates/litchi-eval/src/engine/date_time/offsets.rs similarity index 96% rename from src/sheet/eval/engine/date_time/offsets.rs rename to 
crates/litchi-eval/src/engine/date_time/offsets.rs index 9f81a7b..f917a63 100644 --- a/src/sheet/eval/engine/date_time/offsets.rs +++ b/crates/litchi-eval/src/engine/date_time/offsets.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::EvalCtx; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::EvalCtx; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::{ add_months, date_to_excel_serial_1900, last_day_of_month, number_arg, serial_to_excel_date_1900, @@ -114,8 +114,8 @@ pub(crate) async fn eval_eomonth( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { diff --git a/src/sheet/eval/engine/date_time/week.rs b/crates/litchi-eval/src/engine/date_time/week.rs similarity index 98% rename from src/sheet/eval/engine/date_time/week.rs rename to crates/litchi-eval/src/engine/date_time/week.rs index e48753e..06152ab 100644 --- a/src/sheet/eval/engine/date_time/week.rs +++ b/crates/litchi-eval/src/engine/date_time/week.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use chrono::Datelike; @@ -114,8 +114,8 @@ pub(crate) async fn eval_isoweeknum( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { diff --git a/src/sheet/eval/engine/date_time/workdays.rs b/crates/litchi-eval/src/engine/date_time/workdays.rs similarity 
index 97% rename from src/sheet/eval/engine/date_time/workdays.rs rename to crates/litchi-eval/src/engine/date_time/workdays.rs index 50b687b..ffb6b59 100644 --- a/src/sheet/eval/engine/date_time/workdays.rs +++ b/crates/litchi-eval/src/engine/date_time/workdays.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::{ WeekendConfig, collect_holiday_dates, date_to_excel_serial_1900, networkdays_core, number_arg, @@ -260,8 +260,8 @@ pub(crate) async fn eval_networkdays_intl( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -310,7 +310,7 @@ mod tests { // Set up holidays engine.set_cell("Sheet1", 0, 1, CellValue::DateTime(45367.0)); // March 16 engine.set_cell("Sheet1", 1, 1, CellValue::DateTime(45368.0)); // March 17 - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 1, @@ -363,7 +363,7 @@ mod tests { // Set up holidays engine.set_cell("Sheet1", 0, 1, CellValue::DateTime(45367.0)); engine.set_cell("Sheet1", 1, 1, CellValue::DateTime(45368.0)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 1, diff --git a/src/sheet/eval/engine/dispatch.rs b/crates/litchi-eval/src/engine/dispatch.rs similarity index 99% rename from src/sheet/eval/engine/dispatch.rs rename to crates/litchi-eval/src/engine/dispatch.rs index 0ba6a70..ad185ca 100644 --- 
a/src/sheet/eval/engine/dispatch.rs +++ b/crates/litchi-eval/src/engine/dispatch.rs @@ -1,4 +1,4 @@ -use crate::sheet::{CellValue, Result}; +use litchi_core::sheet::{CellValue, Result}; use super::super::parser::Expr; use super::registry::{self, FUNCTION_MAP}; @@ -19,8 +19,8 @@ pub(super) async fn eval_function( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { diff --git a/src/sheet/eval/engine/engineering/complex.rs b/crates/litchi-eval/src/engine/engineering/complex.rs similarity index 92% rename from src/sheet/eval/engine/engineering/complex.rs rename to crates/litchi-eval/src/engine/engineering/complex.rs index 4abe909..9ca8fab 100644 --- a/src/sheet/eval/engine/engineering/complex.rs +++ b/crates/litchi-eval/src/engine/engineering/complex.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number, to_text}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_number, to_text}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use num_complex::Complex64; /// Parses an Excel complex number string (e.g., "3+4i", "j", "-2i") into a Complex64. 
@@ -526,7 +526,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -620,7 +620,7 @@ mod tests { #[tokio::test] async fn test_eval_complex_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.0), num_expr(4.0)]; let result = eval_complex(ctx, "Sheet1", &args).await.unwrap(); @@ -632,7 +632,7 @@ mod tests { #[tokio::test] async fn test_eval_complex_with_j() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.0), num_expr(4.0), str_expr("j")]; let result = eval_complex(ctx, "Sheet1", &args).await.unwrap(); @@ -644,7 +644,7 @@ mod tests { #[tokio::test] async fn test_eval_complex_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.0)]; let result = eval_complex(ctx, "Sheet1", &args).await.unwrap(); @@ -656,7 +656,7 @@ mod tests { #[tokio::test] async fn test_eval_imabs() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // |3+4i| = 5 let args = vec![str_expr("3+4i")]; @@ -669,7 +669,7 @@ mod tests { #[tokio::test] async fn test_eval_imreal() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("3+4i")]; let result = eval_imreal(ctx, "Sheet1", &args).await.unwrap(); @@ -681,7 +681,7 @@ mod tests { #[tokio::test] async fn test_eval_imaginary() { - let engine = 
crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("3+4i")]; let result = eval_imaginary(ctx, "Sheet1", &args).await.unwrap(); @@ -693,7 +693,7 @@ mod tests { #[tokio::test] async fn test_eval_imargument() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // arg(1+0i) = 0 let args = vec![str_expr("1")]; @@ -706,7 +706,7 @@ mod tests { #[tokio::test] async fn test_eval_imconjugate() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // conj(3+4i) = 3-4i let args = vec![str_expr("3+4i")]; @@ -719,7 +719,7 @@ mod tests { #[tokio::test] async fn test_eval_imsum() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // (3+4i) + (1+2i) = 4+6i let args = vec![str_expr("3+4i"), str_expr("1+2i")]; @@ -732,7 +732,7 @@ mod tests { #[tokio::test] async fn test_eval_imsub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // (3+4i) - (1+2i) = 2+2i let args = vec![str_expr("3+4i"), str_expr("1+2i")]; @@ -745,7 +745,7 @@ mod tests { #[tokio::test] async fn test_eval_improduct() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // (3+4i) * (1+2i) = -5+10i let args = vec![str_expr("3+4i"), str_expr("1+2i")]; @@ -761,7 +761,7 @@ mod tests { #[tokio::test] async fn test_eval_imdiv() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // (5+10i) / (1+2i) = 5 let args = vec![str_expr("5+10i"), str_expr("1+2i")]; @@ -774,7 +774,7 @@ mod tests { #[tokio::test] async fn test_eval_imdiv_by_zero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("3+4i"), str_expr("0")]; let result = eval_imdiv(ctx, "Sheet1", &args).await.unwrap(); @@ -786,7 +786,7 @@ mod tests { #[tokio::test] async fn test_eval_imsin() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("0")]; let result = eval_imsin(ctx, "Sheet1", &args).await.unwrap(); @@ -798,7 +798,7 @@ mod tests { #[tokio::test] async fn test_eval_imcos() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("0")]; let result = eval_imcos(ctx, "Sheet1", &args).await.unwrap(); @@ -810,7 +810,7 @@ mod tests { #[tokio::test] async fn test_eval_imsqrt() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // sqrt(4) = 2 let args = vec![str_expr("4")]; @@ -823,7 +823,7 @@ mod tests { #[tokio::test] async fn test_eval_imexp() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // exp(0) = 1 let args = vec![str_expr("0")]; @@ -836,7 +836,7 @@ mod tests { #[tokio::test] async fn test_eval_impower() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // (3+4i)^2 
= -7+24i let args = vec![str_expr("3+4i"), num_expr(2.0)]; @@ -856,7 +856,7 @@ mod tests { #[tokio::test] async fn test_eval_imln() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ln(1) = 0 let args = vec![str_expr("1")]; @@ -869,7 +869,7 @@ mod tests { #[tokio::test] async fn test_eval_imlog10() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // log10(10) = 1 let args = vec![str_expr("10")]; @@ -882,7 +882,7 @@ mod tests { #[tokio::test] async fn test_eval_imlog2() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // log2(2) = 1 let args = vec![str_expr("2")]; @@ -895,7 +895,7 @@ mod tests { #[tokio::test] async fn test_eval_complex_invalid_suffix() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.0), num_expr(4.0), str_expr("k")]; let result = eval_complex(ctx, "Sheet1", &args).await.unwrap(); @@ -907,7 +907,7 @@ mod tests { #[tokio::test] async fn test_eval_imabs_invalid() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("invalid")]; let result = eval_imabs(ctx, "Sheet1", &args).await.unwrap(); diff --git a/src/sheet/eval/engine/engineering/convert.rs b/crates/litchi-eval/src/engine/engineering/convert.rs similarity index 90% rename from src/sheet/eval/engine/engineering/convert.rs rename to crates/litchi-eval/src/engine/engineering/convert.rs index aacdc2d..f7e1f28 100644 --- a/src/sheet/eval/engine/engineering/convert.rs +++ 
b/crates/litchi-eval/src/engine/engineering/convert.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number, to_text}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_number, to_text}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use once_cell::sync::Lazy; use std::collections::HashMap; @@ -372,7 +372,7 @@ fn convert_temp(val: f64, from: &str, to: &str) -> f64 { #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -388,7 +388,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_weight() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 1 kg to grams let args = vec![num_expr(1.0), str_expr("kg"), str_expr("g")]; @@ -401,7 +401,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_distance() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 1 mile to meters let args = vec![num_expr(1.0), str_expr("mi"), str_expr("m")]; @@ -414,7 +414,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_time() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 1 hour to seconds let args = vec![num_expr(1.0), str_expr("hr"), str_expr("sec")]; @@ -427,7 +427,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_temperature_c_to_f() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 0°C to Fahrenheit (32°F) let args = 
vec![num_expr(0.0), str_expr("C"), str_expr("F")]; @@ -440,7 +440,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_temperature_f_to_c() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 212°F to Celsius (100°C) let args = vec![num_expr(212.0), str_expr("F"), str_expr("C")]; @@ -453,7 +453,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_temperature_c_to_k() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 0°C to Kelvin (273.15K) let args = vec![num_expr(0.0), str_expr("C"), str_expr("K")]; @@ -466,7 +466,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), str_expr("kg")]; let result = eval_convert(ctx, "Sheet1", &args).await.unwrap(); @@ -478,7 +478,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_invalid_unit() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), str_expr("kg"), str_expr("invalid")]; let result = eval_convert(ctx, "Sheet1", &args).await.unwrap(); @@ -490,7 +490,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_incompatible_units() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Can't convert weight to distance let args = vec![num_expr(1.0), str_expr("kg"), str_expr("m")]; @@ -503,7 +503,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_pressure() { - let engine = 
crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 1 atm to Pa let args = vec![num_expr(1.0), str_expr("atm"), str_expr("Pa")]; @@ -516,7 +516,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_force() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 1 lbf to Newtons let args = vec![num_expr(1.0), str_expr("lbf"), str_expr("N")]; @@ -529,7 +529,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_power() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 1 HP to Watts let args = vec![num_expr(1.0), str_expr("HP"), str_expr("W")]; @@ -542,7 +542,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_volume() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Convert 1 gallon to liters let args = vec![num_expr(1.0), str_expr("gal"), str_expr("l")]; @@ -555,7 +555,7 @@ mod tests { #[tokio::test] async fn test_eval_convert_non_numeric() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("not a number"), str_expr("kg"), str_expr("g")]; let result = eval_convert(ctx, "Sheet1", &args).await.unwrap(); diff --git a/src/sheet/eval/engine/engineering/mod.rs b/crates/litchi-eval/src/engine/engineering/mod.rs similarity index 100% rename from src/sheet/eval/engine/engineering/mod.rs rename to crates/litchi-eval/src/engine/engineering/mod.rs diff --git a/src/sheet/eval/engine/engineering/special.rs b/crates/litchi-eval/src/engine/engineering/special.rs 
similarity index 88% rename from src/sheet/eval/engine/engineering/special.rs rename to crates/litchi-eval/src/engine/engineering/special.rs index 922cb0f..71a42df 100644 --- a/src/sheet/eval/engine/engineering/special.rs +++ b/crates/litchi-eval/src/engine/engineering/special.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use statrs::function::erf::{erf, erfc}; pub(crate) async fn eval_erf( @@ -139,7 +139,7 @@ pub(crate) async fn eval_bessely( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -151,7 +151,7 @@ mod tests { #[tokio::test] async fn test_eval_erf_single_arg() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ERF(0) = 0 let args = vec![num_expr(0.0)]; @@ -164,7 +164,7 @@ mod tests { #[tokio::test] async fn test_eval_erf_positive() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ERF(1) ≈ 0.8427 let args = vec![num_expr(1.0)]; @@ -177,7 +177,7 @@ mod tests { #[tokio::test] async fn test_eval_erf_two_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ERF(0, 1) = ERF(1) - ERF(0) ≈ 0.8427 let args = vec![num_expr(0.0), num_expr(1.0)]; @@ -190,7 +190,7 @@ mod tests { #[tokio::test] async fn test_eval_erf_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_erf(ctx, "Sheet1", &args).await.unwrap(); @@ -202,7 +202,7 @@ mod tests { #[tokio::test] async fn test_eval_erfc() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ERFC(0) = 1 let args = vec![num_expr(0.0)]; @@ -215,7 +215,7 @@ mod tests { #[tokio::test] async fn test_eval_erfc_positive() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ERFC(1) ≈ 0.1573 let args = vec![num_expr(1.0)]; @@ -228,7 +228,7 @@ mod tests { #[tokio::test] async fn test_eval_erfc_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0)]; let result = eval_erfc(ctx, "Sheet1", &args).await.unwrap(); @@ -240,7 +240,7 @@ mod tests { #[tokio::test] async fn test_eval_besseli() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // BESSELI is a stub returning 0.0 let args = vec![num_expr(1.0), num_expr(0.0)]; @@ -253,7 +253,7 @@ mod tests { #[tokio::test] async fn test_eval_besseli_negative_order() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(-1.0)]; let result = eval_besseli(ctx, "Sheet1", &args).await.unwrap(); @@ -265,7 +265,7 @@ mod tests { #[tokio::test] async fn test_eval_besselj() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); 
let ctx = engine.ctx(); // BESSELJ is a stub returning 0.0 let args = vec![num_expr(1.0), num_expr(0.0)]; @@ -278,7 +278,7 @@ mod tests { #[tokio::test] async fn test_eval_besselk_positive_x() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // BESSELK requires x > 0 let args = vec![num_expr(1.0), num_expr(0.0)]; @@ -291,7 +291,7 @@ mod tests { #[tokio::test] async fn test_eval_besselk_non_positive_x() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0), num_expr(0.0)]; let result = eval_besselk(ctx, "Sheet1", &args).await.unwrap(); @@ -303,7 +303,7 @@ mod tests { #[tokio::test] async fn test_eval_bessely_positive_x() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // BESSELY requires x > 0 let args = vec![num_expr(1.0), num_expr(0.0)]; @@ -316,7 +316,7 @@ mod tests { #[tokio::test] async fn test_eval_bessely_non_positive_x() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0), num_expr(0.0)]; let result = eval_bessely(ctx, "Sheet1", &args).await.unwrap(); diff --git a/src/sheet/eval/engine/financial/bond.rs b/crates/litchi-eval/src/engine/financial/bond.rs similarity index 92% rename from src/sheet/eval/engine/financial/bond.rs rename to crates/litchi-eval/src/engine/financial/bond.rs index 0cbc770..fe5c68a 100644 --- a/src/sheet/eval/engine/financial/bond.rs +++ b/crates/litchi-eval/src/engine/financial/bond.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::EvalCtx; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use 
crate::engine::EvalCtx; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::{number_arg, solve_irr}; @@ -588,7 +588,7 @@ pub(crate) async fn eval_received( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -600,7 +600,7 @@ mod tests { #[tokio::test] async fn test_eval_yield_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // YIELD with settlement=0, maturity=365, rate=5%, pr=95, redemption=100, frequency=2 let args = vec![ @@ -620,7 +620,7 @@ mod tests { #[tokio::test] async fn test_eval_yield_with_basis() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // YIELD with basis=1 (actual/actual) let args = vec![ @@ -641,7 +641,7 @@ mod tests { #[tokio::test] async fn test_eval_yield_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0), num_expr(365.0)]; let result = eval_yield(ctx, "Sheet1", &args).await.unwrap(); @@ -653,7 +653,7 @@ mod tests { #[tokio::test] async fn test_eval_yield_invalid_frequency() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(0.0), @@ -672,7 +672,7 @@ mod tests { #[tokio::test] async fn test_eval_yield_maturity_before_settlement() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(365.0), @@ -691,7 +691,7 @@ mod tests { 
#[tokio::test] async fn test_eval_duration_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // DURATION with settlement=0, maturity=730 (2 years), coupon=5%, yield=6%, frequency=2 let args = vec![ @@ -711,7 +711,7 @@ mod tests { #[tokio::test] async fn test_eval_duration_with_basis() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(0.0), @@ -730,7 +730,7 @@ mod tests { #[tokio::test] async fn test_eval_duration_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0), num_expr(365.0)]; let result = eval_duration(ctx, "Sheet1", &args).await.unwrap(); @@ -742,7 +742,7 @@ mod tests { #[tokio::test] async fn test_eval_duration_invalid_frequency() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(0.0), @@ -760,7 +760,7 @@ mod tests { #[tokio::test] async fn test_eval_accrint_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ACCRINT: issue=0, first_interest=180, settlement=90, rate=5%, par=1000 let args = vec![ @@ -783,7 +783,7 @@ mod tests { #[tokio::test] async fn test_eval_accrint_with_basis() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ACCRINT with basis=1 (actual/365) let args = vec![ @@ -806,7 +806,7 @@ mod tests { #[tokio::test] async fn test_eval_accrint_invalid_rate() { 
- let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(0.0), @@ -824,7 +824,7 @@ mod tests { #[tokio::test] async fn test_eval_accrint_settlement_before_issue() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(90.0), @@ -842,7 +842,7 @@ mod tests { #[tokio::test] async fn test_eval_accrintm_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ACCRINTM: issue=0, settlement=180, rate=5%, par=1000 let args = vec![ @@ -864,7 +864,7 @@ mod tests { #[tokio::test] async fn test_eval_accrintm_default_par() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ACCRINTM with default par=1000 let args = vec![num_expr(0.0), num_expr(180.0), num_expr(0.05)]; @@ -877,7 +877,7 @@ mod tests { #[tokio::test] async fn test_eval_yielddisc_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // YIELDDISC: settlement=0, maturity=180, pr=95, redemption=100 let args = vec![ @@ -896,7 +896,7 @@ mod tests { #[tokio::test] async fn test_eval_yielddisc_invalid_price() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(0.0), @@ -913,7 +913,7 @@ mod tests { #[tokio::test] async fn test_eval_yielddisc_settlement_after_maturity() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(180.0), @@ -930,7 +930,7 @@ mod tests { #[tokio::test] async fn test_eval_yieldmat_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // YIELDMAT: settlement=90, maturity=365, issue=0, rate=5%, pr=98 let args = vec![ @@ -949,7 +949,7 @@ mod tests { #[tokio::test] async fn test_eval_yieldmat_invalid_dates() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Settlement after maturity let args = vec![ @@ -968,7 +968,7 @@ mod tests { #[tokio::test] async fn test_eval_disc_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // DISC: settlement=0, maturity=180, pr=95, redemption=100 let args = vec![ @@ -987,7 +987,7 @@ mod tests { #[tokio::test] async fn test_eval_disc_invalid() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(0.0), @@ -1004,7 +1004,7 @@ mod tests { #[tokio::test] async fn test_eval_intrate_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // INTRATE: settlement=0, maturity=180, investment=95000, redemption=100000 let args = vec![ @@ -1023,7 +1023,7 @@ mod tests { #[tokio::test] async fn test_eval_intrate_invalid() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(0.0), @@ -1040,7 +1040,7 @@ mod tests { 
#[tokio::test] async fn test_eval_coupdaybs_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_coupdaybs(ctx, "Sheet1", &args).await.unwrap(); @@ -1052,7 +1052,7 @@ mod tests { #[tokio::test] async fn test_eval_coupdays_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_coupdays(ctx, "Sheet1", &args).await.unwrap(); @@ -1064,7 +1064,7 @@ mod tests { #[tokio::test] async fn test_eval_coupdaysnc_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_coupdaysnc(ctx, "Sheet1", &args).await.unwrap(); @@ -1076,7 +1076,7 @@ mod tests { #[tokio::test] async fn test_eval_coupncd_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_coupncd(ctx, "Sheet1", &args).await.unwrap(); @@ -1088,7 +1088,7 @@ mod tests { #[tokio::test] async fn test_eval_coupnum_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_coupnum(ctx, "Sheet1", &args).await.unwrap(); @@ -1100,7 +1100,7 @@ mod tests { #[tokio::test] async fn test_eval_couppcd_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_couppcd(ctx, "Sheet1", &args).await.unwrap(); @@ -1112,7 +1112,7 @@ mod tests { 
#[tokio::test] async fn test_eval_amordegrc_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_amordegrc(ctx, "Sheet1", &args).await.unwrap(); @@ -1124,7 +1124,7 @@ mod tests { #[tokio::test] async fn test_eval_amorlinc_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_amorlinc(ctx, "Sheet1", &args).await.unwrap(); @@ -1136,7 +1136,7 @@ mod tests { #[tokio::test] async fn test_eval_pricedisc_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_pricedisc(ctx, "Sheet1", &args).await.unwrap(); @@ -1148,7 +1148,7 @@ mod tests { #[tokio::test] async fn test_eval_pricemat_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_pricemat(ctx, "Sheet1", &args).await.unwrap(); @@ -1160,7 +1160,7 @@ mod tests { #[tokio::test] async fn test_eval_received_stub() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_received(ctx, "Sheet1", &args).await.unwrap(); diff --git a/src/sheet/eval/engine/financial/cashflows.rs b/crates/litchi-eval/src/engine/financial/cashflows.rs similarity index 93% rename from src/sheet/eval/engine/financial/cashflows.rs rename to crates/litchi-eval/src/engine/financial/cashflows.rs index 7bddfe0..54f7f38 100644 --- a/src/sheet/eval/engine/financial/cashflows.rs +++ 
b/crates/litchi-eval/src/engine/financial/cashflows.rs @@ -1,9 +1,9 @@ -use crate::sheet::CellValue; -use crate::sheet::Result; -use crate::sheet::eval::engine::{ +use crate::engine::{ EvalCtx, evaluate_expression, flatten_range_expr, for_each_value_in_expr, to_bool, to_number, }; -use crate::sheet::eval::parser::Expr; +use crate::parser::Expr; +use litchi_core::sheet::CellValue; +use litchi_core::sheet::Result; use super::helpers::{ future_value, number_arg, present_value, solve_irr, solve_rate, solve_xirr, xnpv, @@ -1215,7 +1215,7 @@ pub(crate) async fn eval_vdb( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -1227,7 +1227,7 @@ mod tests { #[tokio::test] async fn test_eval_pv() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // PV of $1000 payments for 5 years at 5% let args = vec![num_expr(0.05), num_expr(5.0), num_expr(-1000.0)]; @@ -1240,7 +1240,7 @@ mod tests { #[tokio::test] async fn test_eval_fv() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // FV of $1000 payments for 5 years at 5% let args = vec![num_expr(0.05), num_expr(5.0), num_expr(-1000.0)]; @@ -1253,7 +1253,7 @@ mod tests { #[tokio::test] async fn test_eval_pmt() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // PMT to pay off $10000 over 5 years at 5% let args = vec![num_expr(0.05), num_expr(5.0), num_expr(10000.0)]; @@ -1266,7 +1266,7 @@ mod tests { #[tokio::test] async fn test_eval_nper() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = 
engine.ctx(); // NPER to pay off $10000 loan with $2309.75 payments at 5% // PV is negative (loan received), PMT is positive (payment made) @@ -1280,7 +1280,7 @@ mod tests { #[tokio::test] async fn test_eval_sln() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Straight-line depreciation: cost=10000, salvage=1000, life=5 let args = vec![num_expr(10000.0), num_expr(1000.0), num_expr(5.0)]; @@ -1293,7 +1293,7 @@ mod tests { #[tokio::test] async fn test_eval_syd() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Sum of years' digits depreciation: cost=10000, salvage=1000, life=5, period=1 let args = vec![ @@ -1311,7 +1311,7 @@ mod tests { #[tokio::test] async fn test_eval_ddb() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Double declining balance: cost=10000, salvage=1000, life=5, period=1 let args = vec![ @@ -1329,7 +1329,7 @@ mod tests { #[tokio::test] async fn test_eval_nominal() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // NOMINAL rate from 5% effective rate with 4 periods/year let args = vec![num_expr(0.05), num_expr(4.0)]; @@ -1342,7 +1342,7 @@ mod tests { #[tokio::test] async fn test_eval_effect() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // EFFECT rate from 5% nominal rate with 4 periods/year let args = vec![num_expr(0.05), num_expr(4.0)]; @@ -1355,7 +1355,7 @@ mod tests { #[tokio::test] async fn test_eval_rri() { - let engine = 
crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // RRI for 10000 growing to 15000 over 5 periods let args = vec![num_expr(5.0), num_expr(10000.0), num_expr(15000.0)]; @@ -1368,7 +1368,7 @@ mod tests { #[tokio::test] async fn test_eval_pduration() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // PDURATION for 10000 growing to 15000 at 8.4472% let args = vec![num_expr(0.084472), num_expr(10000.0), num_expr(15000.0)]; @@ -1381,7 +1381,7 @@ mod tests { #[tokio::test] async fn test_eval_npv() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // NPV with 10% rate and cash flows -1000, 300, 400, 500 // NPV = -1000/(1.1)^1 + 300/(1.1)^2 + 400/(1.1)^3 + 500/(1.1)^4 @@ -1402,7 +1402,7 @@ mod tests { #[tokio::test] async fn test_eval_ispmt() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // ISPMT for a loan at 10% rate, period 1 of 4, PV=10000 let args = vec![ @@ -1420,7 +1420,7 @@ mod tests { #[tokio::test] async fn test_eval_fvschedule() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // FVSCHEDULE with principal 10000 and single rate 0.05 // Note: Multi-rate schedules require range expressions which need @@ -1438,7 +1438,7 @@ mod tests { #[tokio::test] async fn test_eval_dollarde() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // DOLLARDE: 1.02 with fraction 16 = 1 + 2/16 = 1.125 let args = 
vec![num_expr(1.02), num_expr(16.0)]; @@ -1451,7 +1451,7 @@ mod tests { #[tokio::test] async fn test_eval_dollarfr() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // DOLLARFR: 1.125 with fraction 16 = 1.02 let args = vec![num_expr(1.125), num_expr(16.0)]; @@ -1466,7 +1466,7 @@ mod tests { #[tokio::test] async fn test_eval_db() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // DB: cost=10000, salvage=1000, life=5, period=1, month=12 let args = vec![ @@ -1488,7 +1488,7 @@ mod tests { #[tokio::test] async fn test_eval_db_default_month() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // DB with default month=12 let args = vec![ @@ -1506,7 +1506,7 @@ mod tests { #[tokio::test] async fn test_eval_db_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10000.0), num_expr(1000.0)]; let result = eval_db(ctx, "Sheet1", &args).await.unwrap(); @@ -1518,7 +1518,7 @@ mod tests { #[tokio::test] async fn test_eval_ipmt() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // IPMT for period 1 of 5-year loan at 5% with PV=10000 let args = vec![ @@ -1539,7 +1539,7 @@ mod tests { #[tokio::test] async fn test_eval_ipmt_with_fv() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // IPMT with FV and type (beginning of period) let args = vec![ @@ -1559,7 +1559,7 @@ mod 
tests { #[tokio::test] async fn test_eval_ppmt() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // PPMT for period 1 of 5-year loan at 5% with PV=10000 let args = vec![ @@ -1580,7 +1580,7 @@ mod tests { #[tokio::test] async fn test_eval_rate() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // RATE for 5-year loan with payments of 2309.75 and PV=10000 let args = vec![num_expr(5.0), num_expr(-2309.75), num_expr(10000.0)]; @@ -1596,7 +1596,7 @@ mod tests { #[tokio::test] async fn test_eval_rate_with_fv() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // RATE with FV and guess let args = vec![ @@ -1623,7 +1623,7 @@ mod tests { #[tokio::test] async fn test_eval_irr_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // IRR with no args let args: Vec = vec![]; @@ -1636,7 +1636,7 @@ mod tests { #[tokio::test] async fn test_eval_mirr_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // MIRR with no args let args: Vec = vec![]; @@ -1649,7 +1649,7 @@ mod tests { #[tokio::test] async fn test_eval_xirr_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // XIRR with 1 arg let args = vec![num_expr(-1000.0)]; @@ -1662,7 +1662,7 @@ mod tests { #[tokio::test] async fn test_eval_xnpv_wrong_args() { - let engine = 
crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // XNPV with 2 args let args = vec![num_expr(0.05), num_expr(-1000.0)]; @@ -1675,7 +1675,7 @@ mod tests { #[tokio::test] async fn test_eval_vdb() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // VDB: cost=10000, salvage=1000, life=5, start_period=0, end_period=1 let args = vec![ @@ -1697,7 +1697,7 @@ mod tests { #[tokio::test] async fn test_eval_vdb_full() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // VDB with all optional arguments let args = vec![ @@ -1718,7 +1718,7 @@ mod tests { #[tokio::test] async fn test_eval_vdb_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10000.0), num_expr(1000.0), num_expr(5.0)]; let result = eval_vdb(ctx, "Sheet1", &args).await.unwrap(); @@ -1732,7 +1732,7 @@ mod tests { #[tokio::test] async fn test_eval_pv_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.05)]; let result = eval_pv(ctx, "Sheet1", &args).await.unwrap(); @@ -1744,7 +1744,7 @@ mod tests { #[tokio::test] async fn test_eval_fv_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.05)]; let result = eval_fv(ctx, "Sheet1", &args).await.unwrap(); @@ -1756,7 +1756,7 @@ mod tests { #[tokio::test] async fn test_eval_pmt_wrong_args() { - let 
engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.05)]; let result = eval_pmt(ctx, "Sheet1", &args).await.unwrap(); @@ -1768,7 +1768,7 @@ mod tests { #[tokio::test] async fn test_eval_npv_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_npv(ctx, "Sheet1", &args).await.unwrap(); @@ -1780,7 +1780,7 @@ mod tests { #[tokio::test] async fn test_eval_sln_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10000.0)]; let result = eval_sln(ctx, "Sheet1", &args).await.unwrap(); @@ -1792,7 +1792,7 @@ mod tests { #[tokio::test] async fn test_eval_syd_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10000.0), num_expr(1000.0)]; let result = eval_syd(ctx, "Sheet1", &args).await.unwrap(); @@ -1804,7 +1804,7 @@ mod tests { #[tokio::test] async fn test_eval_ddb_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10000.0)]; let result = eval_ddb(ctx, "Sheet1", &args).await.unwrap(); @@ -1816,7 +1816,7 @@ mod tests { #[tokio::test] async fn test_eval_nominal_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.05)]; let result = eval_nominal(ctx, "Sheet1", &args).await.unwrap(); @@ 
-1828,7 +1828,7 @@ mod tests { #[tokio::test] async fn test_eval_effect_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.05)]; let result = eval_effect(ctx, "Sheet1", &args).await.unwrap(); @@ -1840,7 +1840,7 @@ mod tests { #[tokio::test] async fn test_eval_rri_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(5.0)]; let result = eval_rri(ctx, "Sheet1", &args).await.unwrap(); @@ -1852,7 +1852,7 @@ mod tests { #[tokio::test] async fn test_eval_pduration_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.05)]; let result = eval_pduration(ctx, "Sheet1", &args).await.unwrap(); @@ -1864,7 +1864,7 @@ mod tests { #[tokio::test] async fn test_eval_dollarde_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.02)]; let result = eval_dollarde(ctx, "Sheet1", &args).await.unwrap(); @@ -1876,7 +1876,7 @@ mod tests { #[tokio::test] async fn test_eval_dollarfr_invalid_fraction() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.125), num_expr(0.0)]; // Invalid fraction let result = eval_dollarfr(ctx, "Sheet1", &args).await.unwrap(); @@ -1888,7 +1888,7 @@ mod tests { #[tokio::test] async fn test_eval_fvschedule_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10000.0)]; let result = eval_fvschedule(ctx, "Sheet1", &args).await.unwrap(); @@ -1900,7 +1900,7 @@ mod tests { #[tokio::test] async fn test_eval_ipmt_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.05)]; let result = eval_ipmt(ctx, "Sheet1", &args).await.unwrap(); @@ -1912,7 +1912,7 @@ mod tests { #[tokio::test] async fn test_eval_ppmt_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.05)]; let result = eval_ppmt(ctx, "Sheet1", &args).await.unwrap(); @@ -1924,7 +1924,7 @@ mod tests { #[tokio::test] async fn test_eval_rate_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(5.0)]; let result = eval_rate(ctx, "Sheet1", &args).await.unwrap(); @@ -1936,12 +1936,12 @@ mod tests { #[tokio::test] async fn test_eval_irr_requires_mixed_cash_flows() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Set up cells with all positive cash flows engine.set_cell("Sheet1", 0, 0, CellValue::Float(1000.0)); engine.set_cell("Sheet1", 1, 0, CellValue::Float(100.0)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -1959,12 +1959,12 @@ mod tests { #[tokio::test] async fn test_eval_mirr_requires_mixed_cash_flows() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let 
engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Set up cells with all positive cash flows engine.set_cell("Sheet1", 0, 0, CellValue::Float(1000.0)); engine.set_cell("Sheet1", 1, 0, CellValue::Float(100.0)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, diff --git a/crates/litchi-eval/src/engine/financial/helpers.rs b/crates/litchi-eval/src/engine/financial/helpers.rs new file mode 100644 index 0000000..2aacd68 --- /dev/null +++ b/crates/litchi-eval/src/engine/financial/helpers.rs @@ -0,0 +1,330 @@ +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::Result; + +pub(super) async fn number_arg( + ctx: EvalCtx<'_>, + current_sheet: &str, + expr: &Expr, +) -> Result> { + let v = evaluate_expression(ctx, current_sheet, expr).await?; + Ok(to_number(&v)) +} + +pub(super) fn present_value(rate: f64, nper: f64, pmt: f64, fv: f64, typ: f64) -> f64 { + if rate.abs() < 1e-10 { + -(pmt * nper + fv) + } else { + let r1 = 1.0 + rate; + let factor = r1.powf(-nper); + let pv_pmt = pmt * (1.0 + rate * typ) * (1.0 - factor) / rate; + let pv_fv = fv * factor; + -(pv_pmt + pv_fv) + } +} + +pub(super) fn future_value(rate: f64, nper: f64, pmt: f64, pv: f64, typ: f64) -> f64 { + if rate.abs() < 1e-10 { + -(pv + pmt * nper) + } else { + let r1 = 1.0 + rate; + let factor = r1.powf(nper); + let fv_pv = pv * factor; + let fv_pmt = pmt * (1.0 + rate * typ) * (factor - 1.0) / rate; + -(fv_pv + fv_pmt) + } +} + +pub(super) fn solve_rate( + nper: f64, + pmt: f64, + pv: f64, + fv: f64, + typ: f64, + guess: f64, +) -> Option { + let mut rate = guess; + let max_iter = 100; + let tol = 1e-10; + + for _ in 0..max_iter { + let f = rate_function(rate, nper, pmt, pv, fv, typ); + if f.abs() < tol { + return Some(rate); + } + let deriv = numerical_derivative(|r| rate_function(r, nper, 
pmt, pv, fv, typ), rate); + if deriv.abs() < 1e-12 { + break; + } + let new_rate = rate - f / deriv; + if !new_rate.is_finite() { + break; + } + rate = new_rate; + } + + None +} + +pub(super) fn solve_irr(cash_flows: &[f64], guess: f64) -> Option { + let mut rate = guess; + let max_iter = 100; + let tol = 1e-10; + + for _ in 0..max_iter { + let f = npv_for_irr(rate, cash_flows); + if f.abs() < tol { + return Some(rate); + } + let deriv = numerical_derivative(|r| npv_for_irr(r, cash_flows), rate); + if deriv.abs() < 1e-12 { + break; + } + let new_rate = rate - f / deriv; + if !new_rate.is_finite() { + break; + } + rate = new_rate; + } + + None +} + +pub(super) fn xnpv(rate: f64, cash_flows: &[f64], dates: &[f64]) -> f64 { + let base_date = dates[0]; + let mut total = 0.0; + for (cf, d) in cash_flows.iter().zip(dates.iter()) { + let t = (d - base_date) / 365.0; + let denom = (1.0 + rate).powf(t); + total += cf / denom; + } + total +} + +pub(super) fn solve_xirr(cash_flows: &[f64], dates: &[f64], guess: f64) -> Option { + let mut rate = guess; + let max_iter = 100; + let tol = 1e-10; + + for _ in 0..max_iter { + let f = xnpv(rate, cash_flows, dates); + if f.abs() < tol { + return Some(rate); + } + let deriv = numerical_derivative(|r| xnpv(r, cash_flows, dates), rate); + if deriv.abs() < 1e-12 { + break; + } + let new_rate = rate - f / deriv; + if !new_rate.is_finite() { + break; + } + rate = new_rate; + } + + None +} + +fn rate_function(rate: f64, nper: f64, pmt: f64, pv: f64, fv: f64, typ: f64) -> f64 { + if rate.abs() < 1e-10 { + pv + pmt * nper + fv + } else { + let r1 = 1.0 + rate; + let factor = r1.powf(-nper); + let term1 = pmt * (1.0 + rate * typ) * (1.0 - factor) / rate; + let term2 = fv * factor; + pv + term1 + term2 + } +} + +fn npv_for_irr(rate: f64, cash_flows: &[f64]) -> f64 { + let mut total = 0.0; + for (i, cf) in cash_flows.iter().enumerate() { + let t = i as f64; + let denom = (1.0 + rate).powf(t); + total += cf / denom; + } + total +} + +fn 
numerical_derivative(f: F, x: f64) -> f64 +where + F: Fn(f64) -> f64, +{ + let h = 1e-5; + let f1 = f(x + h); + let f2 = f(x - h); + (f1 - f2) / (2.0 * h) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_present_value_zero_rate() { + // PV with zero rate: -(pmt * nper + fv) + let pv = present_value(0.0, 10.0, -100.0, 1000.0, 0.0); + // -( -100 * 10 + 1000 ) = -( -1000 + 1000 ) = 0 + assert!((pv - 0.0).abs() < 1e-9); + } + + #[test] + fn test_present_value_with_rate() { + // PV of annuity: 10 periods, $100 payment, 5% rate + let pv = present_value(0.05, 10.0, -100.0, 0.0, 0.0); + // Expected: 100 * (1 - 1.05^-10) / 0.05 ≈ 772.17 + assert!((pv - 772.17).abs() < 1.0); + } + + #[test] + fn test_present_value_with_fv() { + // PV with future value + let pv = present_value(0.05, 10.0, 0.0, 1000.0, 0.0); + // Expected: -1000 / 1.05^10 ≈ -613.91 + assert!((pv - (-613.91)).abs() < 1.0); + } + + #[test] + fn test_present_value_with_type() { + // PV with beginning-of-period payments (typ=1) + let pv = present_value(0.05, 10.0, -100.0, 0.0, 1.0); + // Expected: 100 * (1 + 0.05) * (1 - 1.05^-10) / 0.05 ≈ 810.78 + assert!((pv - 810.78).abs() < 1.0); + } + + #[test] + fn test_future_value_zero_rate() { + // FV with zero rate: -(pv + pmt * nper) + let fv = future_value(0.0, 10.0, -100.0, -1000.0, 0.0); + // -( -1000 + (-100) * 10 ) = -( -1000 - 1000 ) = 2000 + assert!((fv - 2000.0).abs() < 1e-9); + } + + #[test] + fn test_future_value_with_rate() { + // FV of annuity: 10 periods, $100 payment, 5% rate + let fv = future_value(0.05, 10.0, -100.0, 0.0, 0.0); + // Expected: 100 * (1.05^10 - 1) / 0.05 ≈ 1257.79 + assert!((fv - 1257.79).abs() < 1.0); + } + + #[test] + fn test_future_value_with_pv() { + // FV with present value + let fv = future_value(0.05, 10.0, 0.0, -1000.0, 0.0); + // Expected: 1000 * 1.05^10 ≈ 1628.89 + assert!((fv - 1628.89).abs() < 1.0); + } + + #[test] + fn test_future_value_with_type() { + // FV with beginning-of-period payments (typ=1) + let 
fv = future_value(0.05, 10.0, -100.0, 0.0, 1.0); + // Expected: 100 * (1 + 0.05) * (1.05^10 - 1) / 0.05 ≈ 1320.68 + assert!((fv - 1320.68).abs() < 1.0); + } + + #[test] + fn test_solve_rate_basic() { + // Solve for rate given nper=10, pmt=-100, pv=772.17, fv=0 + let rate = solve_rate(10.0, -100.0, 772.17, 0.0, 0.0, 0.1); + assert!(rate.is_some()); + let r = rate.unwrap(); + assert!((r - 0.05).abs() < 0.01); + } + + #[test] + fn test_solve_rate_with_fv() { + // Solve for rate given nper=10, pmt=0, pv=-1000, fv=1628.89 + let rate = solve_rate(10.0, 0.0, -1000.0, 1628.89, 0.0, 0.1); + assert!(rate.is_some()); + let r = rate.unwrap(); + assert!((r - 0.05).abs() < 0.01); + } + + #[test] + fn test_solve_irr_basic() { + // IRR for cash flows: -1000, 300, 400, 500 + // This should give a positive rate + let cash_flows = vec![-1000.0, 300.0, 400.0, 500.0]; + let irr = solve_irr(&cash_flows, 0.1); + assert!(irr.is_some()); + let r = irr.unwrap(); + assert!(r > 0.0 && r < 0.5); + } + + #[test] + fn test_solve_irr_simple() { + // Simple IRR: -1000, 1100 after 1 period -> 10% + let cash_flows = vec![-1000.0, 1100.0]; + let irr = solve_irr(&cash_flows, 0.1); + assert!(irr.is_some()); + let r = irr.unwrap(); + assert!((r - 0.10).abs() < 0.01); + } + + #[test] + fn test_solve_irr_no_solution() { + // All positive cash flows - no IRR + let cash_flows = vec![1000.0, 100.0, 200.0]; + let _irr = solve_irr(&cash_flows, 0.1); + // May converge to something or return None + // Just ensure it doesn't panic + } + + #[test] + fn test_xnpv_basic() { + // XNPV with dates 0, 365, 730 days + let cash_flows = vec![-1000.0, 300.0, 800.0]; + let dates = vec![0.0, 365.0, 730.0]; + let npv = xnpv(0.05, &cash_flows, &dates); + // NPV = -1000 + 300/1.05^1 + 800/1.05^2 + assert!(npv.is_finite()); + assert!(npv > -1000.0); + } + + #[test] + fn test_xnpv_zero_rate() { + let cash_flows = vec![-1000.0, 300.0, 800.0]; + let dates = vec![0.0, 365.0, 730.0]; + let npv = xnpv(0.0, &cash_flows, &dates); + // 
With zero rate, just sum of cash flows + assert!((npv - 100.0).abs() < 1e-9); + } + + #[test] + fn test_solve_xirr_basic() { + // XIRR for cash flows over 2 years + let cash_flows = vec![-1000.0, 600.0, 600.0]; + let dates = vec![0.0, 365.0, 730.0]; + let xirr = solve_xirr(&cash_flows, &dates, 0.1); + assert!(xirr.is_some()); + let r = xirr.unwrap(); + assert!(r > 0.0 && r < 0.5); + } + + #[test] + fn test_numerical_derivative_linear() { + // Derivative of f(x) = 2x + 3 is 2 + let f = |x: f64| 2.0 * x + 3.0; + let deriv = numerical_derivative(f, 1.0); + assert!((deriv - 2.0).abs() < 1e-4); + } + + #[test] + fn test_numerical_derivative_quadratic() { + // Derivative of f(x) = x^2 is 2x + let f = |x: f64| x * x; + let deriv_at_3 = numerical_derivative(f, 3.0); + assert!((deriv_at_3 - 6.0).abs() < 1e-4); + } + + #[test] + fn test_numerical_derivative_exp() { + // Derivative of f(x) = e^x is e^x + let f = |x: f64| x.exp(); + let deriv_at_0 = numerical_derivative(f, 0.0); + assert!((deriv_at_0 - 1.0).abs() < 1e-4); + } +} diff --git a/src/sheet/eval/engine/financial/mod.rs b/crates/litchi-eval/src/engine/financial/mod.rs similarity index 100% rename from src/sheet/eval/engine/financial/mod.rs rename to crates/litchi-eval/src/engine/financial/mod.rs diff --git a/crates/litchi-eval/src/engine/info/mod.rs b/crates/litchi-eval/src/engine/info/mod.rs new file mode 100644 index 0000000..9754e22 --- /dev/null +++ b/crates/litchi-eval/src/engine/info/mod.rs @@ -0,0 +1,1187 @@ +use litchi_core::sheet::{CellValue, Result}; + +use super::super::parser::Expr; +use super::{EvalCtx, ResolvedName, evaluate_expression, is_blank, to_number}; + +const EPS: f64 = 1e-12; + +pub(crate) fn error_code(value: &CellValue) -> Option<&str> { + match value { + CellValue::Error(code) => Some(code.as_str()), + _ => None, + } +} + +pub(crate) fn is_na_error(code: &str) -> bool { + code.eq_ignore_ascii_case("#N/A") +} + +fn is_even_number(value: f64) -> bool { + ((value / 2.0).fract()).abs() < EPS +} 
+ +pub(crate) enum ReferenceKind { + Single { sheet: String, row: u32, col: u32 }, + Range, + None, + Error(CellValue), +} + +fn single_cell_from_range( + sheet: &str, + start_row: u32, + end_row: u32, + start_col: u32, + end_col: u32, +) -> Option<(String, u32, u32)> { + let (sr, er) = if start_row <= end_row { + (start_row, end_row) + } else { + (end_row, start_row) + }; + let (sc, ec) = if start_col <= end_col { + (start_col, end_col) + } else { + (end_col, start_col) + }; + if sr == er && sc == ec { + Some((sheet.to_string(), sr, sc)) + } else { + None + } +} + +pub(crate) fn classify_reference( + ctx: EvalCtx<'_>, + current_sheet: &str, + expr: &Expr, +) -> Result { + match expr { + Expr::Reference { sheet, row, col } => Ok(ReferenceKind::Single { + sheet: sheet.clone(), + row: *row, + col: *col, + }), + Expr::Range(range) => { + if let Some((sheet, row, col)) = single_cell_from_range( + range.sheet.as_str(), + range.start_row, + range.end_row, + range.start_col, + range.end_col, + ) { + Ok(ReferenceKind::Single { sheet, row, col }) + } else { + Ok(ReferenceKind::Range) + } + }, + Expr::Name(name) => match ctx.resolve_name(current_sheet, name.as_str())? 
{ + Some(ResolvedName::Cell { sheet, row, col }) => { + Ok(ReferenceKind::Single { sheet, row, col }) + }, + Some(ResolvedName::Range(range)) => { + if let Some((sheet, row, col)) = single_cell_from_range( + range.sheet.as_str(), + range.start_row, + range.end_row, + range.start_col, + range.end_col, + ) { + Ok(ReferenceKind::Single { sheet, row, col }) + } else { + Ok(ReferenceKind::Range) + } + }, + None => Ok(ReferenceKind::Error(CellValue::Error(format!( + "Unknown name: {}", + name + )))), + }, + _ => Ok(ReferenceKind::None), + } +} + +pub(crate) async fn eval_isblank( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISBLANK expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + Ok(CellValue::Bool(is_blank(&v))) +} + +pub(crate) async fn eval_iserror( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISERROR expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + Ok(CellValue::Bool(error_code(&v).is_some())) +} + +pub(crate) async fn eval_iserr( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISERR expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let result = error_code(&v).is_some_and(|code| !is_na_error(code)); + Ok(CellValue::Bool(result)) +} + +pub(crate) async fn eval_isna( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISNA expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let result = error_code(&v).is_some_and(is_na_error); + Ok(CellValue::Bool(result)) +} + +pub(crate) async fn eval_isnumber( + ctx: EvalCtx<'_>, + 
current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISNUMBER expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let result = matches!( + v, + CellValue::Int(_) | CellValue::Float(_) | CellValue::DateTime(_) + ); + Ok(CellValue::Bool(result)) +} + +pub(crate) async fn eval_istext( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISTEXT expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + Ok(CellValue::Bool(matches!(v, CellValue::String(_)))) +} + +pub(crate) async fn eval_islogical( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISLOGICAL expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + Ok(CellValue::Bool(matches!(v, CellValue::Bool(_)))) +} + +pub(crate) async fn eval_isnontext( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISNONTEXT expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + Ok(CellValue::Bool(!matches!(v, CellValue::String(_)))) +} + +pub(crate) async fn eval_iseven( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISEVEN expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let num = match to_number(&v) { + Some(n) => n, + None => { + return Ok(CellValue::Error( + "ISEVEN expects a numeric argument".to_string(), + )); + }, + }; + let truncated = num.trunc(); + Ok(CellValue::Bool(is_even_number(truncated))) +} + +pub(crate) async fn eval_isodd( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) 
-> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISODD expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let num = match to_number(&v) { + Some(n) => n, + None => { + return Ok(CellValue::Error( + "ISODD expects a numeric argument".to_string(), + )); + }, + }; + let truncated = num.trunc(); + Ok(CellValue::Bool(!is_even_number(truncated))) +} + +pub(crate) async fn eval_isformula( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISFORMULA expects 1 argument".to_string())); + } + match classify_reference(ctx, current_sheet, &args[0])? { + ReferenceKind::Single { sheet, row, col } => { + let raw = ctx.raw_cell_value(sheet.as_str(), row, col).await?; + Ok(CellValue::Bool(matches!(raw, CellValue::Formula { .. }))) + }, + ReferenceKind::Range => Ok(CellValue::Error( + "ISFORMULA expects a single cell reference".to_string(), + )), + ReferenceKind::None => Ok(CellValue::Error( + "ISFORMULA expects a cell reference".to_string(), + )), + ReferenceKind::Error(err) => Ok(err), + } +} + +pub(crate) async fn eval_formulatext( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() > 1 { + return Ok(CellValue::Error( + "FORMULATEXT expects 0 or 1 argument".to_string(), + )); + } + + let target = if args.is_empty() { + if let Some((sheet, row, col)) = ctx.current_position() { + Some((sheet, row, col)) + } else { + None + } + } else { + match classify_reference(ctx, current_sheet, &args[0])? 
{ + ReferenceKind::Single { sheet, row, col } => Some((sheet, row, col)), + ReferenceKind::Range => None, + ReferenceKind::None => None, + ReferenceKind::Error(err) => return Ok(err), + } + }; + + let (sheet, row, col) = match target { + Some(t) => t, + None => return Ok(CellValue::Error("#N/A".to_string())), + }; + + let raw = ctx.raw_cell_value(sheet.as_str(), row, col).await?; + match raw { + CellValue::Formula { formula, .. } => { + let mut result = String::from("="); + result.push_str(&formula); + Ok(CellValue::String(result)) + }, + _ => Ok(CellValue::Error("#N/A".to_string())), + } +} + +pub(crate) async fn eval_info( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("INFO expects 1 argument".to_string())); + } + let info_type = evaluate_expression(ctx, current_sheet, &args[0]).await?; + + let info_type = match info_type { + CellValue::String(s) => s.trim().to_ascii_lowercase(), + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }; + if info_type.is_empty() { + return Ok(CellValue::Error("#VALUE!".to_string())); + } + + let result = match info_type.as_str() { + "recalc" => CellValue::String("Automatic".to_string()), + "system" => CellValue::String("pcdos".to_string()), + _ => CellValue::Error("#N/A".to_string()), + }; + + Ok(result) +} + +pub(crate) async fn eval_isref( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ISREF expects 1 argument".to_string())); + } + match classify_reference(ctx, current_sheet, &args[0])? { + ReferenceKind::Single { .. 
} | ReferenceKind::Range => Ok(CellValue::Bool(true)), + ReferenceKind::None => Ok(CellValue::Bool(false)), + ReferenceKind::Error(err) => Ok(err), + } +} + +pub(crate) async fn eval_na(_: EvalCtx<'_>, _: &str, args: &[Expr]) -> Result { + if !args.is_empty() { + return Ok(CellValue::Error("NA expects no arguments".to_string())); + } + Ok(CellValue::Error("#N/A".to_string())) +} + +pub(crate) async fn eval_iferror( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 2 { + return Ok(CellValue::Error("IFERROR expects 2 arguments".to_string())); + } + let value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if error_code(&value).is_some() { + evaluate_expression(ctx, current_sheet, &args[1]).await + } else { + Ok(value) + } +} + +pub(crate) async fn eval_ifna( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 2 { + return Ok(CellValue::Error("IFNA expects 2 arguments".to_string())); + } + let value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if error_code(&value).is_some_and(is_na_error) { + evaluate_expression(ctx, current_sheet, &args[1]).await + } else { + Ok(value) + } +} + +pub(crate) async fn eval_n( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("N expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let result = match v { + CellValue::Int(_) | CellValue::Float(_) | CellValue::DateTime(_) => v, + CellValue::Bool(true) => CellValue::Int(1), + CellValue::Bool(false) => CellValue::Int(0), + CellValue::Error(_) => v, + CellValue::Empty => CellValue::Int(0), + CellValue::String(_) => CellValue::Int(0), + CellValue::Formula { .. 
} => CellValue::Int(0), + }; + Ok(result) +} + +pub(crate) async fn eval_t( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("T expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + match v { + CellValue::String(_) => Ok(v), + CellValue::Error(_) => Ok(v), + _ => Ok(CellValue::String(String::new())), + } +} + +pub(crate) async fn eval_type( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("TYPE expects 1 argument".to_string())); + } + if matches!(args[0], Expr::Range(_)) { + return Ok(CellValue::Int(64)); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let code = match v { + CellValue::Int(_) | CellValue::Float(_) | CellValue::DateTime(_) => 1, + CellValue::String(_) => 2, + CellValue::Bool(_) => 4, + CellValue::Error(_) => return Ok(v), + CellValue::Empty => 1, + CellValue::Formula { .. } => 64, + }; + Ok(CellValue::Int(code)) +} + +pub(crate) async fn eval_value( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("VALUE expects 1 argument".to_string())); + } + let v = evaluate_expression(ctx, current_sheet, &args[0]).await?; + match v { + CellValue::Int(_) | CellValue::Float(_) | CellValue::DateTime(_) => Ok(v), + CellValue::Error(_) => Ok(v), + CellValue::String(s) => { + let trimmed = s.trim(); + if trimmed.is_empty() { + return Ok(CellValue::Error("#VALUE!".to_string())); + } + // For now, simple float parsing + if let Ok(f) = trimmed.parse::() { + Ok(CellValue::Float(f)) + } else { + Ok(CellValue::Error("#VALUE!".to_string())) + } + }, + CellValue::Bool(_) | CellValue::Empty | CellValue::Formula { .. 
} => { + Ok(CellValue::Error("#VALUE!".to_string())) + }, + } +} + +pub(crate) async fn eval_sheet( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() > 1 { + return Ok(CellValue::Error( + "SHEET expects 0 or 1 argument".to_string(), + )); + } + + let target_sheet = if args.is_empty() { + current_sheet.to_string() + } else { + match classify_reference(ctx, current_sheet, &args[0])? { + ReferenceKind::Single { sheet, .. } => sheet, + ReferenceKind::Range => { + // For a range, SHEET returns the index of the first sheet in the range. + // In our current implementation, ranges are always on a single sheet. + match &args[0] { + Expr::Range(r) => r.sheet.clone(), + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + } + }, + ReferenceKind::None => return Ok(CellValue::Error("#VALUE!".to_string())), + ReferenceKind::Error(err) => return Ok(err), + } + }; + + // We need a way to get the sheet index from the context. + // Since EngineCtx doesn't have it, we might need to add it or use a workaround. + // For now, let's assume we can add it to EngineCtx. + Ok(CellValue::Int( + ctx.get_sheet_index(&target_sheet) + .map(|idx| (idx + 1) as i64) + .unwrap_or(0), + )) +} + +pub(crate) async fn eval_sheets( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() > 1 { + return Ok(CellValue::Error( + "SHEETS expects 0 or 1 argument".to_string(), + )); + } + + if args.is_empty() { + return Ok(CellValue::Int(ctx.get_sheet_count() as i64)); + } + + match classify_reference(ctx, current_sheet, &args[0])? { + ReferenceKind::Single { .. } | ReferenceKind::Range => { + // In our current implementation, ranges/refs are always on a single sheet. + // Excel supports multi-sheet references (3D references), but we don't yet. 
+ Ok(CellValue::Int(1)) + }, + ReferenceKind::None => Ok(CellValue::Int(ctx.get_sheet_count() as i64)), + ReferenceKind::Error(err) => Ok(err), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::Expr; + + fn num_expr(n: f64) -> Expr { + if n == n.floor() { + Expr::Literal(CellValue::Int(n as i64)) + } else { + Expr::Literal(CellValue::Float(n)) + } + } + + fn str_expr(s: &str) -> Expr { + Expr::Literal(CellValue::String(s.to_string())) + } + + fn bool_expr(b: bool) -> Expr { + Expr::Literal(CellValue::Bool(b)) + } + + #[test] + fn test_error_code_with_error() { + let err = CellValue::Error("#VALUE!".to_string()); + assert_eq!(error_code(&err), Some("#VALUE!")); + } + + #[test] + fn test_error_code_without_error() { + let val = CellValue::Int(42); + assert_eq!(error_code(&val), None); + } + + #[test] + fn test_is_na_error_true() { + assert!(is_na_error("#N/A")); + assert!(is_na_error("#n/a")); + } + + #[test] + fn test_is_na_error_false() { + assert!(!is_na_error("#VALUE!")); + assert!(!is_na_error("#REF!")); + } + + #[test] + fn test_is_even_number() { + assert!(is_even_number(4.0)); + assert!(is_even_number(-2.0)); + assert!(is_even_number(0.0)); + assert!(!is_even_number(3.0)); + assert!(!is_even_number(-1.0)); + } + + #[tokio::test] + async fn test_eval_isblank_with_blank() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Empty)]; + let result = eval_isblank(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_isblank_with_value() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_isblank(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected Bool(false)"), + } + } + + #[tokio::test] + async 
fn test_eval_iserror_with_error() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Error("#VALUE!".to_string()))]; + let result = eval_iserror(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_iserror_with_value() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_iserror(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected Bool(false)"), + } + } + + #[tokio::test] + async fn test_eval_iserr_with_value_error() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Error("#VALUE!".to_string()))]; + let result = eval_iserr(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_iserr_with_na_error() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Error("#N/A".to_string()))]; + let result = eval_iserr(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected Bool(false) for #N/A"), + } + } + + #[tokio::test] + async fn test_eval_isna_with_na() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Error("#N/A".to_string()))]; + let result = eval_isna(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_isna_with_other_error() { + let engine = crate::engine::test_helpers::TestEngine::new(); + 
let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Error("#VALUE!".to_string()))]; + let result = eval_isna(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected Bool(false)"), + } + } + + #[tokio::test] + async fn test_eval_isnumber_with_int() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_isnumber(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_isnumber_with_float() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(std::f64::consts::PI)]; + let result = eval_isnumber(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_isnumber_with_string() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("hello")]; + let result = eval_isnumber(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected Bool(false)"), + } + } + + #[tokio::test] + async fn test_eval_istext_with_string() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("hello")]; + let result = eval_istext(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_istext_with_number() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_istext(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected 
Bool(false)"), + } + } + + #[tokio::test] + async fn test_eval_islogical_with_true() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![bool_expr(true)]; + let result = eval_islogical(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_islogical_with_false() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![bool_expr(false)]; + let result = eval_islogical(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_islogical_with_number() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(1.0)]; + let result = eval_islogical(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected Bool(false)"), + } + } + + #[tokio::test] + async fn test_eval_isnontext_with_number() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_isnontext(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_isnontext_with_string() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("hello")]; + let result = eval_isnontext(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected Bool(false)"), + } + } + + #[tokio::test] + async fn test_eval_iseven_with_even() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(4.0)]; + let result = 
eval_iseven(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_iseven_with_odd() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(3.0)]; + let result = eval_iseven(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected Bool(false)"), + } + } + + #[tokio::test] + async fn test_eval_iseven_with_non_number() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("hello")]; + let result = eval_iseven(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects a numeric")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_isodd_with_odd() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(3.0)]; + let result = eval_isodd(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(true) => {}, + _ => panic!("Expected Bool(true)"), + } + } + + #[tokio::test] + async fn test_eval_isodd_with_even() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(4.0)]; + let result = eval_isodd(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Bool(false) => {}, + _ => panic!("Expected Bool(false)"), + } + } + + #[tokio::test] + async fn test_eval_na() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![]; + let result = eval_na(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert_eq!(e, "#N/A"), + _ => panic!("Expected Error(#N/A)"), + } + } + + #[tokio::test] + async fn test_eval_na_with_args() { + let engine = 
crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(1.0)]; + let result = eval_na(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects no arguments")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_iferror_with_error() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![ + Expr::Literal(CellValue::Error("#VALUE!".to_string())), + num_expr(42.0), + ]; + let result = eval_iferror(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(42) => {}, + _ => panic!("Expected Int(42)"), + } + } + + #[tokio::test] + async fn test_eval_iferror_with_no_error() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(100.0), num_expr(42.0)]; + let result = eval_iferror(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(100.0) | CellValue::Int(100) => {}, + _ => panic!("Expected 100, got {:?}", result), + } + } + + #[tokio::test] + async fn test_eval_ifna_with_na() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![ + Expr::Literal(CellValue::Error("#N/A".to_string())), + str_expr("not available"), + ]; + let result = eval_ifna(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::String(s) => assert_eq!(s, "not available"), + _ => panic!("Expected String"), + } + } + + #[tokio::test] + async fn test_eval_ifna_with_other_error() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![ + Expr::Literal(CellValue::Error("#VALUE!".to_string())), + str_expr("not available"), + ]; + let result = eval_ifna(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert_eq!(e, "#VALUE!"), + _ => panic!("Expected Error(#VALUE!)"), + } + } + + 
#[tokio::test] + async fn test_eval_n_with_number() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_n(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(42.0) | CellValue::Int(42) => {}, + _ => panic!("Expected number, got {:?}", result), + } + } + + #[tokio::test] + async fn test_eval_n_with_true() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![bool_expr(true)]; + let result = eval_n(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(1) => {}, + _ => panic!("Expected Int(1)"), + } + } + + #[tokio::test] + async fn test_eval_n_with_false() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![bool_expr(false)]; + let result = eval_n(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(0) => {}, + _ => panic!("Expected Int(0)"), + } + } + + #[tokio::test] + async fn test_eval_n_with_string() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("hello")]; + let result = eval_n(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(0) => {}, + _ => panic!("Expected Int(0)"), + } + } + + #[tokio::test] + async fn test_eval_t_with_string() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("hello")]; + let result = eval_t(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::String(s) => assert_eq!(s, "hello"), + _ => panic!("Expected String"), + } + } + + #[tokio::test] + async fn test_eval_t_with_number() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_t(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::String(s) => 
assert!(s.is_empty()), + _ => panic!("Expected empty String"), + } + } + + #[tokio::test] + async fn test_eval_type_with_number() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_type(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(1) => {}, + _ => panic!("Expected Int(1)"), + } + } + + #[tokio::test] + async fn test_eval_type_with_string() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("hello")]; + let result = eval_type(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(2) => {}, + _ => panic!("Expected Int(2)"), + } + } + + #[tokio::test] + async fn test_eval_type_with_bool() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![bool_expr(true)]; + let result = eval_type(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(4) => {}, + _ => panic!("Expected Int(4)"), + } + } + + #[tokio::test] + async fn test_eval_type_with_error() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Error("#VALUE!".to_string()))]; + let result = eval_type(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert_eq!(e, "#VALUE!"), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_value_with_number_string() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("123.45")]; + let result = eval_value(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(v) => assert!((v - 123.45).abs() < 0.01), + _ => panic!("Expected Float"), + } + } + + #[tokio::test] + async fn test_eval_value_with_non_number() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = 
engine.ctx(); + let args = vec![str_expr("hello")]; + let result = eval_value(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("#VALUE!")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_value_with_empty() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("")]; + let result = eval_value(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("#VALUE!")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_info_recalc() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("recalc")]; + let result = eval_info(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::String(s) => assert_eq!(s, "Automatic"), + _ => panic!("Expected String"), + } + } + + #[tokio::test] + async fn test_eval_info_system() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("system")]; + let result = eval_info(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::String(s) => assert_eq!(s, "pcdos"), + _ => panic!("Expected String"), + } + } + + #[tokio::test] + async fn test_eval_info_invalid() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("invalid")]; + let result = eval_info(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("#N/A")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_info_non_string() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_info(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("#VALUE!")), + _ => 
panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_sheets_no_args() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![]; + let result = eval_sheets(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(n) => assert!(n >= 0), + _ => panic!("Expected Int"), + } + } + + #[tokio::test] + async fn test_eval_sheet_no_args() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![]; + let result = eval_sheet(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(n) => assert!(n >= 0), + _ => panic!("Expected Int"), + } + } +} diff --git a/src/sheet/eval/engine/logical.rs b/crates/litchi-eval/src/engine/logical.rs similarity index 85% rename from src/sheet/eval/engine/logical.rs rename to crates/litchi-eval/src/engine/logical.rs index 04ee16d..ea4921e 100644 --- a/src/sheet/eval/engine/logical.rs +++ b/crates/litchi-eval/src/engine/logical.rs @@ -1,4 +1,4 @@ -use crate::sheet::{CellValue, Result}; +use litchi_core::sheet::{CellValue, Result}; use super::super::parser::Expr; use super::{EvalCtx, evaluate_expression, for_each_value_in_expr, to_bool}; @@ -30,7 +30,7 @@ pub(crate) async fn eval_ifs( current_sheet: &str, args: &[Expr], ) -> Result { - if args.len() < 2 || !args.len().is_multiple_of(2) { + if args.len() < 2 || args.len() % 2 != 0 { return Ok(CellValue::Error( "IFS expects an even number of arguments (condition/result pairs)".to_string(), )); @@ -170,7 +170,7 @@ pub(crate) async fn eval_switch( } // Default value if exists (odd number of arguments total) - if args.len().is_multiple_of(2) { + if args.len() % 2 == 0 { evaluate_expression(ctx, current_sheet, &args[args.len() - 1]).await } else { Ok(CellValue::Error("#N/A".to_string())) @@ -180,7 +180,7 @@ pub(crate) async fn eval_switch( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use 
crate::parser::Expr; fn bool_expr(b: bool) -> Expr { Expr::Literal(CellValue::Bool(b)) @@ -200,7 +200,7 @@ mod tests { #[tokio::test] async fn test_eval_if_true() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(true), num_expr(1.0), num_expr(0.0)]; let result = eval_if(ctx, "Sheet1", &args).await.unwrap(); @@ -212,7 +212,7 @@ mod tests { #[tokio::test] async fn test_eval_if_false() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(false), num_expr(1.0), num_expr(0.0)]; let result = eval_if(ctx, "Sheet1", &args).await.unwrap(); @@ -224,7 +224,7 @@ mod tests { #[tokio::test] async fn test_eval_if_no_else() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(false), num_expr(1.0)]; let result = eval_if(ctx, "Sheet1", &args).await.unwrap(); @@ -236,7 +236,7 @@ mod tests { #[tokio::test] async fn test_eval_if_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(true)]; let result = eval_if(ctx, "Sheet1", &args).await.unwrap(); @@ -248,7 +248,7 @@ mod tests { #[tokio::test] async fn test_eval_ifs_first_match() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ bool_expr(true), @@ -265,7 +265,7 @@ mod tests { #[tokio::test] async fn test_eval_ifs_second_match() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ bool_expr(false), @@ -282,7 +282,7 @@ mod tests { #[tokio::test] async fn test_eval_ifs_no_match() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ bool_expr(false), @@ -299,7 +299,7 @@ mod tests { #[tokio::test] async fn test_eval_ifs_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(true)]; let result = eval_ifs(ctx, "Sheet1", &args).await.unwrap(); @@ -311,7 +311,7 @@ mod tests { #[tokio::test] async fn test_eval_and_empty() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_and(ctx, "Sheet1", &args).await.unwrap(); @@ -323,7 +323,7 @@ mod tests { #[tokio::test] async fn test_eval_and_all_true() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(true), bool_expr(true), bool_expr(true)]; let result = eval_and(ctx, "Sheet1", &args).await.unwrap(); @@ -335,7 +335,7 @@ mod tests { #[tokio::test] async fn test_eval_and_one_false() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(true), bool_expr(false), bool_expr(true)]; let result = eval_and(ctx, "Sheet1", &args).await.unwrap(); @@ -347,7 +347,7 @@ mod tests { #[tokio::test] async fn test_eval_and_numeric_truthy() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0)]; let result = eval_and(ctx, "Sheet1", &args).await.unwrap(); @@ -359,7 +359,7 @@ mod tests { #[tokio::test] async fn test_eval_and_numeric_falsy() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(0.0)]; let result = eval_and(ctx, "Sheet1", &args).await.unwrap(); @@ -371,7 +371,7 @@ mod tests { #[tokio::test] async fn test_eval_or_empty() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_or(ctx, "Sheet1", &args).await.unwrap(); @@ -383,7 +383,7 @@ mod tests { #[tokio::test] async fn test_eval_or_one_true() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(false), bool_expr(true), bool_expr(false)]; let result = eval_or(ctx, "Sheet1", &args).await.unwrap(); @@ -395,7 +395,7 @@ mod tests { #[tokio::test] async fn test_eval_or_all_false() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(false), bool_expr(false), bool_expr(false)]; let result = eval_or(ctx, "Sheet1", &args).await.unwrap(); @@ -407,7 +407,7 @@ mod tests { #[tokio::test] async fn test_eval_or_numeric() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0), num_expr(1.0)]; let result = eval_or(ctx, "Sheet1", &args).await.unwrap(); @@ -419,7 +419,7 @@ mod tests { 
#[tokio::test] async fn test_eval_xor_empty() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_xor(ctx, "Sheet1", &args).await.unwrap(); @@ -431,7 +431,7 @@ mod tests { #[tokio::test] async fn test_eval_xor_one_true() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(true)]; let result = eval_xor(ctx, "Sheet1", &args).await.unwrap(); @@ -443,7 +443,7 @@ mod tests { #[tokio::test] async fn test_eval_xor_two_true() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(true), bool_expr(true)]; let result = eval_xor(ctx, "Sheet1", &args).await.unwrap(); @@ -455,7 +455,7 @@ mod tests { #[tokio::test] async fn test_eval_xor_three_true() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(true), bool_expr(true), bool_expr(true)]; let result = eval_xor(ctx, "Sheet1", &args).await.unwrap(); @@ -467,7 +467,7 @@ mod tests { #[tokio::test] async fn test_eval_xor_mixed() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(true), bool_expr(false), bool_expr(true)]; let result = eval_xor(ctx, "Sheet1", &args).await.unwrap(); @@ -479,7 +479,7 @@ mod tests { #[tokio::test] async fn test_eval_not_true() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = 
vec![bool_expr(true)]; let result = eval_not(ctx, "Sheet1", &args).await.unwrap(); @@ -491,7 +491,7 @@ mod tests { #[tokio::test] async fn test_eval_not_false() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![bool_expr(false)]; let result = eval_not(ctx, "Sheet1", &args).await.unwrap(); @@ -503,7 +503,7 @@ mod tests { #[tokio::test] async fn test_eval_not_zero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_not(ctx, "Sheet1", &args).await.unwrap(); @@ -515,7 +515,7 @@ mod tests { #[tokio::test] async fn test_eval_not_nonzero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(5.0)]; let result = eval_not(ctx, "Sheet1", &args).await.unwrap(); @@ -527,7 +527,7 @@ mod tests { #[tokio::test] async fn test_eval_not_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_not(ctx, "Sheet1", &args).await.unwrap(); @@ -539,7 +539,7 @@ mod tests { #[tokio::test] async fn test_eval_true() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_true(ctx, "Sheet1", &args).await.unwrap(); @@ -551,7 +551,7 @@ mod tests { #[tokio::test] async fn test_eval_true_with_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args 
= vec![num_expr(1.0)]; let result = eval_true(ctx, "Sheet1", &args).await.unwrap(); @@ -563,7 +563,7 @@ mod tests { #[tokio::test] async fn test_eval_false() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_false(ctx, "Sheet1", &args).await.unwrap(); @@ -575,7 +575,7 @@ mod tests { #[tokio::test] async fn test_eval_false_with_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_false(ctx, "Sheet1", &args).await.unwrap(); @@ -587,7 +587,7 @@ mod tests { #[tokio::test] async fn test_eval_switch_match_first() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(1.0), @@ -605,7 +605,7 @@ mod tests { #[tokio::test] async fn test_eval_switch_match_second() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(2.0), @@ -623,7 +623,7 @@ mod tests { #[tokio::test] async fn test_eval_switch_no_match() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(3.0), @@ -641,7 +641,7 @@ mod tests { #[tokio::test] async fn test_eval_switch_default() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(3.0), @@ -660,7 +660,7 @@ mod tests { #[tokio::test] async fn test_eval_switch_too_few_args() { - let engine = 
crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(1.0)]; let result = eval_switch(ctx, "Sheet1", &args).await.unwrap(); diff --git a/src/sheet/eval/engine/lookup/choose.rs b/crates/litchi-eval/src/engine/lookup/choose.rs similarity index 85% rename from src/sheet/eval/engine/lookup/choose.rs rename to crates/litchi-eval/src/engine/lookup/choose.rs index e3ec969..ea67c2d 100644 --- a/src/sheet/eval/engine/lookup/choose.rs +++ b/crates/litchi-eval/src/engine/lookup/choose.rs @@ -1,5 +1,5 @@ -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::super::{EvalCtx, evaluate_expression}; @@ -38,7 +38,7 @@ pub(crate) async fn eval_choose( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -54,7 +54,7 @@ mod tests { #[tokio::test] async fn test_eval_choose_basic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // CHOOSE(2, "a", "b", "c") should return "b" let args = vec![num_expr(2.0), str_expr("a"), str_expr("b"), str_expr("c")]; @@ -67,7 +67,7 @@ mod tests { #[tokio::test] async fn test_eval_choose_first() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(10.0), num_expr(20.0)]; let result = eval_choose(ctx, "Sheet1", &args).await.unwrap(); @@ -79,7 +79,7 @@ mod tests { #[tokio::test] async fn test_eval_choose_last() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); 
let ctx = engine.ctx(); let args = vec![num_expr(3.0), str_expr("x"), str_expr("y"), str_expr("z")]; let result = eval_choose(ctx, "Sheet1", &args).await.unwrap(); @@ -91,7 +91,7 @@ mod tests { #[tokio::test] async fn test_eval_choose_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_choose(ctx, "Sheet1", &args).await.unwrap(); @@ -103,7 +103,7 @@ mod tests { #[tokio::test] async fn test_eval_choose_index_zero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0), str_expr("a"), str_expr("b")]; let result = eval_choose(ctx, "Sheet1", &args).await.unwrap(); @@ -115,7 +115,7 @@ mod tests { #[tokio::test] async fn test_eval_choose_index_out_of_range() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(5.0), str_expr("a"), str_expr("b")]; let result = eval_choose(ctx, "Sheet1", &args).await.unwrap(); @@ -127,7 +127,7 @@ mod tests { #[tokio::test] async fn test_eval_choose_non_numeric_index() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("invalid"), str_expr("a"), str_expr("b")]; let result = eval_choose(ctx, "Sheet1", &args).await.unwrap(); @@ -139,7 +139,7 @@ mod tests { #[tokio::test] async fn test_eval_choose_decimal_index() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Decimal index should be truncated let args = vec![num_expr(2.9), str_expr("a"), 
str_expr("b"), str_expr("c")]; diff --git a/crates/litchi-eval/src/engine/lookup/helpers.rs b/crates/litchi-eval/src/engine/lookup/helpers.rs new file mode 100644 index 0000000..4e389aa --- /dev/null +++ b/crates/litchi-eval/src/engine/lookup/helpers.rs @@ -0,0 +1,60 @@ +use std::cmp::min; + +use crate::parser::{Expr, RangeRef}; +use litchi_core::sheet::{CellValue, Result}; + +use super::super::{EvalCtx, FlatRange, ResolvedName, to_number, to_text}; + +pub(super) enum ReferenceLookup { + Point((u32, u32)), + NameError(String), + NotReference, +} + +pub(super) async fn first_cell_from_expr( + ctx: EvalCtx<'_>, + current_sheet: &str, + expr: &Expr, +) -> Result { + let lookup = match expr { + Expr::Reference { row, col, .. } => ReferenceLookup::Point((*row, *col)), + Expr::Range(range) => ReferenceLookup::Point(range_first_cell(range)), + Expr::Name(name) => match ctx.resolve_name(current_sheet, name.as_str())? { + Some(ResolvedName::Cell { row, col, .. }) => ReferenceLookup::Point((row, col)), + Some(ResolvedName::Range(range)) => ReferenceLookup::Point(range_first_cell(&range)), + None => ReferenceLookup::NameError(format!("Unknown name: {}", name)), + }, + _ => ReferenceLookup::NotReference, + }; + + Ok(lookup) +} + +fn range_first_cell(range: &RangeRef) -> (u32, u32) { + let row = min(range.start_row, range.end_row); + let col = min(range.start_col, range.end_col); + (row, col) +} + +pub(super) fn is_1d(range: &FlatRange) -> bool { + range.rows == 1 || range.cols == 1 +} + +pub(super) fn find_exact_match_index( + lookup_val: &CellValue, + values: &[CellValue], +) -> Option { + for (idx, v) in values.iter().enumerate() { + if values_equal(lookup_val, v) { + return Some(idx); + } + } + None +} + +pub(super) fn values_equal(a: &CellValue, b: &CellValue) -> bool { + match (to_number(a), to_number(b)) { + (Some(x), Some(y)) => x == y, + _ => to_text(a) == to_text(b), + } +} diff --git a/src/sheet/eval/engine/lookup/index.rs 
b/crates/litchi-eval/src/engine/lookup/index.rs similarity index 96% rename from src/sheet/eval/engine/lookup/index.rs rename to crates/litchi-eval/src/engine/lookup/index.rs index fa3fb95..de93b04 100644 --- a/src/sheet/eval/engine/lookup/index.rs +++ b/crates/litchi-eval/src/engine/lookup/index.rs @@ -1,5 +1,5 @@ -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::super::{EvalCtx, flatten_range_expr, to_number}; @@ -56,9 +56,9 @@ pub(crate) async fn eval_index( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; - use crate::sheet::eval::parser::ast::RangeRef; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + use crate::parser::ast::RangeRef; fn num_expr(n: f64) -> Expr { if n == n.floor() { diff --git a/src/sheet/eval/engine/lookup/matchers.rs b/crates/litchi-eval/src/engine/lookup/matchers.rs similarity index 96% rename from src/sheet/eval/engine/lookup/matchers.rs rename to crates/litchi-eval/src/engine/lookup/matchers.rs index 2a3ead9..4054339 100644 --- a/src/sheet/eval/engine/lookup/matchers.rs +++ b/crates/litchi-eval/src/engine/lookup/matchers.rs @@ -1,5 +1,5 @@ -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::super::{EvalCtx, evaluate_expression, flatten_range_expr, to_number}; use super::helpers::{find_exact_match_index, is_1d}; @@ -100,9 +100,9 @@ pub(crate) async fn eval_xmatch( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; - use crate::sheet::eval::parser::ast::RangeRef; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + use crate::parser::ast::RangeRef; fn num_expr(n: f64) -> Expr { if n == n.floor() { 
diff --git a/src/sheet/eval/engine/lookup/mod.rs b/crates/litchi-eval/src/engine/lookup/mod.rs similarity index 100% rename from src/sheet/eval/engine/lookup/mod.rs rename to crates/litchi-eval/src/engine/lookup/mod.rs diff --git a/src/sheet/eval/engine/lookup/position.rs b/crates/litchi-eval/src/engine/lookup/position.rs similarity index 96% rename from src/sheet/eval/engine/lookup/position.rs rename to crates/litchi-eval/src/engine/lookup/position.rs index c6b7f0a..2009598 100644 --- a/src/sheet/eval/engine/lookup/position.rs +++ b/crates/litchi-eval/src/engine/lookup/position.rs @@ -1,5 +1,5 @@ -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::super::{EvalCtx, flatten_range_expr}; use super::helpers::{ReferenceLookup, first_cell_from_expr}; @@ -111,9 +111,9 @@ pub(crate) async fn eval_columns( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; - use crate::sheet::eval::parser::ast::RangeRef; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + use crate::parser::ast::RangeRef; fn num_expr(n: f64) -> Expr { if n == n.floor() { diff --git a/crates/litchi-eval/src/engine/lookup/table.rs b/crates/litchi-eval/src/engine/lookup/table.rs new file mode 100644 index 0000000..fc1599d --- /dev/null +++ b/crates/litchi-eval/src/engine/lookup/table.rs @@ -0,0 +1,352 @@ +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; + +use super::super::{EvalCtx, evaluate_expression, flatten_range_expr, to_bool, to_number}; +use super::helpers::values_equal; + +pub(crate) async fn eval_vlookup( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() < 3 || args.len() > 4 { + return Ok(CellValue::Error( + "VLOOKUP expects 3 or 4 arguments (lookup_value, table_array, col_index_num, [range_lookup])" + .to_string(), + 
)); + } + + let lookup_val = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let table = flatten_range_expr(ctx, current_sheet, &args[1]).await?; + + let col_index_val = evaluate_expression(ctx, current_sheet, &args[2]).await?; + let col_index = match to_number(&col_index_val) { + Some(n) if n >= 1.0 => n as i64, + _ => { + return Ok(CellValue::Error( + "VLOOKUP col_index_num must be a positive number".to_string(), + )); + }, + }; + + let exact_match_only = if args.len() == 4 { + let rl_val = evaluate_expression(ctx, current_sheet, &args[3]).await?; + !to_bool(&rl_val) + } else { + true + }; + + if !exact_match_only { + return Ok(CellValue::Error( + "VLOOKUP currently only supports exact match (range_lookup = FALSE)".to_string(), + )); + } + + let rows = table.rows as i64; + let cols = table.cols as i64; + + if col_index < 1 || col_index > cols { + return Ok(CellValue::Error( + "VLOOKUP col_index_num out of bounds for table_array".to_string(), + )); + } + + for r in 0..rows { + let base = (r * cols) as usize; + let key = &table.values[base]; + if values_equal(&lookup_val, key) { + let idx = base + (col_index - 1) as usize; + return Ok(table.values[idx].clone()); + } + } + + Ok(CellValue::Error("VLOOKUP: value not found".to_string())) +} + +pub(crate) async fn eval_hlookup( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() < 3 || args.len() > 4 { + return Ok(CellValue::Error( + "HLOOKUP expects 3 or 4 arguments (lookup_value, table_array, row_index_num, [range_lookup])" + .to_string(), + )); + } + + let lookup_val = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let table = flatten_range_expr(ctx, current_sheet, &args[1]).await?; + + let row_index_val = evaluate_expression(ctx, current_sheet, &args[2]).await?; + let row_index = match to_number(&row_index_val) { + Some(n) if n >= 1.0 => n as i64, + _ => { + return Ok(CellValue::Error( + "HLOOKUP row_index_num must be a positive number".to_string(), + 
)); + }, + }; + + let exact_match_only = if args.len() == 4 { + let rl_val = evaluate_expression(ctx, current_sheet, &args[3]).await?; + !to_bool(&rl_val) + } else { + true + }; + + if !exact_match_only { + return Ok(CellValue::Error( + "HLOOKUP currently only supports exact match (range_lookup = FALSE)".to_string(), + )); + } + + let rows = table.rows as i64; + let cols = table.cols as i64; + + if row_index < 1 || row_index > rows { + return Ok(CellValue::Error( + "HLOOKUP row_index_num out of bounds for table_array".to_string(), + )); + } + + for c in 0..cols { + let key = &table.values[c as usize]; + if values_equal(&lookup_val, key) { + let idx = ((row_index - 1) * cols + c) as usize; + return Ok(table.values[idx].clone()); + } + } + + Ok(CellValue::Error("HLOOKUP: value not found".to_string())) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + use crate::parser::ast::RangeRef; + + fn num_expr(n: f64) -> Expr { + if n == n.floor() { + Expr::Literal(CellValue::Int(n as i64)) + } else { + Expr::Literal(CellValue::Float(n)) + } + } + + fn str_expr(s: &str) -> Expr { + Expr::Literal(CellValue::String(s.to_string())) + } + + fn range_expr(sheet: &str, start_row: u32, start_col: u32, end_row: u32, end_col: u32) -> Expr { + Expr::Range(RangeRef { + sheet: sheet.to_string(), + start_row, + start_col, + end_row, + end_col, + }) + } + + #[tokio::test] + async fn test_vlookup_basic() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + + // Create a table: | ID | Name | Value | + // | 1 | Alice | 100 | + // | 2 | Bob | 200 | + // | 3 | Carol | 300 | + let values = vec![ + CellValue::Int(1), + CellValue::String("Alice".to_string()), + CellValue::Int(100), + CellValue::Int(2), + CellValue::String("Bob".to_string()), + CellValue::Int(200), + CellValue::Int(3), + CellValue::String("Carol".to_string()), + CellValue::Int(300), + ]; + engine.add_range("Sheet1", 1, 1, 3, 3, values); + + // 
VLOOKUP(2, Sheet1!A1:C3, 2) should return "Bob" + let args = vec![ + num_expr(2.0), + range_expr("Sheet1", 1, 1, 3, 3), + num_expr(2.0), + ]; + let result = eval_vlookup(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("Bob".to_string())); + } + + #[tokio::test] + async fn test_vlookup_third_column() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + + let values = vec![ + CellValue::Int(1), + CellValue::String("Alice".to_string()), + CellValue::Int(100), + CellValue::Int(2), + CellValue::String("Bob".to_string()), + CellValue::Int(200), + ]; + engine.add_range("Sheet1", 1, 1, 2, 3, values); + + // VLOOKUP(2, Sheet1!A1:C2, 3) should return 200 + let args = vec![ + num_expr(2.0), + range_expr("Sheet1", 1, 1, 2, 3), + num_expr(3.0), + ]; + let result = eval_vlookup(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(200)); + } + + #[tokio::test] + async fn test_vlookup_not_found() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + + let values = vec![ + CellValue::Int(1), + CellValue::String("Alice".to_string()), + CellValue::Int(2), + CellValue::String("Bob".to_string()), + ]; + engine.add_range("Sheet1", 1, 1, 2, 2, values); + + let args = vec![ + num_expr(999.0), + range_expr("Sheet1", 1, 1, 2, 2), + num_expr(2.0), + ]; + let result = eval_vlookup(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("not found")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_vlookup_col_index_out_of_bounds() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + + let values = vec![CellValue::Int(1), CellValue::String("Alice".to_string())]; + engine.add_range("Sheet1", 1, 1, 1, 2, values); + + let args = vec![ + num_expr(1.0), + range_expr("Sheet1", 1, 1, 1, 2), + num_expr(5.0), + ]; + let result = eval_vlookup(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("out of 
bounds")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_vlookup_string_lookup() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + + let values = vec![ + CellValue::String("apple".to_string()), + CellValue::Int(100), + CellValue::String("banana".to_string()), + CellValue::Int(200), + ]; + engine.add_range("Sheet1", 1, 1, 2, 2, values); + + // VLOOKUP("banana", Sheet1!A1:B2, 2) should return 200 + let args = vec![ + str_expr("banana"), + range_expr("Sheet1", 1, 1, 2, 2), + num_expr(2.0), + ]; + let result = eval_vlookup(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(200)); + } + + #[tokio::test] + async fn test_hlookup_basic() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + + // Create a horizontal table: + // | Name | Alice | Bob | Carol | + // | Value | 100 | 200 | 300 | + let values = vec![ + CellValue::String("Name".to_string()), + CellValue::String("Alice".to_string()), + CellValue::String("Bob".to_string()), + CellValue::String("Carol".to_string()), + CellValue::String("Value".to_string()), + CellValue::Int(100), + CellValue::Int(200), + CellValue::Int(300), + ]; + engine.add_range("Sheet1", 1, 1, 2, 4, values); + + // HLOOKUP("Bob", Sheet1!A1:D2, 2) should return 200 + let args = vec![ + str_expr("Bob"), + range_expr("Sheet1", 1, 1, 2, 4), + num_expr(2.0), + ]; + let result = eval_hlookup(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(200)); + } + + #[tokio::test] + async fn test_hlookup_not_found() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + + let values = vec![ + CellValue::String("A".to_string()), + CellValue::String("B".to_string()), + CellValue::Int(1), + CellValue::Int(2), + ]; + engine.add_range("Sheet1", 1, 1, 2, 2, values); + + let args = vec![ + str_expr("Z"), + range_expr("Sheet1", 1, 1, 2, 2), + num_expr(2.0), + ]; + let result = eval_hlookup(ctx, "Sheet1", &args).await.unwrap(); + match result { + 
CellValue::Error(e) => assert!(e.contains("not found")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_hlookup_row_index_out_of_bounds() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + + let values = vec![CellValue::String("A".to_string()), CellValue::Int(1)]; + engine.add_range("Sheet1", 1, 1, 1, 2, values); + + let args = vec![ + str_expr("A"), + range_expr("Sheet1", 1, 1, 1, 2), + num_expr(5.0), + ]; + let result = eval_hlookup(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("out of bounds")), + _ => panic!("Expected Error"), + } + } +} diff --git a/src/sheet/eval/engine/lookup/xlookup.rs b/crates/litchi-eval/src/engine/lookup/xlookup.rs similarity index 96% rename from src/sheet/eval/engine/lookup/xlookup.rs rename to crates/litchi-eval/src/engine/lookup/xlookup.rs index b4cb0c4..02784a9 100644 --- a/src/sheet/eval/engine/lookup/xlookup.rs +++ b/crates/litchi-eval/src/engine/lookup/xlookup.rs @@ -1,5 +1,5 @@ -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::super::{EvalCtx, evaluate_expression, flatten_range_expr, to_number}; use super::helpers::{find_exact_match_index, is_1d}; @@ -68,9 +68,9 @@ pub(crate) async fn eval_xlookup( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; - use crate::sheet::eval::parser::ast::RangeRef; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + use crate::parser::ast::RangeRef; fn num_expr(n: f64) -> Expr { if n == n.floor() { diff --git a/src/sheet/eval/engine/math/arithmetic.rs b/crates/litchi-eval/src/engine/math/arithmetic.rs similarity index 90% rename from src/sheet/eval/engine/math/arithmetic.rs rename to crates/litchi-eval/src/engine/math/arithmetic.rs index 3b927a4..db93f6a 100644 --- 
a/src/sheet/eval/engine/math/arithmetic.rs +++ b/crates/litchi-eval/src/engine/math/arithmetic.rs @@ -1,7 +1,7 @@ -use crate::sheet::{CellValue, Result}; +use litchi_core::sheet::{CellValue, Result}; -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number}; -use crate::sheet::eval::parser::Expr; +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; pub(crate) async fn eval_int( ctx: EvalCtx<'_>, @@ -252,7 +252,7 @@ mod tests { #[tokio::test] async fn test_eval_abs_int() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-42.0)]; let result = eval_abs(ctx, "Sheet1", &args).await.unwrap(); @@ -264,7 +264,7 @@ mod tests { #[tokio::test] async fn test_eval_abs_float() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-std::f64::consts::PI)]; let result = eval_abs(ctx, "Sheet1", &args).await.unwrap(); @@ -276,7 +276,7 @@ mod tests { #[tokio::test] async fn test_eval_power() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(2.0), num_expr(3.0)]; let result = eval_power(ctx, "Sheet1", &args).await.unwrap(); @@ -288,7 +288,7 @@ mod tests { #[tokio::test] async fn test_eval_sqrt() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(16.0)]; let result = eval_sqrt(ctx, "Sheet1", &args).await.unwrap(); @@ -300,7 +300,7 @@ mod tests { #[tokio::test] async fn test_eval_ln() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(std::f64::consts::E)]; let result = eval_ln(ctx, "Sheet1", &args).await.unwrap(); @@ -312,7 +312,7 @@ mod tests { #[tokio::test] async fn test_eval_log10() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1000.0)]; let result = eval_log10(ctx, "Sheet1", &args).await.unwrap(); @@ -324,7 +324,7 @@ mod tests { #[tokio::test] async fn test_eval_exp() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_exp(ctx, "Sheet1", &args).await.unwrap(); @@ -336,7 +336,7 @@ mod tests { #[tokio::test] async fn test_eval_int() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.7)]; let result = eval_int(ctx, "Sheet1", &args).await.unwrap(); @@ -348,7 +348,7 @@ mod tests { #[tokio::test] async fn test_eval_int_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-3.7)]; let result = eval_int(ctx, "Sheet1", &args).await.unwrap(); @@ -360,7 +360,7 @@ mod tests { #[tokio::test] async fn test_eval_delta_equal() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(5.0)]; let result = eval_delta(ctx, "Sheet1", &args).await.unwrap(); @@ -372,7 +372,7 @@ mod tests { #[tokio::test] async fn test_eval_delta_with_comparison() { - let engine = 
crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(5.0), num_expr(5.0)]; let result = eval_delta(ctx, "Sheet1", &args).await.unwrap(); @@ -384,7 +384,7 @@ mod tests { #[tokio::test] async fn test_eval_gestep() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(5.0), num_expr(3.0)]; let result = eval_gestep(ctx, "Sheet1", &args).await.unwrap(); @@ -396,7 +396,7 @@ mod tests { #[tokio::test] async fn test_eval_gestep_default() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(5.0)]; let result = eval_gestep(ctx, "Sheet1", &args).await.unwrap(); @@ -408,7 +408,7 @@ mod tests { #[tokio::test] async fn test_eval_sqrtpi() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(2.0)]; let result = eval_sqrtpi(ctx, "Sheet1", &args).await.unwrap(); diff --git a/src/sheet/eval/engine/math/bitwise.rs b/crates/litchi-eval/src/engine/math/bitwise.rs similarity index 91% rename from src/sheet/eval/engine/math/bitwise.rs rename to crates/litchi-eval/src/engine/math/bitwise.rs index c8bfcca..f866f5b 100644 --- a/src/sheet/eval/engine/math/bitwise.rs +++ b/crates/litchi-eval/src/engine/math/bitwise.rs @@ -1,8 +1,8 @@ -use crate::sheet::{CellValue, Result}; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::{MAX_BITWISE_VALUE, bit_operand_value, bit_shift_value}; -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression}; -use crate::sheet::eval::parser::Expr; +use crate::engine::{EvalCtx, evaluate_expression}; +use crate::parser::Expr; 
pub(crate) async fn eval_bitand( ctx: EvalCtx<'_>, @@ -159,7 +159,7 @@ pub(crate) async fn eval_bitrshift( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -171,7 +171,7 @@ mod tests { #[tokio::test] async fn test_eval_bitand() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // 6 = 110, 3 = 011, result = 010 = 2 let args = vec![num_expr(6.0), num_expr(3.0)]; @@ -181,7 +181,7 @@ mod tests { #[tokio::test] async fn test_eval_bitor() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // 6 = 110, 3 = 011, result = 111 = 7 let args = vec![num_expr(6.0), num_expr(3.0)]; @@ -191,7 +191,7 @@ mod tests { #[tokio::test] async fn test_eval_bitxor() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // 6 = 110, 3 = 011, result = 101 = 5 let args = vec![num_expr(6.0), num_expr(3.0)]; @@ -201,7 +201,7 @@ mod tests { #[tokio::test] async fn test_eval_bitlshift() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // 5 << 2 = 20 let args = vec![num_expr(5.0), num_expr(2.0)]; @@ -211,7 +211,7 @@ mod tests { #[tokio::test] async fn test_eval_bitrshift() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // 20 >> 2 = 5 let args = vec![num_expr(20.0), num_expr(2.0)]; @@ -221,7 +221,7 @@ mod tests { #[tokio::test] async fn test_eval_bitand_wrong_args() { - let engine = 
crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(6.0)]; let result = eval_bitand(ctx, "Sheet1", &args).await.unwrap(); diff --git a/src/sheet/eval/engine/math/combinatorics.rs b/crates/litchi-eval/src/engine/math/combinatorics.rs similarity index 93% rename from src/sheet/eval/engine/math/combinatorics.rs rename to crates/litchi-eval/src/engine/math/combinatorics.rs index 758779a..f00958a 100644 --- a/src/sheet/eval/engine/math/combinatorics.rs +++ b/crates/litchi-eval/src/engine/math/combinatorics.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, for_each_value_in_expr, to_number}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, for_each_value_in_expr, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::{ combination, double_factorial, factorial, number_result, permutation, to_int_if_whole, @@ -429,7 +429,7 @@ fn lcm_u128(a: u128, b: u128) -> u128 { #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -441,7 +441,7 @@ mod tests { #[tokio::test] async fn test_eval_fact() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(5.0)]; let result = eval_fact(ctx, "Sheet1", &args).await.unwrap(); @@ -450,7 +450,7 @@ mod tests { #[tokio::test] async fn test_eval_fact_zero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_fact(ctx, "Sheet1", &args).await.unwrap(); @@ -459,7 +459,7 @@ 
mod tests { #[tokio::test] async fn test_eval_fact_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-5.0)]; let result = eval_fact(ctx, "Sheet1", &args).await.unwrap(); @@ -471,7 +471,7 @@ mod tests { #[tokio::test] async fn test_eval_combin() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // C(5, 2) = 10 let args = vec![num_expr(5.0), num_expr(2.0)]; @@ -481,7 +481,7 @@ mod tests { #[tokio::test] async fn test_eval_combin_k_greater_n() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.0), num_expr(5.0)]; let result = eval_combin(ctx, "Sheet1", &args).await.unwrap(); @@ -493,7 +493,7 @@ mod tests { #[tokio::test] async fn test_eval_permut() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // P(5, 2) = 20 let args = vec![num_expr(5.0), num_expr(2.0)]; @@ -503,7 +503,7 @@ mod tests { #[tokio::test] async fn test_eval_gcd() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // GCD(48, 18) = 6 let args = vec![num_expr(48.0), num_expr(18.0)]; @@ -513,7 +513,7 @@ mod tests { #[tokio::test] async fn test_eval_gcd_single_arg() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(42.0)]; let result = eval_gcd(ctx, "Sheet1", &args).await.unwrap(); @@ -522,7 +522,7 @@ mod tests { #[tokio::test] async fn 
test_eval_lcm() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // LCM(4, 6) = 12 let args = vec![num_expr(4.0), num_expr(6.0)]; @@ -532,7 +532,7 @@ mod tests { #[tokio::test] async fn test_eval_factdouble() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // 6!! = 6 * 4 * 2 = 48 let args = vec![num_expr(6.0)]; @@ -542,7 +542,7 @@ mod tests { #[tokio::test] async fn test_eval_multinomial() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // MULTINOMIAL(2, 3, 4) = (2+3+4)! / (2! * 3! * 4!) = 9! / (2! * 3! * 4!) = 1260 let args = vec![num_expr(2.0), num_expr(3.0), num_expr(4.0)]; diff --git a/crates/litchi-eval/src/engine/math/conversions.rs b/crates/litchi-eval/src/engine/math/conversions.rs new file mode 100644 index 0000000..2846e5f --- /dev/null +++ b/crates/litchi-eval/src/engine/math/conversions.rs @@ -0,0 +1,816 @@ +use litchi_core::sheet::{CellValue, Result}; + +use super::helpers::{ + BIN_MAX, BIN_MIN, HEX_MAX, HEX_MIN, OCT_MAX, OCT_MIN, binary_string_from_value, + ensure_number_in_range, is_negative_binary, negative_binary_to_hex, negative_binary_to_oct, + pad_with_places, parse_decimal_for_conversion, parse_hex_string, parse_octal_string, + parse_places_argument, parse_signed_binary, signed_hex_to_decimal, signed_octal_to_decimal, + to_int_if_whole, twos_complement_value, +}; +use crate::engine::{EvalCtx, evaluate_expression, to_number, to_text}; +use crate::parser::Expr; + +pub(crate) async fn eval_bin2dec( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("BIN2DEC expects 1 argument".to_string())); + } + let number_value = 
evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let bits = match binary_string_from_value(&number_value, "BIN2DEC") { + Ok(s) => s, + Err(err) => return Ok(err), + }; + let value = match parse_signed_binary(&bits, "BIN2DEC") { + Ok(v) => v, + Err(err) => return Ok(err), + }; + Ok(CellValue::Int(value)) +} + +pub(crate) async fn eval_bin2hex( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() || args.len() > 2 { + return Ok(CellValue::Error( + "BIN2HEX expects 1 or 2 arguments".to_string(), + )); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let places = if args.len() == 2 { + let places_value = evaluate_expression(ctx, current_sheet, &args[1]).await?; + if let CellValue::Error(_) = places_value { + return Ok(places_value); + } + Some(match parse_places_argument(&places_value, "BIN2HEX") { + Ok(p) => p, + Err(err) => return Ok(err), + }) + } else { + None + }; + let bits = match binary_string_from_value(&number_value, "BIN2HEX") { + Ok(s) => s, + Err(err) => return Ok(err), + }; + let result = if is_negative_binary(&bits) { + negative_binary_to_hex(&bits) + } else { + let value = u32::from_str_radix(&bits, 2).unwrap(); + let mut hex = format!("{value:X}"); + if let Err(err) = pad_with_places(&mut hex, places, "BIN2HEX") { + return Ok(err); + } + hex + }; + Ok(CellValue::String(result)) +} + +pub(crate) async fn eval_bin2oct( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() || args.len() > 2 { + return Ok(CellValue::Error( + "BIN2OCT expects 1 or 2 arguments".to_string(), + )); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let places = if args.len() == 2 { + let 
places_value = evaluate_expression(ctx, current_sheet, &args[1]).await?; + if let CellValue::Error(_) = places_value { + return Ok(places_value); + } + Some(match parse_places_argument(&places_value, "BIN2OCT") { + Ok(p) => p, + Err(err) => return Ok(err), + }) + } else { + None + }; + let bits = match binary_string_from_value(&number_value, "BIN2OCT") { + Ok(s) => s, + Err(err) => return Ok(err), + }; + let result = if is_negative_binary(&bits) { + negative_binary_to_oct(&bits) + } else { + let value = u32::from_str_radix(&bits, 2).unwrap(); + let mut oct = format!("{value:o}"); + if let Err(err) = pad_with_places(&mut oct, places, "BIN2OCT") { + return Ok(err); + } + oct + }; + Ok(CellValue::String(result)) +} + +pub(crate) async fn eval_dec2bin( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() || args.len() > 2 { + return Ok(CellValue::Error( + "DEC2BIN expects 1 or 2 arguments".to_string(), + )); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let number = match parse_decimal_for_conversion(&number_value, "DEC2BIN", BIN_MIN, BIN_MAX) { + Ok(v) => v, + Err(err) => return Ok(err), + }; + let places = if args.len() == 2 { + let places_value = evaluate_expression(ctx, current_sheet, &args[1]).await?; + if let CellValue::Error(_) = places_value { + return Ok(places_value); + } + Some(match parse_places_argument(&places_value, "DEC2BIN") { + Ok(p) => p, + Err(err) => return Ok(err), + }) + } else { + None + }; + if number < 0 { + let value = twos_complement_value(number, 10); + return Ok(CellValue::String(format!("{value:010b}"))); + } + let mut result = format!("{number:b}"); + if let Err(err) = pad_with_places(&mut result, places, "DEC2BIN") { + return Ok(err); + } + Ok(CellValue::String(result)) +} + +const DECIMAL_MAX_VALUE: u128 = (1u128 << 53) - 1; + +pub(crate) async fn eval_decimal( + ctx: 
EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 2 { + return Ok(CellValue::Error( + "DECIMAL expects 2 arguments (text, radix)".to_string(), + )); + } + + let text_val = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let raw_text = to_text(&text_val); + let text = raw_text.trim(); + if text.is_empty() { + return Ok(CellValue::Error( + "DECIMAL text must not be empty".to_string(), + )); + } + if text.len() > 255 { + return Ok(CellValue::Error( + "DECIMAL text must be 255 characters or fewer".to_string(), + )); + } + + let radix_val = evaluate_expression(ctx, current_sheet, &args[1]).await?; + let radix_num = match to_number(&radix_val) { + Some(n) => n, + None => { + return Ok(CellValue::Error( + "DECIMAL radix must be numeric".to_string(), + )); + }, + }; + let radix_int = match to_int_if_whole(radix_num) { + Some(i) => i, + None => { + return Ok(CellValue::Error( + "DECIMAL radix must be an integer between 2 and 36".to_string(), + )); + }, + }; + if !(2..=36).contains(&radix_int) { + return Ok(CellValue::Error( + "DECIMAL radix must be between 2 and 36".to_string(), + )); + } + let radix = radix_int as u32; + + let mut value: u128 = 0; + for ch in text.chars() { + let digit = match char_to_digit(ch) { + Some(d) => d, + None => { + return Ok(CellValue::Error(format!( + "DECIMAL text contains invalid character '{}'", + ch + ))); + }, + }; + if digit >= radix { + return Ok(CellValue::Error(format!( + "DECIMAL text contains digit '{}' invalid for radix {}", + ch, radix + ))); + } + value = value * radix as u128 + digit as u128; + if value > DECIMAL_MAX_VALUE { + return Ok(CellValue::Error( + "DECIMAL result is out of supported range (must be less than 2^53)".to_string(), + )); + } + } + + Ok(CellValue::Int(value as i64)) +} + +pub(crate) async fn eval_base( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() < 2 || args.len() > 3 { + return Ok(CellValue::Error( + "BASE expects 2 
or 3 arguments (number, radix, [min_length])".to_string(), + )); + } + + let number_val = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let number_num = match to_number(&number_val) { + Some(n) => n, + None => { + return Ok(CellValue::Error("BASE number must be numeric".to_string())); + }, + }; + let number_int = match to_int_if_whole(number_num) { + Some(i) => i, + None => { + return Ok(CellValue::Error( + "BASE number must be an integer between 0 and 2^53-1".to_string(), + )); + }, + }; + if number_int < 0 || number_int as u128 > DECIMAL_MAX_VALUE { + return Ok(CellValue::Error( + "BASE number must be between 0 and 2^53-1".to_string(), + )); + } + let number = number_int as u128; + + let radix_val = evaluate_expression(ctx, current_sheet, &args[1]).await?; + let radix_num = match to_number(&radix_val) { + Some(n) => n, + None => { + return Ok(CellValue::Error("BASE radix must be numeric".to_string())); + }, + }; + let radix_int = match to_int_if_whole(radix_num) { + Some(i) => i, + None => { + return Ok(CellValue::Error( + "BASE radix must be an integer between 2 and 36".to_string(), + )); + }, + }; + if !(2..=36).contains(&radix_int) { + return Ok(CellValue::Error( + "BASE radix must be between 2 and 36".to_string(), + )); + } + let radix = radix_int as u32; + + let min_length = if args.len() == 3 { + let len_val = evaluate_expression(ctx, current_sheet, &args[2]).await?; + let len_num = match to_number(&len_val) { + Some(n) => n, + None => { + return Ok(CellValue::Error( + "BASE min_length must be numeric".to_string(), + )); + }, + }; + let len_int = match to_int_if_whole(len_num) { + Some(i) => i, + None => { + return Ok(CellValue::Error( + "BASE min_length must be a non-negative integer up to 255".to_string(), + )); + }, + }; + if !(0..=255).contains(&len_int) { + return Ok(CellValue::Error( + "BASE min_length must be between 0 and 255".to_string(), + )); + } + len_int as usize + } else { + 0 + }; + + let mut result = if number == 0 { + 
/// Returns the digit value of `c` for radixes up to 36.
///
/// ASCII digits map to 0-9 and ASCII letters (either case) map to 10-35.
/// Any other character yields `None`.
fn char_to_digit(c: char) -> Option<u32> {
    c.to_digit(36)
}

/// Renders `value` in the given `radix` using digits `0-9` then `A-Z`.
///
/// `radix` is expected to be between 2 and 36 (checked in debug builds).
/// Zero is rendered as `"0"` rather than an empty string.
fn convert_number_to_base(mut value: u128, radix: u32) -> String {
    debug_assert!((2..=36).contains(&radix));
    if value == 0 {
        return "0".to_string();
    }
    let radix = u128::from(radix);
    let mut digits: Vec<char> = Vec::new();
    while value > 0 {
        // The remainder is below `radix`, so it always fits in a u32.
        let rem = (value % radix) as u32;
        digits.push(digit_to_char(rem));
        value /= radix;
    }
    // Digits were produced least-significant first.
    digits.iter().rev().collect()
}

/// Maps a digit value below 36 to its character: `0-9`, then `A-Z`.
fn digit_to_char(value: u32) -> char {
    debug_assert!(value < 36);
    if value < 10 {
        (b'0' + value as u8) as char
    } else {
        (b'A' + (value - 10) as u8) as char
    }
}
pad_with_places(&mut result, places, "DEC2OCT") { + return Ok(err); + } + Ok(CellValue::String(result)) +} + +pub(crate) async fn eval_dec2hex( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() || args.len() > 2 { + return Ok(CellValue::Error( + "DEC2HEX expects 1 or 2 arguments".to_string(), + )); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let number = match parse_decimal_for_conversion(&number_value, "DEC2HEX", HEX_MIN, HEX_MAX) { + Ok(v) => v, + Err(err) => return Ok(err), + }; + let places = if args.len() == 2 { + let places_value = evaluate_expression(ctx, current_sheet, &args[1]).await?; + if let CellValue::Error(_) = places_value { + return Ok(places_value); + } + Some(match parse_places_argument(&places_value, "DEC2HEX") { + Ok(p) => p, + Err(err) => return Ok(err), + }) + } else { + None + }; + if number < 0 { + let value = twos_complement_value(number, 40); + return Ok(CellValue::String(format!("{value:010X}"))); + } + let mut result = format!("{number:X}"); + if let Err(err) = pad_with_places(&mut result, places, "DEC2HEX") { + return Ok(err); + } + Ok(CellValue::String(result)) +} + +pub(crate) async fn eval_hex2dec( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("HEX2DEC expects 1 argument".to_string())); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let hex = match parse_hex_string(&number_value, "HEX2DEC") { + Ok(s) => s, + Err(err) => return Ok(err), + }; + let value = match signed_hex_to_decimal(&hex, "HEX2DEC") { + Ok(v) => v, + Err(err) => return Ok(err), + }; + Ok(CellValue::Int(value)) +} + +pub(crate) async fn eval_hex2bin( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> 
Result { + if args.is_empty() || args.len() > 2 { + return Ok(CellValue::Error( + "HEX2BIN expects 1 or 2 arguments".to_string(), + )); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let hex = match parse_hex_string(&number_value, "HEX2BIN") { + Ok(s) => s, + Err(err) => return Ok(err), + }; + let decimal = match signed_hex_to_decimal(&hex, "HEX2BIN") { + Ok(v) => v, + Err(err) => return Ok(err), + }; + if let Err(err) = ensure_number_in_range(decimal, BIN_MIN, BIN_MAX, "HEX2BIN") { + return Ok(err); + } + let places = if args.len() == 2 { + let places_value = evaluate_expression(ctx, current_sheet, &args[1]).await?; + if let CellValue::Error(_) = places_value { + return Ok(places_value); + } + Some(match parse_places_argument(&places_value, "HEX2BIN") { + Ok(p) => p, + Err(err) => return Ok(err), + }) + } else { + None + }; + if decimal < 0 { + let value = twos_complement_value(decimal, 10); + return Ok(CellValue::String(format!("{value:010b}"))); + } + let mut result = format!("{decimal:b}"); + if let Err(err) = pad_with_places(&mut result, places, "HEX2BIN") { + return Ok(err); + } + Ok(CellValue::String(result)) +} + +pub(crate) async fn eval_hex2oct( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() || args.len() > 2 { + return Ok(CellValue::Error( + "HEX2OCT expects 1 or 2 arguments".to_string(), + )); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let hex = match parse_hex_string(&number_value, "HEX2OCT") { + Ok(s) => s, + Err(err) => return Ok(err), + }; + let decimal = match signed_hex_to_decimal(&hex, "HEX2OCT") { + Ok(v) => v, + Err(err) => return Ok(err), + }; + if let Err(err) = ensure_number_in_range(decimal, OCT_MIN, OCT_MAX, "HEX2OCT") { + return Ok(err); + } + let places = if 
args.len() == 2 { + let places_value = evaluate_expression(ctx, current_sheet, &args[1]).await?; + if let CellValue::Error(_) = places_value { + return Ok(places_value); + } + Some(match parse_places_argument(&places_value, "HEX2OCT") { + Ok(p) => p, + Err(err) => return Ok(err), + }) + } else { + None + }; + if decimal < 0 { + let value = twos_complement_value(decimal, 30); + return Ok(CellValue::String(format!("{value:010o}"))); + } + let mut result = format!("{decimal:o}"); + if let Err(err) = pad_with_places(&mut result, places, "HEX2OCT") { + return Ok(err); + } + Ok(CellValue::String(result)) +} + +pub(crate) async fn eval_oct2dec( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("OCT2DEC expects 1 argument".to_string())); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let oct = match parse_octal_string(&number_value, "OCT2DEC") { + Ok(s) => s, + Err(err) => return Ok(err), + }; + let value = match signed_octal_to_decimal(&oct, "OCT2DEC") { + Ok(v) => v, + Err(err) => return Ok(err), + }; + Ok(CellValue::Int(value)) +} + +pub(crate) async fn eval_oct2bin( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() || args.len() > 2 { + return Ok(CellValue::Error( + "OCT2BIN expects 1 or 2 arguments".to_string(), + )); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let oct = match parse_octal_string(&number_value, "OCT2BIN") { + Ok(s) => s, + Err(err) => return Ok(err), + }; + let decimal = match signed_octal_to_decimal(&oct, "OCT2BIN") { + Ok(v) => v, + Err(err) => return Ok(err), + }; + if let Err(err) = ensure_number_in_range(decimal, BIN_MIN, BIN_MAX, "OCT2BIN") { + return Ok(err); + } + let places = if args.len() == 2 
{ + let places_value = evaluate_expression(ctx, current_sheet, &args[1]).await?; + if let CellValue::Error(_) = places_value { + return Ok(places_value); + } + Some(match parse_places_argument(&places_value, "OCT2BIN") { + Ok(p) => p, + Err(err) => return Ok(err), + }) + } else { + None + }; + if decimal < 0 { + let value = twos_complement_value(decimal, 10); + return Ok(CellValue::String(format!("{value:010b}"))); + } + let mut result = format!("{decimal:b}"); + if let Err(err) = pad_with_places(&mut result, places, "OCT2BIN") { + return Ok(err); + } + Ok(CellValue::String(result)) +} + +pub(crate) async fn eval_oct2hex( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() || args.len() > 2 { + return Ok(CellValue::Error( + "OCT2HEX expects 1 or 2 arguments".to_string(), + )); + } + let number_value = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let CellValue::Error(_) = number_value { + return Ok(number_value); + } + let oct = match parse_octal_string(&number_value, "OCT2HEX") { + Ok(s) => s, + Err(err) => return Ok(err), + }; + let decimal = match signed_octal_to_decimal(&oct, "OCT2HEX") { + Ok(v) => v, + Err(err) => return Ok(err), + }; + if let Err(err) = ensure_number_in_range(decimal, HEX_MIN, HEX_MAX, "OCT2HEX") { + return Ok(err); + } + let places = if args.len() == 2 { + let places_value = evaluate_expression(ctx, current_sheet, &args[1]).await?; + if let CellValue::Error(_) = places_value { + return Ok(places_value); + } + Some(match parse_places_argument(&places_value, "OCT2HEX") { + Ok(p) => p, + Err(err) => return Ok(err), + }) + } else { + None + }; + if decimal < 0 { + let value = twos_complement_value(decimal, 40); + return Ok(CellValue::String(format!("{value:010X}"))); + } + let mut result = format!("{decimal:X}"); + if let Err(err) = pad_with_places(&mut result, places, "OCT2HEX") { + return Ok(err); + } + Ok(CellValue::String(result)) +} +#[cfg(test)] +mod tests { + use super::*; + 
use crate::parser::Expr; + + fn num_expr(n: f64) -> Expr { + if n == n.floor() { + Expr::Literal(CellValue::Int(n as i64)) + } else { + Expr::Literal(CellValue::Float(n)) + } + } + + fn str_expr(s: &str) -> Expr { + Expr::Literal(CellValue::String(s.to_string())) + } + + #[tokio::test] + async fn test_eval_bin2dec() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("1010")]; + let result = eval_bin2dec(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(10)); + } + + #[tokio::test] + async fn test_eval_bin2hex() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("1111")]; + let result = eval_bin2hex(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("F".to_string())); + } + + #[tokio::test] + async fn test_eval_dec2bin() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(10.0)]; + let result = eval_dec2bin(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("1010".to_string())); + } + + #[tokio::test] + async fn test_eval_dec2hex() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(255.0)]; + let result = eval_dec2hex(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("FF".to_string())); + } + + #[tokio::test] + async fn test_eval_hex2dec() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("FF")]; + let result = eval_hex2dec(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(255)); + } + + #[tokio::test] + async fn test_eval_hex2bin() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("F")]; + let result = eval_hex2bin(ctx, "Sheet1", 
&args).await.unwrap(); + assert_eq!(result, CellValue::String("1111".to_string())); + } + + #[tokio::test] + async fn test_eval_dec2oct() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(64.0)]; + let result = eval_dec2oct(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("100".to_string())); + } + + #[tokio::test] + async fn test_eval_oct2dec() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("100")]; + let result = eval_oct2dec(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(64)); + } + + #[tokio::test] + async fn test_eval_base() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // BASE(255, 16) = "FF" + let args = vec![num_expr(255.0), num_expr(16.0)]; + let result = eval_base(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("FF".to_string())); + } + + #[tokio::test] + async fn test_eval_base_with_min_length() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // BASE(255, 16, 4) = "00FF" + let args = vec![num_expr(255.0), num_expr(16.0), num_expr(4.0)]; + let result = eval_base(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("00FF".to_string())); + } + + #[tokio::test] + async fn test_eval_decimal() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // DECIMAL("FF", 16) = 255 + let args = vec![str_expr("FF"), num_expr(16.0)]; + let result = eval_decimal(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(255)); + } + + #[tokio::test] + async fn test_eval_decimal_binary() { + let engine = crate::engine::test_helpers::TestEngine::new(); + let ctx = engine.ctx(); + // DECIMAL("1010", 2) = 10 + let args = vec![str_expr("1010"), num_expr(2.0)]; + let result = 
eval_decimal(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(10)); + } +} diff --git a/crates/litchi-eval/src/engine/math/helpers.rs b/crates/litchi-eval/src/engine/math/helpers.rs new file mode 100644 index 0000000..86adb4c --- /dev/null +++ b/crates/litchi-eval/src/engine/math/helpers.rs @@ -0,0 +1,861 @@ +use std::result::Result as StdResult; + +use crate::engine::{EvalCtx, flatten_range_expr, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; + +pub(super) const EPS: f64 = 1e-12; +pub(super) const MAX_BITWISE_VALUE: u64 = (1u64 << 48) - 1; +pub(super) const BIN_MAX: i64 = 511; +pub(super) const BIN_MIN: i64 = -512; +pub(super) const OCT_MAX: i64 = 536_870_911; +pub(super) const OCT_MIN: i64 = -536_870_912; +pub(super) const HEX_MAX: i64 = 549_755_813_887; +pub(super) const HEX_MIN: i64 = -549_755_813_888; + +pub(super) fn number_result(value: f64) -> CellValue { + if value.is_finite() + && value.fract().abs() < EPS + && value <= i64::MAX as f64 + && value >= i64::MIN as f64 + { + CellValue::Int(value as i64) + } else { + CellValue::Float(value) + } +} + +pub(super) fn is_even(value: f64) -> bool { + ((value / 2.0).fract()).abs() < EPS +} + +pub(super) fn round_away_from_zero(value: f64) -> f64 { + let rounded = if value >= 0.0 { + value.ceil() + } else { + value.floor() + }; + if rounded == -0.0 { 0.0 } else { rounded } +} + +pub(super) fn to_int_if_whole(value: f64) -> Option { + if !value.is_finite() { + return None; + } + let truncated = value.trunc(); + if (value - truncated).abs() > EPS { + return None; + } + if truncated < i64::MIN as f64 || truncated > i64::MAX as f64 { + return None; + } + Some(truncated as i64) +} + +pub(super) async fn flatten_numeric_values( + ctx: EvalCtx<'_>, + current_sheet: &str, + expr: &Expr, +) -> Result> { + let flat = flatten_range_expr(ctx, current_sheet, expr).await?; + let values = flat + .values + .into_iter() + .map(|v| to_number(&v).unwrap_or(0.0)) + 
.collect(); + Ok(values) +} + +pub(super) fn to_u48(value: f64) -> Option { + if value.is_finite() + && value >= 0.0 + && value <= MAX_BITWISE_VALUE as f64 + && (value.fract()).abs() < EPS + { + Some(value as u64) + } else { + None + } +} + +pub(super) fn to_shift_amount(value: f64) -> Option { + if value.is_finite() && (0.0..=53.0).contains(&value) && (value.fract()).abs() < EPS { + Some(value as u32) + } else { + None + } +} + +pub(super) fn bit_operand_value(value: &CellValue, func_name: &str) -> StdResult { + let num = match to_number(value) { + Some(n) => n, + None => { + return Err(CellValue::Error(format!( + "{func_name} arguments must be numeric" + ))); + }, + }; + match to_u48(num) { + Some(v) => Ok(v), + None => Err(CellValue::Error(format!( + "{func_name} arguments must be integers between 0 and 2^48-1" + ))), + } +} + +pub(super) fn bit_shift_value(value: &CellValue, func_name: &str) -> StdResult { + let num = match to_number(value) { + Some(n) => n, + None => { + return Err(CellValue::Error(format!( + "{func_name} shift must be numeric" + ))); + }, + }; + match to_shift_amount(num) { + Some(v) => Ok(v), + None => Err(CellValue::Error(format!( + "{func_name} shift must be between 0 and 53" + ))), + } +} + +pub(super) fn binary_string_from_value( + value: &CellValue, + func_name: &str, +) -> StdResult { + let raw = match value { + CellValue::String(s) => s.trim().to_string(), + CellValue::Int(i) => { + if *i < 0 { + return Err(CellValue::Error(format!( + "{func_name} expects a binary value containing only 0 or 1" + ))); + } + i.to_string() + }, + CellValue::Float(f) => match to_int_if_whole(*f) { + Some(i) if i >= 0 => i.to_string(), + _ => { + return Err(CellValue::Error(format!( + "{func_name} expects a binary value containing only 0 or 1" + ))); + }, + }, + CellValue::Bool(true) => "1".to_string(), + CellValue::Bool(false) => "0".to_string(), + _ => { + return Err(CellValue::Error(format!( + "{func_name} expects a binary value containing only 0 or 1" 
+ ))); + }, + }; + + let trimmed = raw.trim(); + if trimmed.is_empty() { + return Err(CellValue::Error(format!( + "{func_name} expects a binary value containing only 0 or 1" + ))); + } + if trimmed.len() > 10 { + return Err(CellValue::Error(format!( + "{func_name} expects a binary value up to 10 digits" + ))); + } + if !trimmed.chars().all(|c| c == '0' || c == '1') { + return Err(CellValue::Error(format!( + "{func_name} expects a binary value containing only 0 or 1" + ))); + } + Ok(trimmed.to_string()) +} + +pub(super) fn parse_signed_binary(bits: &str, func_name: &str) -> StdResult { + if bits.is_empty() || bits.len() > 10 { + return Err(CellValue::Error(format!( + "{func_name} expects a binary value up to 10 digits" + ))); + } + let unsigned = u16::from_str_radix(bits, 2).unwrap(); + if bits.len() == 10 && bits.starts_with('1') { + Ok(unsigned as i64 - 1024) + } else { + Ok(unsigned as i64) + } +} + +pub(super) fn parse_places_argument( + value: &CellValue, + func_name: &str, +) -> StdResult { + let num = match to_number(value) { + Some(n) => n, + None => { + return Err(CellValue::Error(format!( + "{func_name} places must be numeric" + ))); + }, + }; + let int_val = match to_int_if_whole(num) { + Some(i) => i, + None => { + return Err(CellValue::Error(format!( + "{func_name} places must be an integer between 1 and 10" + ))); + }, + }; + if !(1..=10).contains(&int_val) { + return Err(CellValue::Error(format!( + "{func_name} places must be between 1 and 10" + ))); + } + Ok(int_val as usize) +} + +pub(super) fn pad_with_places( + result: &mut String, + places: Option, + func_name: &str, +) -> StdResult<(), CellValue> { + if let Some(p) = places { + if result.len() > p { + return Err(CellValue::Error(format!( + "{func_name} places is too small to display the result" + ))); + } + while result.len() < p { + result.insert(0, '0'); + } + } + Ok(()) +} + +pub(super) fn is_negative_binary(bits: &str) -> bool { + bits.len() == 10 && bits.starts_with('1') +} + +pub(super) fn 
extend_binary(bits: &str, target_len: usize, fill: char) -> String { + if bits.len() >= target_len { + bits.to_string() + } else { + let mut extended = String::with_capacity(target_len); + for _ in 0..(target_len - bits.len()) { + extended.push(fill); + } + extended.push_str(bits); + extended + } +} + +pub(super) fn negative_binary_to_hex(bits: &str) -> String { + let extended = extend_binary(bits, 40, '1'); + let value = u64::from_str_radix(&extended, 2).unwrap(); + format!("{value:010X}") +} + +pub(super) fn negative_binary_to_oct(bits: &str) -> String { + let extended = extend_binary(bits, 30, '1'); + let value = u64::from_str_radix(&extended, 2).unwrap(); + format!("{value:010o}") +} + +pub(super) fn parse_decimal_for_conversion( + value: &CellValue, + func_name: &str, + min: i64, + max: i64, +) -> StdResult { + let num = match to_number(value) { + Some(n) => n, + None => { + return Err(CellValue::Error(format!( + "{func_name} number must be numeric" + ))); + }, + }; + let int_val = match to_int_if_whole(num) { + Some(i) => i, + None => { + return Err(CellValue::Error(format!( + "{func_name} number must be an integer" + ))); + }, + }; + if int_val < min || int_val > max { + return Err(CellValue::Error(format!( + "{func_name} number must be between {min} and {max}" + ))); + } + Ok(int_val) +} + +pub(super) fn ensure_number_in_range( + value: i64, + min: i64, + max: i64, + func_name: &str, +) -> StdResult<(), CellValue> { + if value < min || value > max { + Err(CellValue::Error(format!( + "{func_name} number must be between {min} and {max}" + ))) + } else { + Ok(()) + } +} + +pub(super) fn twos_complement_value(number: i64, bits: u32) -> u64 { + let modulus = 1i128 << bits; + let adjusted = modulus + number as i128; + adjusted as u64 +} + +pub(super) fn parse_hex_string(value: &CellValue, func_name: &str) -> StdResult { + let raw = match value { + CellValue::String(s) => s.trim().to_string(), + CellValue::Int(i) => i.to_string(), + CellValue::Float(f) => match 
to_int_if_whole(*f) { + Some(i) => i.to_string(), + None => { + return Err(CellValue::Error(format!( + "{func_name} expects a hexadecimal value up to 10 digits" + ))); + }, + }, + CellValue::Bool(true) => "1".to_string(), + CellValue::Bool(false) => "0".to_string(), + _ => { + return Err(CellValue::Error(format!( + "{func_name} expects a hexadecimal value up to 10 digits" + ))); + }, + }; + + let trimmed = raw.trim(); + if trimmed.is_empty() { + return Err(CellValue::Error(format!( + "{func_name} expects a hexadecimal value up to 10 digits" + ))); + } + let upper = trimmed.to_uppercase(); + let (sign, digits) = if let Some(stripped) = upper.strip_prefix('-') { + ("-", stripped) + } else { + ("", upper.as_str()) + }; + if digits.is_empty() || digits.len() > 10 { + return Err(CellValue::Error(format!( + "{func_name} expects a hexadecimal value up to 10 digits" + ))); + } + if !digits.chars().all(|c| c.is_ascii_hexdigit()) { + return Err(CellValue::Error(format!( + "{func_name} expects a hexadecimal value up to 10 digits" + ))); + } + Ok(format!("{sign}{digits}")) +} + +pub(super) fn signed_hex_to_decimal(hex: &str, func_name: &str) -> StdResult { + if let Some(digits) = hex.strip_prefix('-') { + let value = i64::from_str_radix(digits, 16).unwrap(); + let signed = -value; + if !(HEX_MIN..=HEX_MAX).contains(&signed) { + return Err(CellValue::Error(format!( + "{func_name} number is out of range" + ))); + } + return Ok(signed); + } + + if hex.len() == 10 + && let Some(first) = hex.chars().next() + && ('8'..='F').contains(&first) + { + let raw = u64::from_str_radix(hex, 16).unwrap(); + let signed = raw as i64 - (1i64 << 40); + return Ok(signed); + } + + let value = i64::from_str_radix(hex, 16).unwrap(); + if !(HEX_MIN..=HEX_MAX).contains(&value) { + return Err(CellValue::Error(format!( + "{func_name} number is out of range" + ))); + } + Ok(value) +} + +pub(super) fn parse_octal_string( + value: &CellValue, + func_name: &str, +) -> StdResult { + let raw = match value { + 
CellValue::String(s) => s.trim().to_string(), + CellValue::Int(i) => i.to_string(), + CellValue::Float(f) => match to_int_if_whole(*f) { + Some(i) => i.to_string(), + None => { + return Err(CellValue::Error(format!( + "{func_name} expects an octal value up to 10 digits" + ))); + }, + }, + CellValue::Bool(true) => "1".to_string(), + CellValue::Bool(false) => "0".to_string(), + _ => { + return Err(CellValue::Error(format!( + "{func_name} expects an octal value up to 10 digits" + ))); + }, + }; + + let trimmed = raw.trim(); + if trimmed.is_empty() { + return Err(CellValue::Error(format!( + "{func_name} expects an octal value up to 10 digits" + ))); + } + let (sign, digits) = if let Some(stripped) = trimmed.strip_prefix('-') { + ("-", stripped) + } else { + ("", trimmed) + }; + if digits.is_empty() || digits.len() > 10 { + return Err(CellValue::Error(format!( + "{func_name} expects an octal value up to 10 digits" + ))); + } + if !digits.chars().all(|c| ('0'..='7').contains(&c)) { + return Err(CellValue::Error(format!( + "{func_name} expects an octal value up to 10 digits" + ))); + } + Ok(format!("{sign}{digits}")) +} + +pub(super) fn signed_octal_to_decimal(oct: &str, func_name: &str) -> StdResult { + if let Some(digits) = oct.strip_prefix('-') { + let value = i64::from_str_radix(digits, 8).unwrap(); + let signed = -value; + if !(OCT_MIN..=OCT_MAX).contains(&signed) { + return Err(CellValue::Error(format!( + "{func_name} number is out of range" + ))); + } + return Ok(signed); + } + + if oct.len() == 10 + && let Some(first) = oct.chars().next() + && ('4'..='7').contains(&first) + { + let raw = u64::from_str_radix(oct, 8).unwrap(); + let signed = raw as i64 - (1i64 << 30); + return Ok(signed); + } + + let value = i64::from_str_radix(oct, 8).unwrap(); + if !(OCT_MIN..=OCT_MAX).contains(&value) { + return Err(CellValue::Error(format!( + "{func_name} number is out of range" + ))); + } + Ok(value) +} + +pub(super) fn factorial(n: u64) -> f64 { + if n <= 1 { + 1.0 + } else { 
+ (2..=n).fold(1.0, |acc, v| acc * v as f64) + } +} + +pub(super) fn double_factorial(n: u64) -> f64 { + if n <= 1 { + 1.0 + } else { + let mut acc = 1.0; + let mut current = n; + while current > 1 { + acc *= current as f64; + current -= 2; + } + acc + } +} + +pub(super) fn combination(n: u64, k: u64) -> f64 { + if k == 0 || k == n { + return 1.0; + } + let k = k.min(n - k); + let mut result = 1.0; + for i in 1..=k { + result *= (n - k + i) as f64; + result /= i as f64; + } + result +} + +pub(super) fn permutation(n: u64, k: u64) -> f64 { + if k == 0 { + return 1.0; + } + let mut result = 1.0; + for i in 0..k { + result *= (n - i) as f64; + } + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_number_result_int() { + let result = number_result(5.0); + assert!(matches!(result, CellValue::Int(5))); + } + + #[test] + fn test_number_result_float() { + let result = number_result(5.5); + assert!(matches!(result, CellValue::Float(v) if (v - 5.5).abs() < 1e-9)); + } + + #[test] + fn test_number_result_large_float() { + let result = number_result(1e20); + assert!(matches!(result, CellValue::Float(_))); + } + + #[test] + fn test_is_even_true() { + assert!(is_even(4.0)); + assert!(is_even(-2.0)); + assert!(is_even(0.0)); + } + + #[test] + fn test_is_even_false() { + assert!(!is_even(3.0)); + assert!(!is_even(-1.0)); + } + + #[test] + fn test_round_away_from_zero() { + assert_eq!(round_away_from_zero(2.3), 3.0); + assert_eq!(round_away_from_zero(2.7), 3.0); + assert_eq!(round_away_from_zero(-2.3), -3.0); + assert_eq!(round_away_from_zero(-2.7), -3.0); + assert_eq!(round_away_from_zero(0.0), 0.0); + } + + #[test] + fn test_to_int_if_whole() { + assert_eq!(to_int_if_whole(5.0), Some(5)); + assert_eq!(to_int_if_whole(-5.0), Some(-5)); + assert_eq!(to_int_if_whole(5.5), None); + assert_eq!(to_int_if_whole(f64::NAN), None); + assert_eq!(to_int_if_whole(f64::INFINITY), None); + } + + #[test] + fn test_to_u48_valid() { + assert_eq!(to_u48(0.0), Some(0)); + 
assert_eq!(to_u48(100.0), Some(100)); + assert_eq!(to_u48(((1u64 << 48) - 1) as f64), Some((1u64 << 48) - 1)); + } + + #[test] + fn test_to_u48_invalid() { + assert_eq!(to_u48(-1.0), None); + assert_eq!(to_u48((1u64 << 48) as f64), None); + assert_eq!(to_u48(5.5), None); + assert_eq!(to_u48(f64::NAN), None); + } + + #[test] + fn test_to_shift_amount_valid() { + assert_eq!(to_shift_amount(0.0), Some(0)); + assert_eq!(to_shift_amount(10.0), Some(10)); + assert_eq!(to_shift_amount(53.0), Some(53)); + } + + #[test] + fn test_to_shift_amount_invalid() { + assert_eq!(to_shift_amount(-1.0), None); + assert_eq!(to_shift_amount(54.0), None); + assert_eq!(to_shift_amount(5.5), None); + } + + #[test] + fn test_factorial() { + assert_eq!(factorial(0), 1.0); + assert_eq!(factorial(1), 1.0); + assert_eq!(factorial(5), 120.0); + assert_eq!(factorial(10), 3628800.0); + } + + #[test] + fn test_double_factorial() { + assert_eq!(double_factorial(0), 1.0); + assert_eq!(double_factorial(1), 1.0); + assert_eq!(double_factorial(5), 15.0); // 5 * 3 * 1 + assert_eq!(double_factorial(6), 48.0); // 6 * 4 * 2 + } + + #[test] + fn test_combination() { + assert_eq!(combination(5, 0), 1.0); + assert_eq!(combination(5, 5), 1.0); + assert_eq!(combination(5, 2), 10.0); + assert_eq!(combination(10, 3), 120.0); + // C(n, k) = C(n, n-k) + assert_eq!(combination(10, 3), combination(10, 7)); + } + + #[test] + fn test_permutation() { + assert_eq!(permutation(5, 0), 1.0); + assert_eq!(permutation(5, 1), 5.0); + assert_eq!(permutation(5, 2), 20.0); + assert_eq!(permutation(10, 3), 720.0); + } + + #[test] + fn test_extend_binary() { + assert_eq!(extend_binary("101", 5, '0'), "00101"); + assert_eq!(extend_binary("101", 3, '0'), "101"); + assert_eq!(extend_binary("101", 5, '1'), "11101"); + } + + #[test] + fn test_is_negative_binary() { + assert!(is_negative_binary("1000000000")); // 10 bits starting with 1 + assert!(!is_negative_binary("0111111111")); // 10 bits starting with 0 + 
assert!(!is_negative_binary("101")); // Less than 10 bits + } + + #[test] + fn test_twos_complement_value() { + // The function calculates (2^bits + number) for two's complement + // Positive 5 with 8 bits: 256 + 5 = 261 + assert_eq!(twos_complement_value(5, 8), 261); + // Negative number: -1 with 8 bits: 256 + (-1) = 255 + assert_eq!(twos_complement_value(-1, 8), 255); + // Negative number: -5 with 8 bits: 256 + (-5) = 251 + assert_eq!(twos_complement_value(-5, 8), 251); + } + + #[test] + fn test_parse_decimal_for_conversion_valid() { + let value = CellValue::Int(100); + let result = parse_decimal_for_conversion(&value, "TEST", -100, 100); + assert_eq!(result.unwrap(), 100); + } + + #[test] + fn test_parse_decimal_for_conversion_out_of_range() { + let value = CellValue::Int(200); + let result = parse_decimal_for_conversion(&value, "TEST", -100, 100); + assert!(result.is_err()); + } + + #[test] + fn test_parse_decimal_for_conversion_non_numeric() { + let value = CellValue::String("abc".to_string()); + let result = parse_decimal_for_conversion(&value, "TEST", -100, 100); + assert!(result.is_err()); + } + + #[test] + fn test_ensure_number_in_range() { + assert!(ensure_number_in_range(50, 0, 100, "TEST").is_ok()); + assert!(ensure_number_in_range(-1, 0, 100, "TEST").is_err()); + assert!(ensure_number_in_range(101, 0, 100, "TEST").is_err()); + } + + #[test] + fn test_bit_operand_value_valid() { + let value = CellValue::Int(42); + let result = bit_operand_value(&value, "BITAND"); + assert_eq!(result.unwrap(), 42); + } + + #[test] + fn test_bit_operand_value_negative() { + let value = CellValue::Int(-1); + let result = bit_operand_value(&value, "BITAND"); + assert!(result.is_err()); + } + + #[test] + fn test_bit_operand_value_non_numeric() { + let value = CellValue::String("abc".to_string()); + let result = bit_operand_value(&value, "BITAND"); + assert!(result.is_err()); + } + + #[test] + fn test_bit_shift_value_valid() { + let value = CellValue::Int(10); + let result = 
bit_shift_value(&value, "BITLSHIFT"); + assert_eq!(result.unwrap(), 10); + } + + #[test] + fn test_bit_shift_value_too_large() { + let value = CellValue::Int(100); + let result = bit_shift_value(&value, "BITLSHIFT"); + assert!(result.is_err()); + } + + #[test] + fn test_parse_places_argument_valid() { + let value = CellValue::Int(5); + let result = parse_places_argument(&value, "DEC2BIN"); + assert_eq!(result.unwrap(), 5); + } + + #[test] + fn test_parse_places_argument_out_of_range() { + let value = CellValue::Int(15); + let result = parse_places_argument(&value, "DEC2BIN"); + assert!(result.is_err()); + } + + #[test] + fn test_parse_places_argument_non_numeric() { + let value = CellValue::String("abc".to_string()); + let result = parse_places_argument(&value, "DEC2BIN"); + assert!(result.is_err()); + } + + #[test] + fn test_pad_with_places_success() { + let mut result = "101".to_string(); + assert!(pad_with_places(&mut result, Some(5), "DEC2BIN").is_ok()); + assert_eq!(result, "00101"); + } + + #[test] + fn test_pad_with_places_too_small() { + let mut result = "1010".to_string(); + assert!(pad_with_places(&mut result, Some(2), "DEC2BIN").is_err()); + } + + #[test] + fn test_binary_string_from_value_valid() { + let value = CellValue::String("1010".to_string()); + let result = binary_string_from_value(&value, "BIN2DEC"); + assert_eq!(result.unwrap(), "1010"); + } + + #[test] + fn test_binary_string_from_value_invalid_characters() { + let value = CellValue::String("102".to_string()); + let result = binary_string_from_value(&value, "BIN2DEC"); + assert!(result.is_err()); + } + + #[test] + fn test_binary_string_from_value_int() { + let value = CellValue::Int(101); + let result = binary_string_from_value(&value, "BIN2DEC"); + assert_eq!(result.unwrap(), "101"); + } + + #[test] + fn test_parse_signed_binary_positive() { + let result = parse_signed_binary("101", "BIN2DEC").unwrap(); + assert_eq!(result, 5); + } + + #[test] + fn test_parse_signed_binary_negative() { + // 
10-bit two's complement: 1111111111 = -1 + let result = parse_signed_binary("1111111111", "BIN2DEC").unwrap(); + assert_eq!(result, -1); + } + + #[test] + fn test_parse_signed_binary_out_of_range() { + let result = parse_signed_binary("101010101010", "BIN2DEC"); + assert!(result.is_err()); + } + + #[test] + fn test_hex_string_from_value_valid() { + let value = CellValue::String("A1F".to_string()); + let result = parse_hex_string(&value, "HEX2DEC").unwrap(); + assert_eq!(result, "A1F"); + } + + #[test] + fn test_hex_string_from_value_negative() { + let value = CellValue::String("-A1F".to_string()); + let result = parse_hex_string(&value, "HEX2DEC").unwrap(); + assert_eq!(result, "-A1F"); + } + + #[test] + fn test_hex_string_from_value_invalid() { + let value = CellValue::String("GHI".to_string()); + let result = parse_hex_string(&value, "HEX2DEC"); + assert!(result.is_err()); + } + + #[test] + fn test_signed_hex_to_decimal_positive() { + let result = signed_hex_to_decimal("A", "HEX2DEC").unwrap(); + assert_eq!(result, 10); + } + + #[test] + fn test_signed_hex_to_decimal_negative() { + let result = signed_hex_to_decimal("-A", "HEX2DEC").unwrap(); + assert_eq!(result, -10); + } + + #[test] + fn test_octal_string_from_value_valid() { + let value = CellValue::String("755".to_string()); + let result = parse_octal_string(&value, "OCT2DEC").unwrap(); + assert_eq!(result, "755"); + } + + #[test] + fn test_octal_string_from_value_invalid() { + let value = CellValue::String("789".to_string()); + let result = parse_octal_string(&value, "OCT2DEC"); + assert!(result.is_err()); + } + + #[test] + fn test_signed_octal_to_decimal_positive() { + let result = signed_octal_to_decimal("755", "OCT2DEC").unwrap(); + assert_eq!(result, 0o755); + } + + #[test] + fn test_signed_octal_to_decimal_negative() { + let result = signed_octal_to_decimal("-755", "OCT2DEC").unwrap(); + assert_eq!(result, -0o755); + } + + #[test] + fn test_negative_binary_to_hex() { + let result = 
negative_binary_to_hex("1000000000"); + assert_eq!(result, "FFFFFFFE00"); + } + + #[test] + fn test_negative_binary_to_oct() { + let result = negative_binary_to_oct("1000000000"); + assert_eq!(result, "7777777000"); + } +} diff --git a/src/sheet/eval/engine/math/mod.rs b/crates/litchi-eval/src/engine/math/mod.rs similarity index 100% rename from src/sheet/eval/engine/math/mod.rs rename to crates/litchi-eval/src/engine/math/mod.rs diff --git a/src/sheet/eval/engine/math/random.rs b/crates/litchi-eval/src/engine/math/random.rs similarity index 93% rename from src/sheet/eval/engine/math/random.rs rename to crates/litchi-eval/src/engine/math/random.rs index 3312cc7..ed6fadf 100644 --- a/src/sheet/eval/engine/math/random.rs +++ b/crates/litchi-eval/src/engine/math/random.rs @@ -1,8 +1,8 @@ use rand::RngExt; -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; pub(crate) async fn eval_rand( _ctx: EvalCtx<'_>, @@ -61,8 +61,8 @@ pub(crate) async fn eval_randbetween( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -81,7 +81,7 @@ mod tests { match result { CellValue::Float(v) => { // RAND should return a value between 0 and 1 - assert!(v >= 0.0 && v < 1.0); + assert!((0.0..1.0).contains(&v)); }, _ => panic!("Expected Float"), } @@ -108,7 +108,7 @@ mod tests { match result { CellValue::Int(v) => { // RANDBETWEEN should return an integer in the range [1, 10] - assert!(v >= 1 && v <= 10); + assert!((1..=10).contains(&v)); }, _ => panic!("Expected Int"), } @@ -135,7 +135,7 @@ mod tests { let result = eval_randbetween(ctx, "Sheet1", 
&args).await.unwrap(); match result { CellValue::Int(v) => { - assert!(v >= -10 && v <= -1); + assert!((-10..=-1).contains(&v)); }, _ => panic!("Expected Int"), } @@ -149,7 +149,7 @@ mod tests { let result = eval_randbetween(ctx, "Sheet1", &args).await.unwrap(); match result { CellValue::Int(v) => { - assert!(v >= -5 && v <= 5); + assert!((-5..=5).contains(&v)); }, _ => panic!("Expected Int"), } diff --git a/src/sheet/eval/engine/math/rounding.rs b/crates/litchi-eval/src/engine/math/rounding.rs similarity index 91% rename from src/sheet/eval/engine/math/rounding.rs rename to crates/litchi-eval/src/engine/math/rounding.rs index 4d696dd..7071da2 100644 --- a/src/sheet/eval/engine/math/rounding.rs +++ b/crates/litchi-eval/src/engine/math/rounding.rs @@ -1,8 +1,8 @@ -use crate::sheet::{CellValue, Result}; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::{EPS, is_even, number_result, round_away_from_zero}; -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number}; -use crate::sheet::eval::parser::Expr; +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; pub(crate) async fn eval_round( ctx: EvalCtx<'_>, @@ -677,7 +677,7 @@ pub(crate) async fn eval_iso_ceiling( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -689,7 +689,7 @@ mod tests { #[tokio::test] async fn test_eval_round_positive() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.14159), num_expr(2.0)]; let result = eval_round(ctx, "Sheet1", &args).await.unwrap(); @@ -701,7 +701,7 @@ mod tests { #[tokio::test] async fn test_eval_round_negative_digits() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx 
= engine.ctx(); let args = vec![num_expr(1234.0), num_expr(-2.0)]; let result = eval_round(ctx, "Sheet1", &args).await.unwrap(); @@ -713,7 +713,7 @@ mod tests { #[tokio::test] async fn test_eval_round_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.14)]; let result = eval_round(ctx, "Sheet1", &args).await.unwrap(); @@ -725,7 +725,7 @@ mod tests { #[tokio::test] async fn test_eval_rounddown() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.99), num_expr(0.0)]; let result = eval_rounddown(ctx, "Sheet1", &args).await.unwrap(); @@ -737,7 +737,7 @@ mod tests { #[tokio::test] async fn test_eval_rounddown_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-3.99), num_expr(0.0)]; let result = eval_rounddown(ctx, "Sheet1", &args).await.unwrap(); @@ -749,7 +749,7 @@ mod tests { #[tokio::test] async fn test_eval_roundup() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.01), num_expr(0.0)]; let result = eval_roundup(ctx, "Sheet1", &args).await.unwrap(); @@ -761,7 +761,7 @@ mod tests { #[tokio::test] async fn test_eval_roundup_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-3.01), num_expr(0.0)]; let result = eval_roundup(ctx, "Sheet1", &args).await.unwrap(); @@ -773,7 +773,7 @@ mod tests { #[tokio::test] async fn test_eval_floor() { - let 
engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.7), num_expr(1.0)]; let result = eval_floor(ctx, "Sheet1", &args).await.unwrap(); @@ -785,7 +785,7 @@ mod tests { #[tokio::test] async fn test_eval_floor_default_sig() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.7)]; let result = eval_floor(ctx, "Sheet1", &args).await.unwrap(); @@ -797,7 +797,7 @@ mod tests { #[tokio::test] async fn test_eval_floor_zero_sig() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.7), num_expr(0.0)]; let result = eval_floor(ctx, "Sheet1", &args).await.unwrap(); @@ -809,7 +809,7 @@ mod tests { #[tokio::test] async fn test_eval_ceiling() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.2), num_expr(1.0)]; let result = eval_ceiling(ctx, "Sheet1", &args).await.unwrap(); @@ -821,7 +821,7 @@ mod tests { #[tokio::test] async fn test_eval_ceiling_default_sig() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.2)]; let result = eval_ceiling(ctx, "Sheet1", &args).await.unwrap(); @@ -833,7 +833,7 @@ mod tests { #[tokio::test] async fn test_eval_ceiling_zero_sig() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.2), num_expr(0.0)]; let result = eval_ceiling(ctx, 
"Sheet1", &args).await.unwrap(); @@ -845,7 +845,7 @@ mod tests { #[tokio::test] async fn test_eval_floor_math() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.7)]; let result = eval_floor_math(ctx, "Sheet1", &args).await.unwrap(); @@ -857,7 +857,7 @@ mod tests { #[tokio::test] async fn test_eval_floor_math_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-3.7)]; let result = eval_floor_math(ctx, "Sheet1", &args).await.unwrap(); @@ -869,7 +869,7 @@ mod tests { #[tokio::test] async fn test_eval_floor_math_mode() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-3.7), num_expr(1.0), num_expr(1.0)]; let result = eval_floor_math(ctx, "Sheet1", &args).await.unwrap(); @@ -881,7 +881,7 @@ mod tests { #[tokio::test] async fn test_eval_floor_precise() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.7)]; let result = eval_floor_precise(ctx, "Sheet1", &args).await.unwrap(); @@ -893,7 +893,7 @@ mod tests { #[tokio::test] async fn test_eval_ceiling_math() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.2)]; let result = eval_ceiling_math(ctx, "Sheet1", &args).await.unwrap(); @@ -905,7 +905,7 @@ mod tests { #[tokio::test] async fn test_eval_ceiling_math_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-3.2)]; let result = eval_ceiling_math(ctx, "Sheet1", &args).await.unwrap(); @@ -917,7 +917,7 @@ mod tests { #[tokio::test] async fn test_eval_ceiling_precise() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.2)]; let result = eval_ceiling_precise(ctx, "Sheet1", &args).await.unwrap(); @@ -929,7 +929,7 @@ mod tests { #[tokio::test] async fn test_eval_mod() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10.0), num_expr(3.0)]; let result = eval_mod(ctx, "Sheet1", &args).await.unwrap(); @@ -942,7 +942,7 @@ mod tests { #[tokio::test] async fn test_eval_mod_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-10.0), num_expr(3.0)]; let result = eval_mod(ctx, "Sheet1", &args).await.unwrap(); @@ -957,7 +957,7 @@ mod tests { #[tokio::test] async fn test_eval_mod_zero_divisor() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10.0), num_expr(0.0)]; let result = eval_mod(ctx, "Sheet1", &args).await.unwrap(); @@ -969,7 +969,7 @@ mod tests { #[tokio::test] async fn test_eval_mround() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10.0), num_expr(3.0)]; let result = eval_mround(ctx, "Sheet1", &args).await.unwrap(); @@ -983,7 +983,7 @@ mod tests { #[tokio::test] async fn 
test_eval_mround_different_signs() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10.0), num_expr(-3.0)]; let result = eval_mround(ctx, "Sheet1", &args).await.unwrap(); @@ -995,7 +995,7 @@ mod tests { #[tokio::test] async fn test_eval_quotient() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10.0), num_expr(3.0)]; let result = eval_quotient(ctx, "Sheet1", &args).await.unwrap(); @@ -1008,7 +1008,7 @@ mod tests { #[tokio::test] async fn test_eval_quotient_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-10.0), num_expr(3.0)]; let result = eval_quotient(ctx, "Sheet1", &args).await.unwrap(); @@ -1021,7 +1021,7 @@ mod tests { #[tokio::test] async fn test_eval_trunc() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.99)]; let result = eval_trunc(ctx, "Sheet1", &args).await.unwrap(); @@ -1034,7 +1034,7 @@ mod tests { #[tokio::test] async fn test_eval_trunc_with_digits() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.14159), num_expr(2.0)]; let result = eval_trunc(ctx, "Sheet1", &args).await.unwrap(); @@ -1046,7 +1046,7 @@ mod tests { #[tokio::test] async fn test_eval_even_positive() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = 
vec![num_expr(3.2)]; let result = eval_even(ctx, "Sheet1", &args).await.unwrap(); @@ -1059,7 +1059,7 @@ mod tests { #[tokio::test] async fn test_eval_even_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-3.2)]; let result = eval_even(ctx, "Sheet1", &args).await.unwrap(); @@ -1072,7 +1072,7 @@ mod tests { #[tokio::test] async fn test_eval_even_zero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_even(ctx, "Sheet1", &args).await.unwrap(); @@ -1084,7 +1084,7 @@ mod tests { #[tokio::test] async fn test_eval_odd_positive() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.2)]; let result = eval_odd(ctx, "Sheet1", &args).await.unwrap(); @@ -1098,7 +1098,7 @@ mod tests { #[tokio::test] async fn test_eval_odd_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-3.2)]; let result = eval_odd(ctx, "Sheet1", &args).await.unwrap(); @@ -1112,7 +1112,7 @@ mod tests { #[tokio::test] async fn test_eval_odd_zero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_odd(ctx, "Sheet1", &args).await.unwrap(); @@ -1124,7 +1124,7 @@ mod tests { #[tokio::test] async fn test_eval_sign_positive() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(42.0)]; let result = eval_sign(ctx, "Sheet1", &args).await.unwrap(); @@ -1136,7 +1136,7 @@ mod tests { #[tokio::test] async fn test_eval_sign_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-42.0)]; let result = eval_sign(ctx, "Sheet1", &args).await.unwrap(); @@ -1148,7 +1148,7 @@ mod tests { #[tokio::test] async fn test_eval_sign_zero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_sign(ctx, "Sheet1", &args).await.unwrap(); @@ -1160,7 +1160,7 @@ mod tests { #[tokio::test] async fn test_eval_iso_ceiling() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(3.2)]; let result = eval_iso_ceiling(ctx, "Sheet1", &args).await.unwrap(); diff --git a/crates/litchi-eval/src/engine/math/series.rs b/crates/litchi-eval/src/engine/math/series.rs new file mode 100644 index 0000000..9020a89 --- /dev/null +++ b/crates/litchi-eval/src/engine/math/series.rs @@ -0,0 +1,639 @@ +use crate::engine::{EvalCtx, evaluate_expression, for_each_value_in_expr, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; +use rand::RngExt; + +use super::helpers::flatten_numeric_values; + +pub(crate) async fn eval_sumsq( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + let mut total = 0.0f64; + for arg in args { + for_each_value_in_expr(ctx, current_sheet, arg, |value| { + if let Some(n) = to_number(value) { + total += n * n; + } + Ok(()) + }) + .await?; + } + Ok(CellValue::Float(total)) +} + 
+pub(crate) async fn eval_sumx2my2( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 2 { + return Ok(CellValue::Error( + "SUMX2MY2 expects 2 arguments (array_x, array_y)".to_string(), + )); + } + let x_vals = flatten_numeric_values(ctx, current_sheet, &args[0]).await?; + let y_vals = flatten_numeric_values(ctx, current_sheet, &args[1]).await?; + if x_vals.len() != y_vals.len() { + return Ok(CellValue::Error( + "SUMX2MY2 requires arrays of the same size".to_string(), + )); + } + let total = x_vals + .iter() + .zip(y_vals.iter()) + .fold(0.0, |acc, (&x, &y)| acc + x * x - y * y); + Ok(CellValue::Float(total)) +} + +pub(crate) async fn eval_sumx2py2( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 2 { + return Ok(CellValue::Error( + "SUMX2PY2 expects 2 arguments (array_x, array_y)".to_string(), + )); + } + let x_vals = flatten_numeric_values(ctx, current_sheet, &args[0]).await?; + let y_vals = flatten_numeric_values(ctx, current_sheet, &args[1]).await?; + if x_vals.len() != y_vals.len() { + return Ok(CellValue::Error( + "SUMX2PY2 requires arrays of the same size".to_string(), + )); + } + let total = x_vals + .iter() + .zip(y_vals.iter()) + .fold(0.0, |acc, (&x, &y)| acc + x * x + y * y); + Ok(CellValue::Float(total)) +} + +pub(crate) async fn eval_sumxmy2( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 2 { + return Ok(CellValue::Error( + "SUMXMY2 expects 2 arguments (array_x, array_y)".to_string(), + )); + } + let x_vals = flatten_numeric_values(ctx, current_sheet, &args[0]).await?; + let y_vals = flatten_numeric_values(ctx, current_sheet, &args[1]).await?; + if x_vals.len() != y_vals.len() { + return Ok(CellValue::Error( + "SUMXMY2 requires arrays of the same size".to_string(), + )); + } + let total = x_vals.iter().zip(y_vals.iter()).fold(0.0, |acc, (&x, &y)| { + let diff = x - y; + acc + diff * diff + }); + 
Ok(CellValue::Float(total)) +} + +pub(crate) async fn eval_seriessum( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 4 { + return Ok(CellValue::Error( + "SERIESSUM expects 4 arguments (x, n, m, coefficients)".to_string(), + )); + } + + let x = match to_number(&evaluate_expression(ctx, current_sheet, &args[0]).await?) { + Some(v) => v, + None => return Ok(CellValue::Error("#VALUE!".to_string())), + }; + let n = match to_number(&evaluate_expression(ctx, current_sheet, &args[1]).await?) { + Some(v) => v, + None => return Ok(CellValue::Error("#VALUE!".to_string())), + }; + let m = match to_number(&evaluate_expression(ctx, current_sheet, &args[2]).await?) { + Some(v) => v, + None => return Ok(CellValue::Error("#VALUE!".to_string())), + }; + + let mut coefficients = Vec::new(); + for_each_value_in_expr(ctx, current_sheet, &args[3], |val| { + if let Some(c) = to_number(val) { + coefficients.push(c); + } + Ok(()) + }) + .await?; + + if coefficients.is_empty() { + return Ok(CellValue::Float(0.0)); + } + + let mut total = 0.0; + for (i, &coeff) in coefficients.iter().enumerate() { + let power = n + (i as f64) * m; + total += coeff * x.powf(power); + } + + Ok(CellValue::Float(total)) +} + +pub(crate) async fn eval_sequence( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() || args.len() > 4 { + return Ok(CellValue::Error( + "SEQUENCE expects 1 to 4 arguments (rows, [columns], [start], [step])".to_string(), + )); + } + + let rows = match args.first() { + Some(expr) => match to_number(&evaluate_expression(ctx, current_sheet, expr).await?) { + Some(n) if n >= 0.0 => n.trunc() as usize, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }, + None => 1, + }; + + let cols = match args.get(1) { + Some(expr) => match to_number(&evaluate_expression(ctx, current_sheet, expr).await?) 
{ + Some(n) if n >= 0.0 => n.trunc() as usize, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }, + None => 1, + }; + + let start = match args.get(2) { + Some(expr) => match to_number(&evaluate_expression(ctx, current_sheet, expr).await?) { + Some(n) => n, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }, + None => 1.0, + }; + + let step = match args.get(3) { + Some(expr) => match to_number(&evaluate_expression(ctx, current_sheet, expr).await?) { + Some(n) => n, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }, + None => 1.0, + }; + + if rows == 0 || cols == 0 { + return Ok(CellValue::Error("#CALC!".to_string())); + } + + let _step = step; // Avoid unused warning for now + // Since CellValue doesn't support arrays yet, return the first value (start) + Ok(CellValue::Float(start)) +} + +pub(crate) async fn eval_vstack( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() { + return Ok(CellValue::Error( + "VSTACK expects at least 1 argument".to_string(), + )); + } + // Placeholder: return first value of first argument + evaluate_expression(ctx, current_sheet, &args[0]).await +} + +pub(crate) async fn eval_hstack( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() { + return Ok(CellValue::Error( + "HSTACK expects at least 1 argument".to_string(), + )); + } + // Placeholder: return first value of first argument + evaluate_expression(ctx, current_sheet, &args[0]).await +} + +pub(crate) async fn eval_wrapcols( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() < 2 || args.len() > 3 { + return Ok(CellValue::Error( + "WRAPCOLS expects 2 or 3 arguments".to_string(), + )); + } + // Placeholder: return first value of first argument + evaluate_expression(ctx, current_sheet, &args[0]).await +} + +pub(crate) async fn eval_wraprows( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if 
args.len() < 2 || args.len() > 3 { + return Ok(CellValue::Error( + "WRAPROWS expects 2 or 3 arguments".to_string(), + )); + } + // Placeholder: return first value of first argument + evaluate_expression(ctx, current_sheet, &args[0]).await +} + +pub(crate) async fn eval_randarray( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() > 5 { + return Ok(CellValue::Error( + "RANDARRAY expects 0 to 5 arguments (rows, [columns], [min], [max], [whole_number])" + .to_string(), + )); + } + + let rows = match args.first() { + Some(expr) => match to_number(&evaluate_expression(ctx, current_sheet, expr).await?) { + Some(n) if n >= 0.0 => n.trunc() as usize, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }, + None => 1, + }; + + let cols = match args.get(1) { + Some(expr) => match to_number(&evaluate_expression(ctx, current_sheet, expr).await?) { + Some(n) if n >= 0.0 => n.trunc() as usize, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }, + None => 1, + }; + + let min = match args.get(2) { + Some(expr) => match to_number(&evaluate_expression(ctx, current_sheet, expr).await?) { + Some(n) => n, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }, + None => 0.0, + }; + + let max = match args.get(3) { + Some(expr) => match to_number(&evaluate_expression(ctx, current_sheet, expr).await?) 
{ + Some(n) => n, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }, + None => 1.0, + }; + + let whole_number = match args.get(4) { + Some(expr) => { + let val = evaluate_expression(ctx, current_sheet, expr).await?; + match val { + CellValue::Bool(b) => b, + _ => match to_number(&val) { + Some(n) => n != 0.0, + None => false, + }, + } + }, + None => false, + }; + + if rows == 0 || cols == 0 { + return Ok(CellValue::Error("#CALC!".to_string())); + } + + if min > max { + return Ok(CellValue::Error("#VALUE!".to_string())); + } + + let mut rng = rand::rng(); + let val = if whole_number { + let bottom = min.ceil() as i64; + let top = max.floor() as i64; + if bottom > top { + return Ok(CellValue::Error("#VALUE!".to_string())); + } + CellValue::Int(rng.random_range(bottom..=top)) + } else { + CellValue::Float(min + (max - min) * rng.random::()) + }; + + // Since CellValue doesn't support arrays yet, return a single random value + Ok(val) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + use crate::parser::ast::RangeRef; + + fn num_expr(n: f64) -> Expr { + if n == n.floor() { + Expr::Literal(CellValue::Int(n as i64)) + } else { + Expr::Literal(CellValue::Float(n)) + } + } + + fn range_expr(sheet: &str, start_row: u32, start_col: u32, end_row: u32, end_col: u32) -> Expr { + Expr::Range(RangeRef { + sheet: sheet.to_string(), + start_row, + start_col, + end_row, + end_col, + }) + } + + #[tokio::test] + async fn test_eval_sumsq() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + // Add some values to the engine + let values = vec![ + CellValue::Int(1), + CellValue::Int(2), + CellValue::Int(3), + CellValue::Int(4), + ]; + engine.add_range("Sheet1", 1, 1, 2, 2, values); + + // SUMSQ(Sheet1!A1:B2) = 1^2 + 2^2 + 3^2 + 4^2 = 30 + let args = vec![range_expr("Sheet1", 1, 1, 2, 2)]; + let result = eval_sumsq(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(v) 
=> assert!((v - 30.0).abs() < 1e-9), + _ => panic!("Expected Float result"), + } + } + + #[tokio::test] + async fn test_eval_sumsq_single_values() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + // SUMSQ(3, 4) = 3^2 + 4^2 = 25 + let args = vec![num_expr(3.0), num_expr(4.0)]; + let result = eval_sumsq(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(v) => assert!((v - 25.0).abs() < 1e-9), + _ => panic!("Expected Float result"), + } + } + + #[tokio::test] + async fn test_eval_sumx2my2() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + // Arrays: [1, 2, 3] and [1, 2, 3] + // Sum of (x^2 - y^2) = (1-1) + (4-4) + (9-9) = 0 + let values1 = vec![CellValue::Int(1), CellValue::Int(2), CellValue::Int(3)]; + let values2 = vec![CellValue::Int(1), CellValue::Int(2), CellValue::Int(3)]; + engine.add_range("Sheet1", 1, 1, 1, 3, values1); + engine.add_range("Sheet1", 2, 1, 1, 3, values2); + + let args = vec![ + range_expr("Sheet1", 1, 1, 1, 3), + range_expr("Sheet1", 2, 1, 2, 3), + ]; + let result = eval_sumx2my2(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(v) => assert!(v.abs() < 1e-9), + _ => panic!("Expected Float result"), + } + } + + #[tokio::test] + async fn test_eval_sumx2py2() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + // Arrays: [3, 4] and [0, 0] + // Sum of (x^2 + y^2) = (9+0) + (16+0) = 25 + let values1 = vec![CellValue::Int(3), CellValue::Int(4)]; + let values2 = vec![CellValue::Int(0), CellValue::Int(0)]; + engine.add_range("Sheet1", 1, 1, 1, 2, values1); + engine.add_range("Sheet1", 2, 1, 1, 2, values2); + + let args = vec![ + range_expr("Sheet1", 1, 1, 1, 2), + range_expr("Sheet1", 2, 1, 2, 2), + ]; + let result = eval_sumx2py2(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(v) => assert!((v - 25.0).abs() < 1e-9), + _ => panic!("Expected Float result"), + } + } + + #[tokio::test] + async fn test_eval_sumxmy2() { + let engine = 
TestEngine::new(); + let ctx = engine.ctx(); + // Arrays: [1, 2, 3] and [0, 0, 0] + // Sum of (x - y)^2 = 1 + 4 + 9 = 14 + let values1 = vec![CellValue::Int(1), CellValue::Int(2), CellValue::Int(3)]; + let values2 = vec![CellValue::Int(0), CellValue::Int(0), CellValue::Int(0)]; + engine.add_range("Sheet1", 1, 1, 1, 3, values1); + engine.add_range("Sheet1", 2, 1, 1, 3, values2); + + let args = vec![ + range_expr("Sheet1", 1, 1, 1, 3), + range_expr("Sheet1", 2, 1, 2, 3), + ]; + let result = eval_sumxmy2(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(v) => assert!((v - 14.0).abs() < 1e-9), + _ => panic!("Expected Float result"), + } + } + + #[tokio::test] + async fn test_eval_sumxmy2_wrong_size() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let values1 = vec![CellValue::Int(1), CellValue::Int(2)]; + let values2 = vec![CellValue::Int(1)]; + engine.add_range("Sheet1", 1, 1, 1, 2, values1); + engine.add_range("Sheet1", 2, 1, 1, 1, values2); + + let args = vec![ + range_expr("Sheet1", 1, 1, 1, 2), + range_expr("Sheet1", 2, 1, 2, 1), + ]; + let result = eval_sumxmy2(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("same size")), + _ => panic!("Expected Error result"), + } + } + + #[tokio::test] + async fn test_eval_seriessum() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + // x=2, n=0, m=1, coefficients=[1, 2, 3] + // = 1*2^0 + 2*2^1 + 3*2^2 = 1 + 4 + 12 = 17 + let coeffs = vec![CellValue::Int(1), CellValue::Int(2), CellValue::Int(3)]; + engine.add_range("Sheet1", 1, 1, 1, 3, coeffs); + + let args = vec![ + num_expr(2.0), + num_expr(0.0), + num_expr(1.0), + range_expr("Sheet1", 1, 1, 1, 3), + ]; + let result = eval_seriessum(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(v) => assert!((v - 17.0).abs() < 1e-9), + _ => panic!("Expected Float result"), + } + } + + #[tokio::test] + async fn test_eval_seriessum_wrong_args() { + let engine = 
TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(2.0), num_expr(0.0)]; + let result = eval_seriessum(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 4 arguments")), + _ => panic!("Expected Error result"), + } + } + + #[tokio::test] + async fn test_eval_sequence() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + // SEQUENCE(3, 2) - returns first value as placeholder + let args = vec![num_expr(3.0), num_expr(2.0)]; + let result = eval_sequence(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Float(v) => assert!((v - 1.0).abs() < 1e-9), // Returns start value + _ => panic!("Expected Float result"), + } + } + + #[tokio::test] + async fn test_eval_sequence_zero_rows() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(0.0)]; + let result = eval_sequence(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("#CALC!")), + _ => panic!("Expected Error result"), + } + } + + #[tokio::test] + async fn test_eval_vstack() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_vstack(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(42)); + } + + #[tokio::test] + async fn test_eval_vstack_no_args() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![]; + let result = eval_vstack(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects at least 1 argument")), + _ => panic!("Expected Error result"), + } + } + + #[tokio::test] + async fn test_eval_hstack() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(42.0)]; + let result = eval_hstack(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(42)); + } + + #[tokio::test] + async fn test_eval_wrapcols() { + let 
engine = TestEngine::new(); + let ctx = engine.ctx(); + // WRAPCOLS with literal value + let args = vec![num_expr(42.0), num_expr(2.0)]; + let result = eval_wrapcols(ctx, "Sheet1", &args).await.unwrap(); + // Returns the value as placeholder + assert_eq!(result, CellValue::Int(42)); + } + + #[tokio::test] + async fn test_eval_wraprows() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + // WRAPROWS with literal value + let args = vec![num_expr(42.0), num_expr(2.0)]; + let result = eval_wraprows(ctx, "Sheet1", &args).await.unwrap(); + // Returns the value as placeholder + assert_eq!(result, CellValue::Int(42)); + } + + #[tokio::test] + async fn test_eval_randarray() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![num_expr(3.0), num_expr(2.0)]; + let result = eval_randarray(ctx, "Sheet1", &args).await.unwrap(); + // Returns a single random value between 0 and 1 + match result { + CellValue::Float(v) => assert!((0.0..=1.0).contains(&v)), + _ => panic!("Expected Float result"), + } + } + + #[tokio::test] + async fn test_eval_randarray_whole_number() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + // RANDARRAY(3, 2, 1, 10, TRUE) + let args = vec![ + num_expr(3.0), + num_expr(2.0), + num_expr(1.0), + num_expr(10.0), + num_expr(1.0), // TRUE + ]; + let result = eval_randarray(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Int(v) => assert!((1..=10).contains(&v)), + _ => panic!("Expected Int result"), + } + } + + #[tokio::test] + async fn test_eval_randarray_min_greater_max() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![ + num_expr(3.0), + num_expr(2.0), + num_expr(10.0), + num_expr(1.0), // min > max + ]; + let result = eval_randarray(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("#VALUE!")), + _ => panic!("Expected Error result"), + } + } +} diff --git a/src/sheet/eval/engine/math/trig.rs 
b/crates/litchi-eval/src/engine/math/trig.rs similarity index 87% rename from src/sheet/eval/engine/math/trig.rs rename to crates/litchi-eval/src/engine/math/trig.rs index 62edb2f..bb08a4f 100644 --- a/src/sheet/eval/engine/math/trig.rs +++ b/crates/litchi-eval/src/engine/math/trig.rs @@ -1,8 +1,8 @@ use std::f64::consts::FRAC_PI_2; -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_number}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::EPS; @@ -334,7 +334,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -346,7 +346,7 @@ mod tests { #[tokio::test] async fn test_eval_sin() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_sin(ctx, "Sheet1", &args).await.unwrap(); @@ -358,7 +358,7 @@ mod tests { #[tokio::test] async fn test_eval_sin_pi_over_2() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(std::f64::consts::FRAC_PI_2)]; let result = eval_sin(ctx, "Sheet1", &args).await.unwrap(); @@ -370,7 +370,7 @@ mod tests { #[tokio::test] async fn test_eval_cos() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_cos(ctx, "Sheet1", &args).await.unwrap(); @@ -382,7 +382,7 @@ mod tests { #[tokio::test] async fn test_eval_cos_pi() { - let engine = 
crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(std::f64::consts::PI)]; let result = eval_cos(ctx, "Sheet1", &args).await.unwrap(); @@ -394,7 +394,7 @@ mod tests { #[tokio::test] async fn test_eval_tan() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_tan(ctx, "Sheet1", &args).await.unwrap(); @@ -406,7 +406,7 @@ mod tests { #[tokio::test] async fn test_eval_tan_pi_over_4() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(std::f64::consts::FRAC_PI_4)]; let result = eval_tan(ctx, "Sheet1", &args).await.unwrap(); @@ -418,7 +418,7 @@ mod tests { #[tokio::test] async fn test_eval_cot() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(std::f64::consts::FRAC_PI_4)]; let result = eval_cot(ctx, "Sheet1", &args).await.unwrap(); @@ -430,7 +430,7 @@ mod tests { #[tokio::test] async fn test_eval_cot_undefined() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_cot(ctx, "Sheet1", &args).await.unwrap(); @@ -442,7 +442,7 @@ mod tests { #[tokio::test] async fn test_eval_csc() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(std::f64::consts::FRAC_PI_2)]; let result = eval_csc(ctx, "Sheet1", &args).await.unwrap(); @@ 
-454,7 +454,7 @@ mod tests { #[tokio::test] async fn test_eval_csc_undefined() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_csc(ctx, "Sheet1", &args).await.unwrap(); @@ -466,7 +466,7 @@ mod tests { #[tokio::test] async fn test_eval_sec() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_sec(ctx, "Sheet1", &args).await.unwrap(); @@ -478,7 +478,7 @@ mod tests { #[tokio::test] async fn test_eval_sec_undefined() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(std::f64::consts::FRAC_PI_2)]; let result = eval_sec(ctx, "Sheet1", &args).await.unwrap(); @@ -490,7 +490,7 @@ mod tests { #[tokio::test] async fn test_eval_asin() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_asin(ctx, "Sheet1", &args).await.unwrap(); @@ -502,7 +502,7 @@ mod tests { #[tokio::test] async fn test_eval_asin_one() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_asin(ctx, "Sheet1", &args).await.unwrap(); @@ -514,7 +514,7 @@ mod tests { #[tokio::test] async fn test_eval_acos() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_acos(ctx, 
"Sheet1", &args).await.unwrap(); @@ -526,7 +526,7 @@ mod tests { #[tokio::test] async fn test_eval_acos_zero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_acos(ctx, "Sheet1", &args).await.unwrap(); @@ -538,7 +538,7 @@ mod tests { #[tokio::test] async fn test_eval_atan() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_atan(ctx, "Sheet1", &args).await.unwrap(); @@ -550,7 +550,7 @@ mod tests { #[tokio::test] async fn test_eval_atan_one() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_atan(ctx, "Sheet1", &args).await.unwrap(); @@ -562,7 +562,7 @@ mod tests { #[tokio::test] async fn test_eval_atan2() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(1.0)]; let result = eval_atan2(ctx, "Sheet1", &args).await.unwrap(); @@ -574,7 +574,7 @@ mod tests { #[tokio::test] async fn test_eval_atan2_zero_x() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(0.0)]; let result = eval_atan2(ctx, "Sheet1", &args).await.unwrap(); @@ -586,7 +586,7 @@ mod tests { #[tokio::test] async fn test_eval_sinh() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = 
vec![num_expr(0.0)]; let result = eval_sinh(ctx, "Sheet1", &args).await.unwrap(); @@ -598,7 +598,7 @@ mod tests { #[tokio::test] async fn test_eval_cosh() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_cosh(ctx, "Sheet1", &args).await.unwrap(); @@ -610,7 +610,7 @@ mod tests { #[tokio::test] async fn test_eval_tanh() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_tanh(ctx, "Sheet1", &args).await.unwrap(); @@ -622,7 +622,7 @@ mod tests { #[tokio::test] async fn test_eval_csch() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_csch(ctx, "Sheet1", &args).await.unwrap(); @@ -634,7 +634,7 @@ mod tests { #[tokio::test] async fn test_eval_csch_undefined() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_csch(ctx, "Sheet1", &args).await.unwrap(); @@ -646,7 +646,7 @@ mod tests { #[tokio::test] async fn test_eval_sech() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_sech(ctx, "Sheet1", &args).await.unwrap(); @@ -658,7 +658,7 @@ mod tests { #[tokio::test] async fn test_eval_coth() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = 
vec![num_expr(1.0)]; let result = eval_coth(ctx, "Sheet1", &args).await.unwrap(); @@ -673,7 +673,7 @@ mod tests { #[tokio::test] async fn test_eval_coth_undefined() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_coth(ctx, "Sheet1", &args).await.unwrap(); @@ -685,7 +685,7 @@ mod tests { #[tokio::test] async fn test_eval_acot() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_acot(ctx, "Sheet1", &args).await.unwrap(); @@ -697,7 +697,7 @@ mod tests { #[tokio::test] async fn test_eval_asinh() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_asinh(ctx, "Sheet1", &args).await.unwrap(); @@ -709,7 +709,7 @@ mod tests { #[tokio::test] async fn test_eval_acosh() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_acosh(ctx, "Sheet1", &args).await.unwrap(); @@ -721,7 +721,7 @@ mod tests { #[tokio::test] async fn test_eval_atanh() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.0)]; let result = eval_atanh(ctx, "Sheet1", &args).await.unwrap(); @@ -733,7 +733,7 @@ mod tests { #[tokio::test] async fn test_eval_acoth() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let 
args = vec![num_expr(2.0)]; let result = eval_acoth(ctx, "Sheet1", &args).await.unwrap(); @@ -748,7 +748,7 @@ mod tests { #[tokio::test] async fn test_eval_acoth_invalid() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.5)]; let result = eval_acoth(ctx, "Sheet1", &args).await.unwrap(); @@ -760,7 +760,7 @@ mod tests { #[tokio::test] async fn test_eval_radians() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(180.0)]; let result = eval_radians(ctx, "Sheet1", &args).await.unwrap(); @@ -772,7 +772,7 @@ mod tests { #[tokio::test] async fn test_eval_degrees() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(std::f64::consts::PI)]; let result = eval_degrees(ctx, "Sheet1", &args).await.unwrap(); @@ -784,7 +784,7 @@ mod tests { #[tokio::test] async fn test_eval_sin_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![]; let result = eval_sin(ctx, "Sheet1", &args).await.unwrap(); @@ -796,7 +796,7 @@ mod tests { #[tokio::test] async fn test_eval_sin_non_numeric() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![Expr::Literal(CellValue::String("abc".to_string()))]; let result = eval_sin(ctx, "Sheet1", &args).await.unwrap(); @@ -808,7 +808,7 @@ mod tests { #[tokio::test] async fn test_eval_atan2_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let 
engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_atan2(ctx, "Sheet1", &args).await.unwrap(); @@ -820,7 +820,7 @@ mod tests { #[tokio::test] async fn test_eval_atan2_non_numeric_y() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ Expr::Literal(CellValue::String("abc".to_string())), @@ -835,7 +835,7 @@ mod tests { #[tokio::test] async fn test_eval_atan2_non_numeric_x() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ num_expr(1.0), diff --git a/src/sheet/eval/engine/registry.rs b/crates/litchi-eval/src/engine/registry.rs similarity index 99% rename from src/sheet/eval/engine/registry.rs rename to crates/litchi-eval/src/engine/registry.rs index aca5db8..8b39471 100644 --- a/src/sheet/eval/engine/registry.rs +++ b/crates/litchi-eval/src/engine/registry.rs @@ -1,4 +1,4 @@ -use crate::sheet::{CellValue, Result}; +use litchi_core::sheet::{CellValue, Result}; use phf::phf_map; use super::super::parser::Expr; diff --git a/src/sheet/eval/engine/statistical/distributions.rs b/crates/litchi-eval/src/engine/statistical/distributions.rs similarity index 98% rename from src/sheet/eval/engine/statistical/distributions.rs rename to crates/litchi-eval/src/engine/statistical/distributions.rs index 01b39e0..ccd2446 100644 --- a/src/sheet/eval/engine/statistical/distributions.rs +++ b/crates/litchi-eval/src/engine/statistical/distributions.rs @@ -1,8 +1,8 @@ use super::super::{ EvalCtx, evaluate_expression, flatten_range_expr, for_each_value_in_expr, to_bool, to_number, }; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use 
statrs::distribution::{ Beta, Binomial, ChiSquared, Continuous, ContinuousCDF, Discrete, DiscreteCDF, Exp, FisherSnedecor, Gamma, Hypergeometric, LogNormal, NegativeBinomial, Normal, Poisson, StudentsT, @@ -1941,8 +1941,8 @@ pub(crate) async fn eval_phi( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { Expr::Literal(CellValue::Float(n)) @@ -2191,7 +2191,7 @@ mod tests { engine.set_cell("Sheet1", 1, 0, CellValue::Int(5)); engine.set_cell("Sheet1", 2, 0, CellValue::Int(8)); engine.set_cell("Sheet1", 3, 0, CellValue::Int(9)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -2232,14 +2232,14 @@ mod tests { engine.set_cell("Sheet1", 1, 1, CellValue::Float(0.2)); engine.set_cell("Sheet1", 2, 1, CellValue::Float(0.3)); engine.set_cell("Sheet1", 3, 1, CellValue::Float(0.4)); - let x_range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let x_range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, end_row: 3, end_col: 0, }); - let prob_range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let prob_range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 1, @@ -2879,7 +2879,7 @@ mod tests { engine.set_cell("Sheet1", 7, 0, CellValue::Int(2)); engine.set_cell("Sheet1", 8, 0, CellValue::Int(1)); engine.set_cell("Sheet1", 9, 0, CellValue::Int(9)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -2912,14 +2912,14 @@ mod tests { engine.set_cell("Sheet1", 3, 1, CellValue::Int(38)); engine.set_cell("Sheet1", 4, 1, CellValue::Int(40)); - 
let range1 = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range1 = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, end_row: 4, end_col: 0, }); - let range2 = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range2 = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 1, @@ -2950,14 +2950,14 @@ mod tests { engine.set_cell("Sheet1", 3, 0, CellValue::Float(23.65)); engine.set_cell("Sheet1", 3, 1, CellValue::Float(24.85)); - let actual = Expr::Range(crate::sheet::eval::parser::RangeRef { + let actual = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, end_row: 1, end_col: 1, }); - let expected = Expr::Range(crate::sheet::eval::parser::RangeRef { + let expected = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 2, start_col: 0, @@ -2985,14 +2985,14 @@ mod tests { engine.set_cell("Sheet1", 1, 1, CellValue::Int(19)); engine.set_cell("Sheet1", 2, 1, CellValue::Int(3)); - let range1 = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range1 = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, end_row: 2, end_col: 0, }); - let range2 = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range2 = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 1, @@ -3003,7 +3003,7 @@ mod tests { let args = vec![range1, range2, int_expr(2), int_expr(1)]; let result = eval_t_test(ctx, "Sheet1", &args).await.unwrap(); match result { - CellValue::Float(v) => assert!(v >= 0.0 && v <= 1.0), + CellValue::Float(v) => assert!((0.0..=1.0).contains(&v)), _ => panic!("Expected Float, got {:?}", result), } } @@ -3015,7 +3015,7 @@ mod tests { engine.set_cell("Sheet1", 0, 0, CellValue::Int(3)); engine.set_cell("Sheet1", 1, 0, CellValue::Int(4)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = 
Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -3039,7 +3039,7 @@ mod tests { let args = vec![num_expr(6.0), num_expr(0.5), num_expr(0.75)]; let result = eval_binom_inv(ctx, "Sheet1", &args).await.unwrap(); match result { - CellValue::Int(v) => assert!(v >= 0 && v <= 6), + CellValue::Int(v) => assert!((0..=6).contains(&v)), _ => panic!("Expected Int, got {:?}", result), } } diff --git a/crates/litchi-eval/src/engine/statistical/helpers.rs b/crates/litchi-eval/src/engine/statistical/helpers.rs new file mode 100644 index 0000000..4d9bb4b --- /dev/null +++ b/crates/litchi-eval/src/engine/statistical/helpers.rs @@ -0,0 +1,295 @@ +use std::cmp::Ordering; +use std::result::Result as StdResult; + +use crate::engine::{EvalCtx, evaluate_expression, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; + +pub(super) const EPS: f64 = 1e-12; + +pub(super) async fn number_arg( + ctx: EvalCtx<'_>, + current_sheet: &str, + expr: &Expr, +) -> Result> { + let v = evaluate_expression(ctx, current_sheet, expr).await?; + Ok(to_number(&v)) +} + +pub(super) fn collect_numeric_values( + values: Vec, + func_name: &str, +) -> StdResult, CellValue> { + let mut numbers = Vec::new(); + for value in values { + match value { + CellValue::Error(msg) => return Err(CellValue::Error(msg)), + other => { + if let Some(n) = to_number(&other) { + if n.is_nan() { + return Err(CellValue::Error(format!( + "{func_name} encountered an invalid numeric value" + ))); + } + numbers.push(n); + } + }, + } + } + + if numbers.is_empty() { + return Err(CellValue::Error(format!( + "{func_name} requires at least one numeric value in the array" + ))); + } + + numbers.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal)); + Ok(numbers) +} + +pub(super) fn collect_numeric_values_unsorted( + values: Vec, + func_name: &str, +) -> StdResult, CellValue> { + let mut numbers = Vec::new(); + for value in values { + match value { + 
CellValue::Error(msg) => return Err(CellValue::Error(msg)), + other => { + if let Some(n) = to_number(&other) { + if n.is_nan() { + return Err(CellValue::Error(format!( + "{func_name} encountered an invalid numeric value" + ))); + } + numbers.push(n); + } + }, + } + } + + if numbers.is_empty() { + return Err(CellValue::Error(format!( + "{func_name} requires at least one numeric value in the reference" + ))); + } + + Ok(numbers) +} + +pub(super) fn to_positive_index( + value: f64, + func_name: &str, + arg_name: &str, +) -> StdResult { + if !value.is_finite() { + return Err(CellValue::Error(format!( + "{func_name} {arg_name} must be a finite positive integer" + ))); + } + let rounded = value.round(); + if (value - rounded).abs() > EPS { + return Err(CellValue::Error(format!( + "{func_name} {arg_name} must be an integer" + ))); + } + if rounded < 1.0 { + return Err(CellValue::Error(format!( + "{func_name} {arg_name} must be greater than or equal to 1" + ))); + } + Ok(rounded as usize) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::engine::test_helpers::TestEngine; + + fn num_expr(n: f64) -> Expr { + if n == n.floor() { + Expr::Literal(CellValue::Int(n as i64)) + } else { + Expr::Literal(CellValue::Float(n)) + } + } + + // ===== number_arg tests ===== + + #[tokio::test] + async fn test_number_arg_with_number() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let expr = num_expr(42.0); + let result = number_arg(ctx, "Sheet1", &expr).await.unwrap(); + assert_eq!(result, Some(42.0)); + } + + #[tokio::test] + async fn test_number_arg_with_string() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let expr = Expr::Literal(CellValue::String("abc".to_string())); + let result = number_arg(ctx, "Sheet1", &expr).await.unwrap(); + assert_eq!(result, None); + } + + // ===== collect_numeric_values tests ===== + + #[test] + fn test_collect_numeric_values_basic() { + let values = vec![CellValue::Int(3), CellValue::Int(1), CellValue::Int(2)]; 
+ let result = collect_numeric_values(values, "MEDIAN").unwrap(); + assert_eq!(result, vec![1.0, 2.0, 3.0]); // sorted + } + + #[test] + fn test_collect_numeric_values_with_floats() { + let values = vec![ + CellValue::Float(3.5), + CellValue::Int(1), + CellValue::Float(2.5), + ]; + let result = collect_numeric_values(values, "AVERAGE").unwrap(); + assert_eq!(result, vec![1.0, 2.5, 3.5]); + } + + #[test] + fn test_collect_numeric_values_ignores_non_numeric() { + let values = vec![ + CellValue::Int(3), + CellValue::String("abc".to_string()), + CellValue::Int(1), + ]; + let result = collect_numeric_values(values, "COUNT").unwrap(); + assert_eq!(result, vec![1.0, 3.0]); + } + + #[test] + fn test_collect_numeric_values_empty_result() { + let values = vec![CellValue::String("abc".to_string()), CellValue::Bool(true)]; + let result = collect_numeric_values(values, "SUM"); + assert!(result.is_err()); + match result.unwrap_err() { + CellValue::Error(e) => assert!(e.contains("requires at least one numeric value")), + _ => panic!("Expected Error"), + } + } + + #[test] + fn test_collect_numeric_values_with_error() { + let values = vec![ + CellValue::Int(1), + CellValue::Error("Some error".to_string()), + CellValue::Int(2), + ]; + let result = collect_numeric_values(values, "AVERAGE"); + assert!(result.is_err()); + match result.unwrap_err() { + CellValue::Error(e) => assert_eq!(e, "Some error"), + _ => panic!("Expected Error"), + } + } + + #[test] + fn test_collect_numeric_values_with_nan() { + let values = vec![ + CellValue::Int(1), + CellValue::Float(f64::NAN), + CellValue::Int(2), + ]; + let result = collect_numeric_values(values, "AVERAGE"); + assert!(result.is_err()); + match result.unwrap_err() { + CellValue::Error(e) => assert!(e.contains("invalid numeric value")), + _ => panic!("Expected Error"), + } + } + + // ===== collect_numeric_values_unsorted tests ===== + + #[test] + fn test_collect_numeric_values_unsorted() { + let values = vec![CellValue::Int(3), 
CellValue::Int(1), CellValue::Int(2)]; + let result = collect_numeric_values_unsorted(values, "MEDIAN").unwrap(); + assert_eq!(result, vec![3.0, 1.0, 2.0]); // unsorted + } + + #[test] + fn test_collect_numeric_values_unsorted_empty() { + let values: Vec = vec![]; + let result = collect_numeric_values_unsorted(values, "SUM"); + assert!(result.is_err()); + match result.unwrap_err() { + CellValue::Error(e) => assert!(e.contains("requires at least one numeric value")), + _ => panic!("Expected Error"), + } + } + + // ===== to_positive_index tests ===== + + #[test] + fn test_to_positive_index_valid() { + assert_eq!(to_positive_index(1.0, "LARGE", "k").unwrap(), 1); + assert_eq!(to_positive_index(5.0, "LARGE", "k").unwrap(), 5); + // Values within EPS of an integer are accepted + assert_eq!(to_positive_index(5.0000000000001, "LARGE", "k").unwrap(), 5); + } + + #[test] + fn test_to_positive_index_zero() { + let result = to_positive_index(0.0, "LARGE", "k"); + assert!(result.is_err()); + match result.unwrap_err() { + CellValue::Error(e) => assert!(e.contains("must be greater than or equal to 1")), + _ => panic!("Expected Error"), + } + } + + #[test] + fn test_to_positive_index_negative() { + let result = to_positive_index(-1.0, "LARGE", "k"); + assert!(result.is_err()); + match result.unwrap_err() { + CellValue::Error(e) => assert!(e.contains("must be greater than or equal to 1")), + _ => panic!("Expected Error"), + } + } + + #[test] + fn test_to_positive_index_non_integer() { + let result = to_positive_index(1.5, "LARGE", "k"); + assert!(result.is_err()); + match result.unwrap_err() { + CellValue::Error(e) => assert!(e.contains("must be an integer")), + _ => panic!("Expected Error"), + } + } + + #[test] + fn test_to_positive_index_infinite() { + let result = to_positive_index(f64::INFINITY, "LARGE", "k"); + assert!(result.is_err()); + match result.unwrap_err() { + CellValue::Error(e) => assert!(e.contains("must be a finite positive integer")), + _ => panic!("Expected 
Error"), + } + } + + #[test] + fn test_to_positive_index_nan() { + let result = to_positive_index(f64::NAN, "LARGE", "k"); + assert!(result.is_err()); + match result.unwrap_err() { + CellValue::Error(e) => assert!(e.contains("must be a finite positive integer")), + _ => panic!("Expected Error"), + } + } + + // ===== EPS constant ===== + + #[test] + fn test_eps_value() { + assert_eq!(EPS, 1e-12); + } +} diff --git a/src/sheet/eval/engine/statistical/mod.rs b/crates/litchi-eval/src/engine/statistical/mod.rs similarity index 100% rename from src/sheet/eval/engine/statistical/mod.rs rename to crates/litchi-eval/src/engine/statistical/mod.rs diff --git a/src/sheet/eval/engine/statistical/ranking.rs b/crates/litchi-eval/src/engine/statistical/ranking.rs similarity index 97% rename from src/sheet/eval/engine/statistical/ranking.rs rename to crates/litchi-eval/src/engine/statistical/ranking.rs index 067513a..66181f2 100644 --- a/src/sheet/eval/engine/statistical/ranking.rs +++ b/crates/litchi-eval/src/engine/statistical/ranking.rs @@ -1,9 +1,9 @@ use std::cmp::Ordering; use std::result::Result as StdResult; -use crate::sheet::eval::engine::{EvalCtx, flatten_range_expr}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, flatten_range_expr}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::{ EPS, collect_numeric_values, collect_numeric_values_unsorted, number_arg, to_positive_index, @@ -752,8 +752,8 @@ fn round_to_significance(value: f64, significance: u32) -> f64 { #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -779,7 +779,7 @@ mod tests { engine.set_cell("Sheet1", 0, 6, CellValue::Int(2)); engine.set_cell("Sheet1", 0, 7, CellValue::Int(6)); - let range = 
Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -810,7 +810,7 @@ mod tests { engine.set_cell("Sheet1", 0, 6, CellValue::Int(2)); engine.set_cell("Sheet1", 0, 7, CellValue::Int(6)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -839,7 +839,7 @@ mod tests { engine.set_cell("Sheet1", 0, 5, CellValue::Int(6)); engine.set_cell("Sheet1", 0, 6, CellValue::Int(7)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -867,7 +867,7 @@ mod tests { engine.set_cell("Sheet1", 0, 5, CellValue::Int(2)); engine.set_cell("Sheet1", 0, 6, CellValue::Int(1)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -894,7 +894,7 @@ mod tests { engine.set_cell("Sheet1", 0, 3, CellValue::Int(4)); engine.set_cell("Sheet1", 0, 4, CellValue::Int(5)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -920,7 +920,7 @@ mod tests { engine.set_cell("Sheet1", 0, 3, CellValue::Int(4)); engine.set_cell("Sheet1", 0, 4, CellValue::Int(5)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -949,7 +949,7 @@ mod tests { engine.set_cell("Sheet1", 0, 6, CellValue::Int(7)); engine.set_cell("Sheet1", 0, 7, CellValue::Int(8)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), 
start_row: 0, start_col: 0, @@ -975,7 +975,7 @@ mod tests { engine.set_cell("Sheet1", 0, 3, CellValue::Int(4)); engine.set_cell("Sheet1", 0, 4, CellValue::Int(5)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, diff --git a/src/sheet/eval/engine/statistical/simple.rs b/crates/litchi-eval/src/engine/statistical/simple.rs similarity index 92% rename from src/sheet/eval/engine/statistical/simple.rs rename to crates/litchi-eval/src/engine/statistical/simple.rs index f02997e..79bc5da 100644 --- a/src/sheet/eval/engine/statistical/simple.rs +++ b/crates/litchi-eval/src/engine/statistical/simple.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, for_each_value_in_expr, to_number}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, for_each_value_in_expr, to_number}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use std::cmp::Ordering; pub(crate) async fn eval_median( @@ -770,7 +770,7 @@ async fn collect_aligned_numeric_pairs( arg1: &Expr, arg2: &Expr, ) -> Result<(Vec, Vec)> { - use crate::sheet::eval::engine::flatten_range_expr; + use crate::engine::flatten_range_expr; let range1 = flatten_range_expr(ctx, current_sheet, arg1).await?; let range2 = flatten_range_expr(ctx, current_sheet, arg2).await?; @@ -799,7 +799,7 @@ async fn collect_aligned_numeric_pairs( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { @@ -811,7 +811,7 @@ mod tests { #[tokio::test] async fn test_eval_median_odd() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(3.0), num_expr(5.0)]; 
let result = eval_median(ctx, "Sheet1", &args).await.unwrap(); @@ -823,7 +823,7 @@ mod tests { #[tokio::test] async fn test_eval_median_even() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(3.0), num_expr(5.0), num_expr(7.0)]; let result = eval_median(ctx, "Sheet1", &args).await.unwrap(); @@ -835,7 +835,7 @@ mod tests { #[tokio::test] async fn test_eval_median_empty() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_median(ctx, "Sheet1", &args).await.unwrap(); @@ -847,7 +847,7 @@ mod tests { #[tokio::test] async fn test_eval_mode_sngl() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0), num_expr(2.0), num_expr(3.0)]; let result = eval_mode_sngl(ctx, "Sheet1", &args).await.unwrap(); @@ -859,7 +859,7 @@ mod tests { #[tokio::test] async fn test_eval_mode_sngl_no_duplicates() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0), num_expr(3.0)]; let result = eval_mode_sngl(ctx, "Sheet1", &args).await.unwrap(); @@ -871,7 +871,7 @@ mod tests { #[tokio::test] async fn test_eval_stdev_s() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0), num_expr(3.0), num_expr(4.0)]; let result = eval_stdev_s(ctx, "Sheet1", &args).await.unwrap(); @@ -883,7 +883,7 @@ mod tests { #[tokio::test] async fn 
test_eval_stdev_p() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0), num_expr(3.0), num_expr(4.0)]; let result = eval_stdev_p(ctx, "Sheet1", &args).await.unwrap(); @@ -895,7 +895,7 @@ mod tests { #[tokio::test] async fn test_eval_var_s() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0), num_expr(3.0), num_expr(4.0)]; let result = eval_var_s(ctx, "Sheet1", &args).await.unwrap(); @@ -907,7 +907,7 @@ mod tests { #[tokio::test] async fn test_eval_var_p() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0), num_expr(3.0), num_expr(4.0)]; let result = eval_var_p(ctx, "Sheet1", &args).await.unwrap(); @@ -919,7 +919,7 @@ mod tests { #[tokio::test] async fn test_eval_geomean() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(2.0), num_expr(8.0)]; let result = eval_geomean(ctx, "Sheet1", &args).await.unwrap(); @@ -931,7 +931,7 @@ mod tests { #[tokio::test] async fn test_eval_geomean_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(-1.0), num_expr(2.0)]; let result = eval_geomean(ctx, "Sheet1", &args).await.unwrap(); @@ -943,7 +943,7 @@ mod tests { #[tokio::test] async fn test_eval_harmean() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = 
crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(2.0), num_expr(4.0), num_expr(8.0)]; let result = eval_harmean(ctx, "Sheet1", &args).await.unwrap(); @@ -955,7 +955,7 @@ mod tests { #[tokio::test] async fn test_eval_fisher() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.5)]; let result = eval_fisher(ctx, "Sheet1", &args).await.unwrap(); @@ -967,7 +967,7 @@ mod tests { #[tokio::test] async fn test_eval_fisher_out_of_range() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_fisher(ctx, "Sheet1", &args).await.unwrap(); @@ -979,7 +979,7 @@ mod tests { #[tokio::test] async fn test_eval_fisherinv() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(0.5)]; let result = eval_fisherinv(ctx, "Sheet1", &args).await.unwrap(); @@ -991,7 +991,7 @@ mod tests { #[tokio::test] async fn test_eval_standardize() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10.0), num_expr(5.0), num_expr(2.0)]; let result = eval_standardize(ctx, "Sheet1", &args).await.unwrap(); @@ -1003,7 +1003,7 @@ mod tests { #[tokio::test] async fn test_eval_standardize_zero_stdev() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(10.0), num_expr(5.0), num_expr(0.0)]; let result = eval_standardize(ctx, "Sheet1", &args).await.unwrap(); @@ -1013,7 
+1013,7 @@ mod tests { } } - use crate::sheet::eval::parser::RangeRef; + use crate::parser::RangeRef; fn range_expr(start_row: u32, start_col: u32, end_row: u32, end_col: u32) -> Expr { Expr::Range(RangeRef { @@ -1027,7 +1027,7 @@ mod tests { #[tokio::test] async fn test_eval_trimmean() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Data: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 20% trim for i in 0..10 { @@ -1043,7 +1043,7 @@ mod tests { #[tokio::test] async fn test_eval_trimmean_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_trimmean(ctx, "Sheet1", &args).await.unwrap(); @@ -1055,7 +1055,7 @@ mod tests { #[tokio::test] async fn test_eval_skew() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Skewness of symmetric distribution should be near 0 for i in 0..5 { @@ -1071,7 +1071,7 @@ mod tests { #[tokio::test] async fn test_eval_skew_too_few() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); engine.set_cell("", 0, 0, CellValue::Int(1)); engine.set_cell("", 0, 1, CellValue::Int(2)); @@ -1085,7 +1085,7 @@ mod tests { #[tokio::test] async fn test_eval_skew_p() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); for i in 0..5 { engine.set_cell("", 0, i, CellValue::Int((i + 1) as i64)); @@ -1100,7 +1100,7 @@ mod tests { #[tokio::test] async fn test_eval_kurt() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let 
engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Kurtosis of normal-like data for i in 0..8 { @@ -1120,7 +1120,7 @@ mod tests { #[tokio::test] async fn test_eval_kurt_too_few() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); engine.set_cell("", 0, 0, CellValue::Int(1)); engine.set_cell("", 0, 1, CellValue::Int(2)); @@ -1135,7 +1135,7 @@ mod tests { #[tokio::test] async fn test_eval_correl() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Perfect positive correlation: y = x for i in 0..5 { @@ -1152,7 +1152,7 @@ mod tests { #[tokio::test] async fn test_eval_correl_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0)]; let result = eval_correl(ctx, "Sheet1", &args).await.unwrap(); @@ -1164,7 +1164,7 @@ mod tests { #[tokio::test] async fn test_eval_pearson() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // PEARSON is same as CORREL for i in 0..5 { @@ -1181,7 +1181,7 @@ mod tests { #[tokio::test] async fn test_eval_rsq() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // R-squared of perfect correlation is 1 for i in 0..5 { @@ -1198,7 +1198,7 @@ mod tests { #[tokio::test] async fn test_eval_slope() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // y = 2x, slope should be 2 for i in 0..5 { @@ -1215,7 
+1215,7 @@ mod tests { #[tokio::test] async fn test_eval_intercept() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // y = 2x + 1, intercept should be 1 for i in 0..5 { @@ -1232,7 +1232,7 @@ mod tests { #[tokio::test] async fn test_eval_steyx() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Standard error for y = 2x + 1 (perfect fit, error should be 0) for i in 0..5 { @@ -1249,7 +1249,7 @@ mod tests { #[tokio::test] async fn test_eval_covar_p() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Perfect linear relationship: y = 2x for i in 0..5 { @@ -1266,7 +1266,7 @@ mod tests { #[tokio::test] async fn test_eval_covar_s() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Perfect linear relationship: y = 2x for i in 0..5 { @@ -1283,7 +1283,7 @@ mod tests { #[tokio::test] async fn test_eval_stdev_a() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0), num_expr(3.0), num_expr(4.0)]; let result = eval_stdev_a(ctx, "Sheet1", &args).await.unwrap(); @@ -1297,7 +1297,7 @@ mod tests { #[tokio::test] async fn test_eval_var_a() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(1.0), num_expr(2.0), num_expr(3.0), num_expr(4.0)]; let result = eval_var_a(ctx, "Sheet1", &args).await.unwrap(); diff --git 
a/src/sheet/eval/engine/text/basic.rs b/crates/litchi-eval/src/engine/text/basic.rs similarity index 90% rename from src/sheet/eval/engine/text/basic.rs rename to crates/litchi-eval/src/engine/text/basic.rs index 7874353..da826eb 100644 --- a/src/sheet/eval/engine/text/basic.rs +++ b/crates/litchi-eval/src/engine/text/basic.rs @@ -1,8 +1,8 @@ -use crate::sheet::eval::engine::{ +use crate::engine::{ EvalCtx, evaluate_expression, for_each_value_in_expr, is_blank, to_bool, to_text, }; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::{to_non_negative_int, to_positive_int}; @@ -433,7 +433,7 @@ pub(crate) async fn eval_substitute( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn str_expr(s: &str) -> Expr { Expr::Literal(CellValue::String(s.to_string())) @@ -449,7 +449,7 @@ mod tests { #[tokio::test] async fn test_eval_len() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello")]; let result = eval_len(ctx, "Sheet1", &args).await.unwrap(); @@ -461,7 +461,7 @@ mod tests { #[tokio::test] async fn test_eval_len_unicode() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello 世界")]; let result = eval_len(ctx, "Sheet1", &args).await.unwrap(); @@ -473,7 +473,7 @@ mod tests { #[tokio::test] async fn test_eval_len_wrong_args() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args: Vec = vec![]; let result = eval_len(ctx, "Sheet1", &args).await.unwrap(); @@ -485,7 +485,7 @@ mod tests { #[tokio::test] 
async fn test_eval_lower() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("HELLO WORLD")]; let result = eval_lower(ctx, "Sheet1", &args).await.unwrap(); @@ -497,7 +497,7 @@ mod tests { #[tokio::test] async fn test_eval_upper() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello world")]; let result = eval_upper(ctx, "Sheet1", &args).await.unwrap(); @@ -509,7 +509,7 @@ mod tests { #[tokio::test] async fn test_eval_proper() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello world")]; let result = eval_proper(ctx, "Sheet1", &args).await.unwrap(); @@ -521,7 +521,7 @@ mod tests { #[tokio::test] async fn test_eval_proper_multiple_spaces() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello world")]; let result = eval_proper(ctx, "Sheet1", &args).await.unwrap(); @@ -533,7 +533,7 @@ mod tests { #[tokio::test] async fn test_eval_trim() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr(" hello world ")]; let result = eval_trim(ctx, "Sheet1", &args).await.unwrap(); @@ -545,7 +545,7 @@ mod tests { #[tokio::test] async fn test_eval_rept() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("ab"), num_expr(3.0)]; let result = 
eval_rept(ctx, "Sheet1", &args).await.unwrap(); @@ -557,7 +557,7 @@ mod tests { #[tokio::test] async fn test_eval_rept_zero() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("ab"), num_expr(0.0)]; let result = eval_rept(ctx, "Sheet1", &args).await.unwrap(); @@ -569,7 +569,7 @@ mod tests { #[tokio::test] async fn test_eval_rept_negative() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("ab"), num_expr(-1.0)]; let result = eval_rept(ctx, "Sheet1", &args).await.unwrap(); @@ -581,7 +581,7 @@ mod tests { #[tokio::test] async fn test_eval_exact_same() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello"), str_expr("hello")]; let result = eval_exact(ctx, "Sheet1", &args).await.unwrap(); @@ -593,7 +593,7 @@ mod tests { #[tokio::test] async fn test_eval_exact_different() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello"), str_expr("world")]; let result = eval_exact(ctx, "Sheet1", &args).await.unwrap(); @@ -605,7 +605,7 @@ mod tests { #[tokio::test] async fn test_eval_exact_case_sensitive() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("Hello"), str_expr("hello")]; let result = eval_exact(ctx, "Sheet1", &args).await.unwrap(); @@ -617,7 +617,7 @@ mod tests { #[tokio::test] async fn test_eval_substitute_all() { - let engine = 
crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello world"), str_expr("o"), str_expr("0")]; let result = eval_substitute(ctx, "Sheet1", &args).await.unwrap(); @@ -629,7 +629,7 @@ mod tests { #[tokio::test] async fn test_eval_substitute_instance() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ str_expr("hello world"), @@ -646,7 +646,7 @@ mod tests { #[tokio::test] async fn test_eval_substitute_empty_old() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello"), str_expr(""), str_expr("x")]; let result = eval_substitute(ctx, "Sheet1", &args).await.unwrap(); @@ -658,7 +658,7 @@ mod tests { #[tokio::test] async fn test_eval_concat() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello"), str_expr(" "), str_expr("world")]; let result = eval_concat(ctx, "Sheet1", &args).await.unwrap(); @@ -670,7 +670,7 @@ mod tests { #[tokio::test] async fn test_eval_textjoin() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr(","), bool_expr(true), str_expr("a"), str_expr("b")]; let result = eval_textjoin(ctx, "Sheet1", &args).await.unwrap(); @@ -686,7 +686,7 @@ mod tests { #[tokio::test] async fn test_eval_textjoin_ignore_empty() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); // Test with 
ignore_empty = true and an empty string let args = vec![ @@ -705,7 +705,7 @@ mod tests { #[tokio::test] async fn test_eval_numbervalue_default() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("123.45")]; let result = eval_numbervalue(ctx, "Sheet1", &args).await.unwrap(); @@ -718,7 +718,7 @@ mod tests { #[tokio::test] async fn test_eval_numbervalue_with_group() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("1,234.56"), str_expr("."), str_expr(",")]; let result = eval_numbervalue(ctx, "Sheet1", &args).await.unwrap(); @@ -730,7 +730,7 @@ mod tests { #[tokio::test] async fn test_eval_numbervalue_invalid() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("abc")]; let result = eval_numbervalue(ctx, "Sheet1", &args).await.unwrap(); @@ -742,7 +742,7 @@ mod tests { #[tokio::test] async fn test_eval_asc() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("ABC")]; // Fullwidth characters let result = eval_asc(ctx, "Sheet1", &args).await.unwrap(); @@ -754,7 +754,7 @@ mod tests { #[tokio::test] async fn test_eval_jis() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("ABC")]; let result = eval_jis(ctx, "Sheet1", &args).await.unwrap(); @@ -766,7 +766,7 @@ mod tests { #[tokio::test] async fn test_eval_lenb() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); 
+ let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello")]; let result = eval_lenb(ctx, "Sheet1", &args).await.unwrap(); @@ -778,7 +778,7 @@ mod tests { #[tokio::test] async fn test_eval_phonetic() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello")]; let result = eval_phonetic(ctx, "Sheet1", &args).await.unwrap(); @@ -790,7 +790,7 @@ mod tests { #[tokio::test] async fn test_eval_phonetic_non_string() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(123.0)]; let result = eval_phonetic(ctx, "Sheet1", &args).await.unwrap(); diff --git a/src/sheet/eval/engine/text/excel_formatter.rs b/crates/litchi-eval/src/engine/text/excel_formatter.rs similarity index 99% rename from src/sheet/eval/engine/text/excel_formatter.rs rename to crates/litchi-eval/src/engine/text/excel_formatter.rs index 49d9049..bfccf3a 100644 --- a/src/sheet/eval/engine/text/excel_formatter.rs +++ b/crates/litchi-eval/src/engine/text/excel_formatter.rs @@ -82,7 +82,7 @@ impl ExcelDateTime { let year = 1600 + year_days_400 * 400 + year_days_100 * 100 + year_days_4 * 4 + year_days_1; days += 1; - if year.is_multiple_of(4) && (!year.is_multiple_of(100) || year.is_multiple_of(400)) { + if year % 4 == 0 && (year % 100 != 0 || year % 400 == 0) { months[1] = 29; } diff --git a/src/sheet/eval/engine/text/formatting.rs b/crates/litchi-eval/src/engine/text/formatting.rs similarity index 98% rename from src/sheet/eval/engine/text/formatting.rs rename to crates/litchi-eval/src/engine/text/formatting.rs index 4c0c637..0683ced 100644 --- a/src/sheet/eval/engine/text/formatting.rs +++ b/crates/litchi-eval/src/engine/text/formatting.rs @@ -1,10 +1,10 @@ use 
super::excel_formatter::{ CellFormat, FormattedData, detect_custom_number_format, format_excel_f64, }; -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_bool, to_number, to_text}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_bool, to_number, to_text}; +use crate::parser::Expr; use chrono::Datelike; +use litchi_core::sheet::{CellValue, Result}; const MAX_FIXED_DECIMALS: i32 = 30; @@ -507,7 +507,7 @@ fn insert_commas(digits: &str) -> String { let mut result = String::with_capacity(digits.len() + digits.len() / 3); let chars: Vec = digits.chars().collect(); for (idx, ch) in chars.iter().enumerate() { - if idx > 0 && (chars.len() - idx).is_multiple_of(3) { + if idx > 0 && (chars.len() - idx) % 3 == 0 { result.push(','); } result.push(*ch); @@ -518,8 +518,8 @@ fn insert_commas(digits: &str) -> String { #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn num_expr(n: f64) -> Expr { if n == n.floor() { diff --git a/crates/litchi-eval/src/engine/text/helpers.rs b/crates/litchi-eval/src/engine/text/helpers.rs new file mode 100644 index 0000000..7e90699 --- /dev/null +++ b/crates/litchi-eval/src/engine/text/helpers.rs @@ -0,0 +1,569 @@ +use crate::engine::to_number; +use litchi_core::sheet::CellValue; + +pub(crate) fn to_non_negative_int(value: &CellValue) -> Option { + to_number(value).and_then(|n| { + if n >= 0.0 { + Some(n.trunc() as usize) + } else { + None + } + }) +} + +pub(crate) fn to_positive_int(value: &CellValue) -> Option { + to_number(value).and_then(|n| { + if n > 0.0 { + Some(n.trunc() as usize) + } else { + None + } + }) +} + +pub(crate) fn take_left(s: &str, count: usize) -> String { + s.chars().take(count).collect() +} + +pub(crate) fn take_right(s: &str, count: usize) -> String { + let chars: 
/// Returns the last `count` characters of `s`, or all of `s` when it has
/// fewer characters than that.
pub(crate) fn take_right(s: &str, count: usize) -> String {
    let char_len = s.chars().count();
    s.chars().skip(char_len.saturating_sub(count)).collect()
}

/// Returns up to `count` characters of `s` starting at the 1-based character
/// position `start_num`.
///
/// Yields an empty string when `start_num` is zero or past the end of `s`, or
/// when `count` is zero. Any `count`, including `usize::MAX`, is safe: the
/// iterator adaptors never add `start_num` and `count` together.
pub(crate) fn take_mid(s: &str, start_num: usize, count: usize) -> String {
    if start_num == 0 {
        return String::new();
    }
    s.chars().skip(start_num - 1).take(count).collect()
}

/// Width of one character in the DBCS byte model used by the `*B` helpers:
/// code points up to U+00FF count as one byte, everything else as two.
fn char_byte_width(ch: char) -> usize {
    if ch as u32 <= 0xFF { 1 } else { 2 }
}

/// Total length of `s` in DBCS bytes (see `char_byte_width`).
pub(crate) fn dbcs_byte_len(s: &str) -> usize {
    s.chars().map(char_byte_width).sum()
}

/// Returns the run of whole characters that begins at the character containing
/// the 1-based DBCS byte `start_byte` and fits into `byte_count` DBCS bytes.
///
/// A character that would exceed the byte budget is dropped rather than split.
/// Yields an empty string when either argument is zero, when `start_byte` lies
/// past the end of `s`, or when not even one character fits.
fn slice_by_bytes(s: &str, start_byte: usize, byte_count: usize) -> String {
    if byte_count == 0 || start_byte == 0 {
        return String::new();
    }
    let mut byte_pos = 0;
    let mut start_idx: Option<usize> = None;
    let mut taken = 0;
    let mut end_idx = s.len();

    for (idx, ch) in s.char_indices() {
        let width = char_byte_width(ch);
        let next_pos = byte_pos + width;

        // The slice starts at the first character whose last byte reaches `start_byte`.
        if start_idx.is_none() && next_pos >= start_byte {
            start_idx = Some(idx);
        }

        if start_idx.is_some() {
            if taken + width > byte_count {
                end_idx = idx;
                break;
            }
            taken += width;
        }

        byte_pos = next_pos;
    }

    match start_idx {
        Some(start) if taken > 0 => s[start..end_idx].to_string(),
        _ => String::new(),
    }
}

/// Returns the leading characters of `s` that fit into `byte_count` DBCS bytes.
pub(crate) fn take_left_bytes(s: &str, byte_count: usize) -> String {
    slice_by_bytes(s, 1, byte_count)
}

/// Returns the trailing characters of `s` that fit into `byte_count` DBCS bytes.
///
/// The string is walked from its end so that, when the budget would split a
/// two-byte character, that character is dropped and the characters to its
/// right are kept. This mirrors how `take_left_bytes` drops a split character
/// on the other side.
pub(crate) fn take_right_bytes(s: &str, byte_count: usize) -> String {
    let mut taken = 0;
    let mut start_idx = s.len();
    for (idx, ch) in s.char_indices().rev() {
        let width = char_byte_width(ch);
        if taken + width > byte_count {
            break;
        }
        taken += width;
        start_idx = idx;
    }
    s[start_idx..].to_string()
}

/// Returns the characters of `s` that start at the 1-based DBCS byte
/// `start_byte` and fit into `byte_count` DBCS bytes.
pub(crate) fn take_mid_bytes(s: &str, start_byte: usize, byte_count: usize) -> String {
    slice_by_bytes(s, start_byte, byte_count)
}

/// Builds the prefix sums of DBCS byte widths: element `i` is the DBCS byte
/// length of the first `i` characters, so the vector always starts with `0`.
pub(crate) fn dbcs_byte_prefixes(s: &str) -> Vec<usize> {
    let mut prefixes = Vec::with_capacity(s.chars().count() + 1);
    let mut total = 0;
    prefixes.push(total);
    for ch in s.chars() {
        total += char_byte_width(ch);
        prefixes.push(total);
    }
    prefixes
}

/// Maps a 1-based DBCS byte position to the 0-based index of the character
/// that contains it, given the prefix sums from `dbcs_byte_prefixes`.
///
/// Returns `None` when `start_byte` is zero or lies beyond the last prefix.
pub(crate) fn char_index_from_dbcs_byte(prefixes: &[usize], start_byte: usize) -> Option<usize> {
    if start_byte == 0 {
        return None;
    }
    let total = prefixes.last().copied().unwrap_or(0);
    if start_byte > total {
        return None;
    }
    let target = start_byte - 1;
    // `prefixes[i + 1]` is the end of character `i`; the first end past `target` wins.
    prefixes.iter().skip(1).position(|&end| end > target)
}

/// Replaces `num_chars` characters of `s`, starting at the 1-based character
/// position `start_num`, with `replacement`.
///
/// Returns `None` when `start_num` is zero or past the end of `s`. A
/// `num_chars` that reaches beyond the end simply removes the rest of the
/// string; the end position is computed with saturating arithmetic so very
/// large counts cannot overflow.
pub(crate) fn replace_chars_segment(
    s: &str,
    start_num: usize,
    num_chars: usize,
    replacement: &str,
) -> Option<String> {
    if start_num == 0 || start_num > s.chars().count() {
        return None;
    }
    let start_idx = start_num - 1;
    let end_idx = start_idx.saturating_add(num_chars);

    let mut out = String::with_capacity(s.len() + replacement.len());
    out.extend(s.chars().take(start_idx));
    out.push_str(replacement);
    out.extend(s.chars().skip(end_idx));
    Some(out)
}

/// Replaces the characters covering `num_bytes` DBCS bytes of `s`, starting at
/// the character that contains the 1-based DBCS byte `start_byte`, with
/// `replacement`.
///
/// Whole characters are removed until at least `num_bytes` bytes are covered
/// or the string ends. Returns `None` when `start_byte` is zero or greater
/// than the DBCS length of `s`, which also covers the empty string.
pub(crate) fn replace_bytes_segment(
    s: &str,
    start_byte: usize,
    num_bytes: usize,
    replacement: &str,
) -> Option<String> {
    if start_byte == 0 || start_byte > dbcs_byte_len(s) {
        return None;
    }
    let chars: Vec<char> = s.chars().collect();

    // Locate the character whose byte span contains `start_byte`.
    let mut bytes_seen = 0;
    let mut start_idx = 0;
    for (idx, &ch) in chars.iter().enumerate() {
        bytes_seen += char_byte_width(ch);
        if start_byte <= bytes_seen {
            start_idx = idx;
            break;
        }
    }

    // Extend the removed range one whole character at a time.
    let mut end_idx = start_idx;
    let mut removed = 0;
    while end_idx < chars.len() && removed < num_bytes {
        removed += char_byte_width(chars[end_idx]);
        end_idx += 1;
    }

    let mut out = String::with_capacity(s.len() + replacement.len());
    out.extend(chars[..start_idx].iter());
    out.push_str(replacement);
    out.extend(chars[end_idx..].iter());
    Some(out)
}
{ + out.push(*ch); + } + Some(out) +} + +#[cfg(test)] +mod tests { + use super::*; + + // ===== to_non_negative_int tests ===== + + #[test] + fn test_to_non_negative_int_with_positive() { + assert_eq!(to_non_negative_int(&CellValue::Int(5)), Some(5)); + assert_eq!(to_non_negative_int(&CellValue::Float(5.7)), Some(5)); + } + + #[test] + fn test_to_non_negative_int_with_zero() { + assert_eq!(to_non_negative_int(&CellValue::Int(0)), Some(0)); + assert_eq!(to_non_negative_int(&CellValue::Float(0.0)), Some(0)); + } + + #[test] + fn test_to_non_negative_int_with_negative() { + assert_eq!(to_non_negative_int(&CellValue::Int(-5)), None); + assert_eq!(to_non_negative_int(&CellValue::Float(-1.5)), None); + } + + #[test] + fn test_to_non_negative_int_with_non_numeric() { + assert_eq!( + to_non_negative_int(&CellValue::String("abc".to_string())), + None + ); + assert_eq!(to_non_negative_int(&CellValue::Bool(true)), None); + } + + // ===== to_positive_int tests ===== + + #[test] + fn test_to_positive_int_with_positive() { + assert_eq!(to_positive_int(&CellValue::Int(5)), Some(5)); + assert_eq!(to_positive_int(&CellValue::Float(5.7)), Some(5)); + } + + #[test] + fn test_to_positive_int_with_zero() { + assert_eq!(to_positive_int(&CellValue::Int(0)), None); + assert_eq!(to_positive_int(&CellValue::Float(0.0)), None); + } + + #[test] + fn test_to_positive_int_with_negative() { + assert_eq!(to_positive_int(&CellValue::Int(-5)), None); + assert_eq!(to_positive_int(&CellValue::Float(-1.5)), None); + } + + // ===== take_left tests ===== + + #[test] + fn test_take_left_basic() { + assert_eq!(take_left("Hello", 2), "He"); + assert_eq!(take_left("Hello", 5), "Hello"); + } + + #[test] + fn test_take_left_zero() { + assert_eq!(take_left("Hello", 0), ""); + } + + #[test] + fn test_take_left_more_than_length() { + assert_eq!(take_left("Hello", 10), "Hello"); + } + + #[test] + fn test_take_left_unicode() { + // take_left takes characters, not bytes + assert_eq!(take_left("Hello 世界", 8), "Hello 
世界"); + assert_eq!(take_left("Hello 世界", 7), "Hello 世"); + } + + // ===== take_right tests ===== + + #[test] + fn test_take_right_basic() { + assert_eq!(take_right("Hello", 2), "lo"); + assert_eq!(take_right("Hello", 5), "Hello"); + } + + #[test] + fn test_take_right_zero() { + assert_eq!(take_right("Hello", 0), ""); + } + + #[test] + fn test_take_right_more_than_length() { + assert_eq!(take_right("Hello", 10), "Hello"); + } + + #[test] + fn test_take_right_unicode() { + assert_eq!(take_right("Hello 世界", 3), " 世界"); + } + + // ===== take_mid tests ===== + + #[test] + fn test_take_mid_basic() { + assert_eq!(take_mid("Hello World", 7, 5), "World"); + assert_eq!(take_mid("Hello", 2, 2), "el"); + } + + #[test] + fn test_take_mid_zero_count() { + assert_eq!(take_mid("Hello", 1, 0), ""); + } + + #[test] + fn test_take_mid_start_zero() { + assert_eq!(take_mid("Hello", 0, 3), ""); + } + + #[test] + fn test_take_mid_start_beyond_length() { + assert_eq!(take_mid("Hello", 10, 3), ""); + } + + #[test] + fn test_take_mid_partial() { + assert_eq!(take_mid("Hello", 4, 5), "lo"); + } + + #[test] + fn test_take_mid_unicode() { + assert_eq!(take_mid("Hello 世界", 7, 2), "世界"); + } + + // ===== dbcs_byte_len tests ===== + + #[test] + fn test_dbcs_byte_len_ascii() { + assert_eq!(dbcs_byte_len("Hello"), 5); + } + + #[test] + fn test_dbcs_byte_len_unicode() { + // Characters > 0xFF count as 2 bytes + assert_eq!(dbcs_byte_len("世界"), 4); + // "Hello 世界" has: H(1)+e(1)+l(1)+l(1)+o(1)+ (1)+世(2)+界(2) = 10 bytes + assert_eq!(dbcs_byte_len("Hello 世界"), 10); + } + + #[test] + fn test_dbcs_byte_len_empty() { + assert_eq!(dbcs_byte_len(""), 0); + } + + // ===== take_left_bytes tests ===== + + #[test] + fn test_take_left_bytes_basic() { + assert_eq!(take_left_bytes("Hello", 3), "Hel"); + } + + #[test] + fn test_take_left_bytes_zero() { + assert_eq!(take_left_bytes("Hello", 0), ""); + } + + #[test] + fn test_take_left_bytes_more_than_length() { + assert_eq!(take_left_bytes("Hello", 10), "Hello"); + } 
+ + #[test] + fn test_take_left_bytes_unicode() { + // "世" is 2 bytes, "界" is 2 bytes + assert_eq!(take_left_bytes("世界", 2), "世"); + assert_eq!(take_left_bytes("世界", 4), "世界"); + assert_eq!(take_left_bytes("Hello 世界", 8), "Hello 世"); + } + + // ===== take_right_bytes tests ===== + + #[test] + fn test_take_right_bytes_basic() { + assert_eq!(take_right_bytes("Hello", 3), "llo"); + } + + #[test] + fn test_take_right_bytes_zero() { + assert_eq!(take_right_bytes("Hello", 0), ""); + } + + #[test] + fn test_take_right_bytes_more_than_length() { + assert_eq!(take_right_bytes("Hello", 10), "Hello"); + } + + #[test] + fn test_take_right_bytes_unicode() { + assert_eq!(take_right_bytes("世界", 2), "界"); + assert_eq!(take_right_bytes("Hello 世界", 5), " 世界"); + } + + // ===== take_mid_bytes tests ===== + + #[test] + fn test_take_mid_bytes_basic() { + assert_eq!(take_mid_bytes("Hello", 2, 3), "ell"); + } + + #[test] + fn test_take_mid_bytes_zero_count() { + assert_eq!(take_mid_bytes("Hello", 1, 0), ""); + } + + #[test] + fn test_take_mid_bytes_unicode() { + // "Hello 世界" has bytes: H(1)e(1)l(1)l(1)o(1) (1)世(2)界(2) + // Byte positions: 1 2 3 4 5 6 7 9 11 + assert_eq!(take_mid_bytes("Hello 世界", 7, 2), "世"); + assert_eq!(take_mid_bytes("Hello 世界", 7, 4), "世界"); + } + + // ===== dbcs_byte_prefixes tests ===== + + #[test] + fn test_dbcs_byte_prefixes_ascii() { + assert_eq!(dbcs_byte_prefixes("ABC"), vec![0, 1, 2, 3]); + } + + #[test] + fn test_dbcs_byte_prefixes_unicode() { + // "A世B" - A(1) + 世(2) + B(1) = 4 bytes + assert_eq!(dbcs_byte_prefixes("A世B"), vec![0, 1, 3, 4]); + } + + #[test] + fn test_dbcs_byte_prefixes_empty() { + assert_eq!(dbcs_byte_prefixes(""), vec![0]); + } + + // ===== char_index_from_dbcs_byte tests ===== + + #[test] + fn test_char_index_from_dbcs_byte_basic() { + let prefixes = vec![0, 1, 2, 3]; + assert_eq!(char_index_from_dbcs_byte(&prefixes, 1), Some(0)); + assert_eq!(char_index_from_dbcs_byte(&prefixes, 2), Some(1)); + 
assert_eq!(char_index_from_dbcs_byte(&prefixes, 3), Some(2)); + } + + #[test] + fn test_char_index_from_dbcs_byte_unicode() { + // "A世B" - prefixes [0, 1, 3, 4] + let prefixes = vec![0, 1, 3, 4]; + assert_eq!(char_index_from_dbcs_byte(&prefixes, 1), Some(0)); // A + assert_eq!(char_index_from_dbcs_byte(&prefixes, 2), Some(1)); // within 世 + assert_eq!(char_index_from_dbcs_byte(&prefixes, 3), Some(1)); // 世 end + assert_eq!(char_index_from_dbcs_byte(&prefixes, 4), Some(2)); // B + } + + #[test] + fn test_char_index_from_dbcs_byte_zero() { + let prefixes = vec![0, 1, 2, 3]; + assert_eq!(char_index_from_dbcs_byte(&prefixes, 0), None); + } + + #[test] + fn test_char_index_from_dbcs_byte_beyond() { + let prefixes = vec![0, 1, 2, 3]; + assert_eq!(char_index_from_dbcs_byte(&prefixes, 5), None); + } + + // ===== replace_chars_segment tests ===== + + #[test] + fn test_replace_chars_segment_basic() { + assert_eq!( + replace_chars_segment("Hello World", 7, 5, "Universe"), + Some("Hello Universe".to_string()) + ); + } + + #[test] + fn test_replace_chars_segment_start_zero() { + assert_eq!(replace_chars_segment("Hello", 0, 2, "XX"), None); + } + + #[test] + fn test_replace_chars_segment_start_beyond() { + assert_eq!(replace_chars_segment("Hello", 10, 2, "XX"), None); + } + + #[test] + fn test_replace_chars_segment_zero_chars() { + assert_eq!( + replace_chars_segment("Hello", 3, 0, "XX"), + Some("HeXXllo".to_string()) + ); + } + + #[test] + fn test_replace_chars_segment_more_chars() { + // start_num=4 removes chars[3]=l, and 10 chars from position 4 removes "lo" + assert_eq!( + replace_chars_segment("Hello", 4, 10, "!"), + Some("Hel!".to_string()) + ); + } + + // ===== replace_bytes_segment tests ===== + + #[test] + fn test_replace_bytes_segment_basic() { + // Each char is 1 byte for ASCII, start_byte=7 (position of 'W'), remove 5 bytes ("World") + assert_eq!( + replace_bytes_segment("Hello World", 7, 5, "XYZ"), + Some("Hello XYZ".to_string()) + ); + } + + #[test] + fn 
test_replace_bytes_segment_start_zero() { + assert_eq!(replace_bytes_segment("Hello", 0, 2, "XX"), None); + } + + #[test] + fn test_replace_bytes_segment_start_beyond() { + assert_eq!(replace_bytes_segment("Hello", 10, 2, "XX"), None); + } + + #[test] + fn test_replace_bytes_segment_zero_bytes() { + assert_eq!( + replace_bytes_segment("Hello", 3, 0, "XX"), + Some("HeXXllo".to_string()) + ); + } + + #[test] + fn test_replace_bytes_segment_empty_string() { + // Empty string with start_byte=1 returns None (start_byte > total_bytes) + assert_eq!(replace_bytes_segment("", 1, 0, "XX"), None); + } + + #[test] + fn test_replace_bytes_segment_unicode() { + // "A世C" - A(1 byte), 世(2 bytes), C(1 byte) + // Start at byte 2 (within 世), remove 1 byte + let result = replace_bytes_segment("A世C", 2, 1, "X"); + assert!(result.is_some()); + } +} diff --git a/src/sheet/eval/engine/text/mod.rs b/crates/litchi-eval/src/engine/text/mod.rs similarity index 100% rename from src/sheet/eval/engine/text/mod.rs rename to crates/litchi-eval/src/engine/text/mod.rs diff --git a/src/sheet/eval/engine/text/modern.rs b/crates/litchi-eval/src/engine/text/modern.rs similarity index 89% rename from src/sheet/eval/engine/text/modern.rs rename to crates/litchi-eval/src/engine/text/modern.rs index 1b53567..d7a4e42 100644 --- a/src/sheet/eval/engine/text/modern.rs +++ b/crates/litchi-eval/src/engine/text/modern.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_bool, to_text}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_bool, to_text}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; pub(crate) async fn eval_valuetotext( ctx: EvalCtx<'_>, @@ -15,7 +15,7 @@ pub(crate) async fn eval_valuetotext( let value = evaluate_expression(ctx, current_sheet, &args[0]).await?; let format = if args.len() == 2 { let f_val = evaluate_expression(ctx, current_sheet, 
&args[1]).await?; - match crate::sheet::eval::engine::to_number(&f_val) { + match crate::engine::to_number(&f_val) { Some(n) => n.trunc() as i32, None => 0, } @@ -54,7 +54,7 @@ pub(crate) async fn eval_arraytotext( let format = if args.len() == 2 { let f_val = evaluate_expression(ctx, current_sheet, &args[1]).await?; - match crate::sheet::eval::engine::to_number(&f_val) { + match crate::engine::to_number(&f_val) { Some(n) => n.trunc() as i32, None => 0, } @@ -63,7 +63,7 @@ pub(crate) async fn eval_arraytotext( }; let mut values = Vec::new(); - crate::sheet::eval::engine::for_each_value_in_expr(ctx, current_sheet, &args[0], |v| { + crate::engine::for_each_value_in_expr(ctx, current_sheet, &args[0], |v| { values.push(v.clone()); Ok(()) }) @@ -107,28 +107,22 @@ pub(crate) async fn eval_textbefore( } let instance_num = if args.len() >= 3 { - crate::sheet::eval::engine::to_number( - &evaluate_expression(ctx, current_sheet, &args[2]).await?, - ) - .unwrap_or(1.0) as i32 + crate::engine::to_number(&evaluate_expression(ctx, current_sheet, &args[2]).await?) + .unwrap_or(1.0) as i32 } else { 1 }; let match_mode = if args.len() >= 4 { - crate::sheet::eval::engine::to_number( - &evaluate_expression(ctx, current_sheet, &args[3]).await?, - ) - .unwrap_or(0.0) as i32 + crate::engine::to_number(&evaluate_expression(ctx, current_sheet, &args[3]).await?) + .unwrap_or(0.0) as i32 } else { 0 }; let _match_end = if args.len() >= 5 { - crate::sheet::eval::engine::to_number( - &evaluate_expression(ctx, current_sheet, &args[4]).await?, - ) - .unwrap_or(0.0) as i32 + crate::engine::to_number(&evaluate_expression(ctx, current_sheet, &args[4]).await?) 
+ .unwrap_or(0.0) as i32 } else { 0 }; @@ -198,19 +192,15 @@ pub(crate) async fn eval_textafter( } let instance_num = if args.len() >= 3 { - crate::sheet::eval::engine::to_number( - &evaluate_expression(ctx, current_sheet, &args[2]).await?, - ) - .unwrap_or(1.0) as i32 + crate::engine::to_number(&evaluate_expression(ctx, current_sheet, &args[2]).await?) + .unwrap_or(1.0) as i32 } else { 1 }; let match_mode = if args.len() >= 4 { - crate::sheet::eval::engine::to_number( - &evaluate_expression(ctx, current_sheet, &args[3]).await?, - ) - .unwrap_or(0.0) as i32 + crate::engine::to_number(&evaluate_expression(ctx, current_sheet, &args[3]).await?) + .unwrap_or(0.0) as i32 } else { 0 }; @@ -270,7 +260,7 @@ pub(crate) async fn eval_textsplit( // Delimiters can be a single value or an array of values let mut col_delimiters = Vec::new(); - crate::sheet::eval::engine::for_each_value_in_expr(ctx, current_sheet, &args[1], |v| { + crate::engine::for_each_value_in_expr(ctx, current_sheet, &args[1], |v| { let s = to_text(v); if !s.is_empty() { col_delimiters.push(s); @@ -281,7 +271,7 @@ pub(crate) async fn eval_textsplit( let mut row_delimiters = Vec::new(); if args.len() >= 3 { - crate::sheet::eval::engine::for_each_value_in_expr(ctx, current_sheet, &args[2], |v| { + crate::engine::for_each_value_in_expr(ctx, current_sheet, &args[2], |v| { let s = to_text(v); if !s.is_empty() { row_delimiters.push(s); @@ -298,9 +288,7 @@ pub(crate) async fn eval_textsplit( }; let match_mode = if args.len() >= 5 { - match crate::sheet::eval::engine::to_number( - &evaluate_expression(ctx, current_sheet, &args[4]).await?, - ) { + match crate::engine::to_number(&evaluate_expression(ctx, current_sheet, &args[4]).await?) 
{ Some(n) => n.trunc() as i32, None => 0, } @@ -351,8 +339,8 @@ pub(crate) async fn eval_textsplit( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::engine::test_helpers::TestEngine; - use crate::sheet::eval::parser::Expr; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; fn str_expr(s: &str) -> Expr { Expr::Literal(CellValue::String(s.to_string())) @@ -406,7 +394,7 @@ mod tests { for i in 0..3 { engine.set_cell("Sheet1", 0, i, CellValue::Int(i as i64 + 1)); } - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, @@ -424,7 +412,7 @@ mod tests { let ctx = engine.ctx(); engine.set_cell("Sheet1", 0, 0, CellValue::String("a".to_string())); engine.set_cell("Sheet1", 0, 1, CellValue::Int(1)); - let range = Expr::Range(crate::sheet::eval::parser::RangeRef { + let range = Expr::Range(crate::parser::RangeRef { sheet: "Sheet1".to_string(), start_row: 0, start_col: 0, diff --git a/crates/litchi-eval/src/engine/text/numbering.rs b/crates/litchi-eval/src/engine/text/numbering.rs new file mode 100644 index 0000000..7ee2aad --- /dev/null +++ b/crates/litchi-eval/src/engine/text/numbering.rs @@ -0,0 +1,224 @@ +use crate::engine::{EvalCtx, evaluate_expression, to_text}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; + +pub(crate) async fn eval_arabic( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("ARABIC expects 1 argument".to_string())); + } + let text = to_text(&evaluate_expression(ctx, current_sheet, &args[0]).await?) 
+ .trim() + .to_uppercase(); + if text.is_empty() { + return Ok(CellValue::Int(0)); + } + + let mut result = 0i64; + let mut last_val = 0i64; + + // Handle negative sign + let (haystack, multiplier) = if let Some(stripped) = text.strip_prefix('-') { + (stripped, -1) + } else { + (text.as_str(), 1) + }; + + for c in haystack.chars().rev() { + let val = match c { + 'I' => 1, + 'V' => 5, + 'X' => 10, + 'L' => 50, + 'C' => 100, + 'D' => 500, + 'M' => 1000, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }; + if val < last_val { + result -= val; + } else { + result += val; + } + last_val = val; + } + + Ok(CellValue::Int(result * multiplier)) +} + +pub(crate) async fn eval_roman( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.is_empty() || args.len() > 2 { + return Ok(CellValue::Error( + "ROMAN expects 1 or 2 arguments".to_string(), + )); + } + let val = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let num = match crate::engine::to_number(&val) { + Some(n) => n.trunc() as i64, + None => return Ok(CellValue::Error("#VALUE!".to_string())), + }; + + if !(0..=3999).contains(&num) { + return Ok(CellValue::Error("#VALUE!".to_string())); + } + if num == 0 { + return Ok(CellValue::String(String::new())); + } + + // Excel ROMAN function has different forms (0 to 4), but usually 0 (classic) is used. + // We'll implement classic (form 0) for now. 
+ let mut n = num; + let mut result = String::new(); + let mapping = [ + (1000, "M"), + (900, "CM"), + (500, "D"), + (400, "CD"), + (100, "C"), + (90, "XC"), + (50, "L"), + (40, "XL"), + (10, "X"), + (9, "IX"), + (5, "V"), + (4, "IV"), + (1, "I"), + ]; + + for (val, sym) in mapping { + while n >= val { + result.push_str(sym); + n -= val; + } + } + + Ok(CellValue::String(result)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + + #[tokio::test] + async fn test_eval_arabic_simple() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String("XII".to_string()))]; + let result = eval_arabic(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(12)); + } + + #[tokio::test] + async fn test_eval_arabic_complex() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String("MCMXCIV".to_string()))]; + let result = eval_arabic(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(1994)); + } + + #[tokio::test] + async fn test_eval_arabic_lowercase() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String("xiv".to_string()))]; + let result = eval_arabic(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(14)); + } + + #[tokio::test] + async fn test_eval_arabic_empty() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String("".to_string()))]; + let result = eval_arabic(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(0)); + } + + #[tokio::test] + async fn test_eval_arabic_invalid() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String("ABC".to_string()))]; + let result = eval_arabic(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, 
CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_eval_arabic_wrong_args() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![]; + let result = eval_arabic(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 1 argument")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_roman_simple() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Int(12))]; + let result = eval_roman(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("XII".to_string())); + } + + #[tokio::test] + async fn test_eval_roman_complex() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Int(1994))]; + let result = eval_roman(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("MCMXCIV".to_string())); + } + + #[tokio::test] + async fn test_eval_roman_zero() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Int(0))]; + let result = eval_roman(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("".to_string())); + } + + #[tokio::test] + async fn test_eval_roman_too_large() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Int(4000))]; + let result = eval_roman(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_eval_roman_negative() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Int(-5))]; + let result = eval_roman(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_eval_roman_wrong_args() { + let engine = TestEngine::new(); + let ctx 
= engine.ctx(); + let args: Vec = vec![]; + let result = eval_roman(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 1 or 2")), + _ => panic!("Expected Error"), + } + } +} diff --git a/src/sheet/eval/engine/text/substring.rs b/crates/litchi-eval/src/engine/text/substring.rs similarity index 94% rename from src/sheet/eval/engine/text/substring.rs rename to crates/litchi-eval/src/engine/text/substring.rs index 6342475..5d768ac 100644 --- a/src/sheet/eval/engine/text/substring.rs +++ b/crates/litchi-eval/src/engine/text/substring.rs @@ -1,6 +1,6 @@ -use crate::sheet::eval::engine::{EvalCtx, evaluate_expression, to_text}; -use crate::sheet::eval::parser::Expr; -use crate::sheet::{CellValue, Result}; +use crate::engine::{EvalCtx, evaluate_expression, to_text}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; use super::helpers::{ char_index_from_dbcs_byte, dbcs_byte_prefixes, replace_bytes_segment, replace_chars_segment, @@ -569,7 +569,7 @@ async fn eval_find_like( #[cfg(test)] mod tests { use super::*; - use crate::sheet::eval::parser::Expr; + use crate::parser::Expr; fn str_expr(s: &str) -> Expr { Expr::Literal(CellValue::String(s.to_string())) @@ -585,7 +585,7 @@ mod tests { #[tokio::test] async fn test_eval_char() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(65.0)]; let result = eval_char(ctx, "Sheet1", &args).await.unwrap(); @@ -597,7 +597,7 @@ mod tests { #[tokio::test] async fn test_eval_char_invalid() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![num_expr(300.0)]; let result = eval_char(ctx, "Sheet1", &args).await.unwrap(); @@ -609,7 +609,7 @@ mod tests { #[tokio::test] async fn test_eval_code() { - let 
engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("A")]; let result = eval_code(ctx, "Sheet1", &args).await.unwrap(); @@ -621,7 +621,7 @@ mod tests { #[tokio::test] async fn test_eval_code_empty() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("")]; let result = eval_code(ctx, "Sheet1", &args).await.unwrap(); @@ -633,7 +633,7 @@ mod tests { #[tokio::test] async fn test_eval_clean() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello\x00world")]; let result = eval_clean(ctx, "Sheet1", &args).await.unwrap(); @@ -645,7 +645,7 @@ mod tests { #[tokio::test] async fn test_eval_left() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello"), num_expr(3.0)]; let result = eval_left(ctx, "Sheet1", &args).await.unwrap(); @@ -657,7 +657,7 @@ mod tests { #[tokio::test] async fn test_eval_left_default() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello")]; let result = eval_left(ctx, "Sheet1", &args).await.unwrap(); @@ -669,7 +669,7 @@ mod tests { #[tokio::test] async fn test_eval_right() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello"), num_expr(3.0)]; let result = eval_right(ctx, "Sheet1", &args).await.unwrap(); @@ -681,7 +681,7 @@ 
mod tests { #[tokio::test] async fn test_eval_mid() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello world"), num_expr(7.0), num_expr(5.0)]; let result = eval_mid(ctx, "Sheet1", &args).await.unwrap(); @@ -693,7 +693,7 @@ mod tests { #[tokio::test] async fn test_eval_mid_start_at_1() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("hello"), num_expr(1.0), num_expr(3.0)]; let result = eval_mid(ctx, "Sheet1", &args).await.unwrap(); @@ -705,7 +705,7 @@ mod tests { #[tokio::test] async fn test_eval_replace() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![ str_expr("hello world"), @@ -722,7 +722,7 @@ mod tests { #[tokio::test] async fn test_eval_find() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("world"), str_expr("hello world")]; let result = eval_find(ctx, "Sheet1", &args).await.unwrap(); @@ -734,7 +734,7 @@ mod tests { #[tokio::test] async fn test_eval_find_case_sensitive() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("WORLD"), str_expr("hello world")]; let result = eval_find(ctx, "Sheet1", &args).await.unwrap(); @@ -746,7 +746,7 @@ mod tests { #[tokio::test] async fn test_eval_search() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = 
vec![str_expr("world"), str_expr("hello world")]; let result = eval_search(ctx, "Sheet1", &args).await.unwrap(); @@ -758,7 +758,7 @@ mod tests { #[tokio::test] async fn test_eval_search_case_insensitive() { - let engine = crate::sheet::eval::engine::test_helpers::TestEngine::new(); + let engine = crate::engine::test_helpers::TestEngine::new(); let ctx = engine.ctx(); let args = vec![str_expr("WORLD"), str_expr("hello world")]; let result = eval_search(ctx, "Sheet1", &args).await.unwrap(); diff --git a/crates/litchi-eval/src/engine/text/unicode.rs b/crates/litchi-eval/src/engine/text/unicode.rs new file mode 100644 index 0000000..3fbb9a0 --- /dev/null +++ b/crates/litchi-eval/src/engine/text/unicode.rs @@ -0,0 +1,147 @@ +use crate::engine::{EvalCtx, evaluate_expression, to_text}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; + +pub(crate) async fn eval_unichar( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("UNICHAR expects 1 argument".to_string())); + } + let val = evaluate_expression(ctx, current_sheet, &args[0]).await?; + if let Some(n) = crate::engine::to_number(&val) { + let code = n.trunc() as u32; + if code == 0 { + return Ok(CellValue::Error("#VALUE!".to_string())); + } + match char::from_u32(code) { + Some(c) => Ok(CellValue::String(c.to_string())), + None => Ok(CellValue::Error("#VALUE!".to_string())), + } + } else { + Ok(CellValue::Error("#VALUE!".to_string())) + } +} + +pub(crate) async fn eval_unicode( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + if args.len() != 1 { + return Ok(CellValue::Error("UNICODE expects 1 argument".to_string())); + } + let val = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let text = to_text(&val); + if let Some(c) = text.chars().next() { + Ok(CellValue::Int(c as u32 as i64)) + } else { + Ok(CellValue::Error("#VALUE!".to_string())) + } +} + +#[cfg(test)] +mod tests { + use 
super::*; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + + #[tokio::test] + async fn test_eval_unichar_ascii() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Int(65))]; // 'A' + let result = eval_unichar(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("A".to_string())); + } + + #[tokio::test] + async fn test_eval_unichar_space() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Int(32))]; // space + let result = eval_unichar(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String(" ".to_string())); + } + + #[tokio::test] + async fn test_eval_unichar_zero() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::Int(0))]; + let result = eval_unichar(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_eval_unichar_invalid() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String("abc".to_string()))]; + let result = eval_unichar(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_eval_unichar_wrong_args() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![]; + let result = eval_unichar(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 1 argument")), + _ => panic!("Expected Error"), + } + } + + #[tokio::test] + async fn test_eval_unicode_ascii() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String("A".to_string()))]; + let result = eval_unicode(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(65)); + } + + #[tokio::test] + async fn 
test_eval_unicode_space() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String(" ".to_string()))]; + let result = eval_unicode(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(32)); + } + + #[tokio::test] + async fn test_eval_unicode_empty() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![Expr::Literal(CellValue::String("".to_string()))]; + let result = eval_unicode(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_eval_unicode_first_char() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + // Should return code for first character only + let args = vec![Expr::Literal(CellValue::String("ABC".to_string()))]; + let result = eval_unicode(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Int(65)); + } + + #[tokio::test] + async fn test_eval_unicode_wrong_args() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![]; + let result = eval_unicode(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 1 argument")), + _ => panic!("Expected Error"), + } + } +} diff --git a/crates/litchi-eval/src/engine/web/functions.rs b/crates/litchi-eval/src/engine/web/functions.rs new file mode 100644 index 0000000..d14755a --- /dev/null +++ b/crates/litchi-eval/src/engine/web/functions.rs @@ -0,0 +1,318 @@ +use crate::engine::EvalCtx; +#[cfg(feature = "web_functions")] +use crate::engine::{evaluate_expression, to_text}; +use crate::parser::Expr; +use litchi_core::sheet::{CellValue, Result}; + +pub(crate) async fn eval_encodeurl( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + #[cfg(feature = "web_functions")] + { + if args.len() != 1 { + return Ok(CellValue::Error("ENCODEURL expects 1 argument".to_string())); + } + let text = 
to_text(&evaluate_expression(ctx, current_sheet, &args[0]).await?); + + let encoded = urlencoding::encode(&text).to_string(); + Ok(CellValue::String(encoded)) + } + #[cfg(not(feature = "web_functions"))] + { + let _ = (ctx, current_sheet, args); + Ok(CellValue::Error("#NAME?".to_string())) + } +} + +pub(crate) async fn eval_webservice( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + #[cfg(feature = "web_functions")] + { + if args.len() != 1 { + return Ok(CellValue::Error( + "WEBSERVICE expects 1 argument".to_string(), + )); + } + let url = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let url_str = to_text(&url); + + if url_str.len() > 2048 { + return Ok(CellValue::Error("#VALUE!".to_string())); + } + + if !url_str.starts_with("http://") && !url_str.starts_with("https://") { + return Ok(CellValue::Error("#VALUE!".to_string())); + } + + let client = ctx.http_client(); + match client.get(&url_str).send().await { + Ok(response) => match response.text().await { + Ok(body) => { + if body.len() > 32767 { + Ok(CellValue::Error("#VALUE!".to_string())) + } else { + Ok(CellValue::String(body)) + } + }, + Err(_) => Ok(CellValue::Error("#VALUE!".to_string())), + }, + Err(_) => Ok(CellValue::Error("#VALUE!".to_string())), + } + } + #[cfg(not(feature = "web_functions"))] + { + let _ = (ctx, current_sheet, args); + Ok(CellValue::Error("#NAME?".to_string())) + } +} + +pub(crate) async fn eval_filterxml( + ctx: EvalCtx<'_>, + current_sheet: &str, + args: &[Expr], +) -> Result { + #[cfg(feature = "web_functions")] + { + if args.len() != 2 { + return Ok(CellValue::Error( + "FILTERXML expects 2 arguments (xml, xpath)".to_string(), + )); + } + let xml_val = evaluate_expression(ctx, current_sheet, &args[0]).await?; + let xml = to_text(&xml_val); + let xpath_val = evaluate_expression(ctx, current_sheet, &args[1]).await?; + let xpath_str = to_text(&xpath_val); + + let package = match sxd_document::parser::parse(&xml) { + Ok(p) => p, + Err(_) => 
return Ok(CellValue::Error("#VALUE!".to_string())), + }; + let document = package.as_document(); + + let factory = sxd_xpath::Factory::new(); + let xpath = match factory.build(&xpath_str) { + Ok(Some(xpath)) => xpath, + _ => return Ok(CellValue::Error("#VALUE!".to_string())), + }; + + let context = sxd_xpath::Context::new(); + match xpath.evaluate(&context, document.root()) { + Ok(value) => match value { + sxd_xpath::Value::Nodeset(ns) => { + if let Some(node) = ns.document_order_first() { + Ok(CellValue::String(node.string_value())) + } else { + Ok(CellValue::Error("#VALUE!".to_string())) + } + }, + sxd_xpath::Value::Boolean(b) => Ok(CellValue::Bool(b)), + sxd_xpath::Value::Number(n) => Ok(CellValue::Float(n)), + sxd_xpath::Value::String(s) => Ok(CellValue::String(s)), + }, + Err(_) => Ok(CellValue::Error("#VALUE!".to_string())), + } + } + #[cfg(not(feature = "web_functions"))] + { + let _ = (ctx, current_sheet, args); + Ok(CellValue::Error("#NAME?".to_string())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::engine::test_helpers::TestEngine; + use crate::parser::Expr; + + fn str_expr(s: &str) -> Expr { + Expr::Literal(CellValue::String(s.to_string())) + } + + // Tests for when eval_engine_web_functions feature is NOT enabled + #[cfg(not(feature = "web_functions"))] + mod no_feature_tests { + use super::*; + + #[tokio::test] + async fn test_encodeurl_without_feature() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("hello world")]; + let result = eval_encodeurl(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#NAME?".to_string())); + } + + #[tokio::test] + async fn test_webservice_without_feature() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("https://example.com")]; + let result = eval_webservice(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#NAME?".to_string())); + } + + #[tokio::test] + async fn 
test_filterxml_without_feature() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr(""), str_expr("/root")]; + let result = eval_filterxml(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#NAME?".to_string())); + } + } + + // Tests for when eval_engine_web_functions feature IS enabled + #[cfg(feature = "web_functions")] + mod feature_tests { + use super::*; + + #[tokio::test] + async fn test_encodeurl_simple() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("hello world")]; + let result = eval_encodeurl(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("hello%20world".to_string())); + } + + #[tokio::test] + async fn test_encodeurl_special_chars() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("foo/bar+baz?key=value")]; + let result = eval_encodeurl(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!( + result, + CellValue::String("foo%2Fbar%2Bbaz%3Fkey%3Dvalue".to_string()) + ); + } + + #[tokio::test] + async fn test_encodeurl_wrong_args() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![]; + let result = eval_encodeurl(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 1 argument")), + _ => panic!("Expected Error result, got {:?}", result), + } + } + + #[tokio::test] + async fn test_filterxml_basic() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let xml = "Hello"; + let xpath = "/root/item"; + let args = vec![str_expr(xml), str_expr(xpath)]; + let result = eval_filterxml(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("Hello".to_string())); + } + + #[tokio::test] + async fn test_filterxml_number_result() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let xml = "42.5"; + let xpath = "/data/value"; + let args = vec![str_expr(xml), 
str_expr(xpath)]; + let result = eval_filterxml(ctx, "Sheet1", &args).await.unwrap(); + // XPath on element returns the text content as a string + assert_eq!(result, CellValue::String("42.5".to_string())); + } + + #[tokio::test] + async fn test_filterxml_attribute() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let xml = ""; + let xpath = "/user/@name"; + let args = vec![str_expr(xml), str_expr(xpath)]; + let result = eval_filterxml(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::String("John".to_string())); + } + + #[tokio::test] + async fn test_filterxml_invalid_xml() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let xml = "unclosed"; + let xpath = "/root"; + let args = vec![str_expr(xml), str_expr(xpath)]; + let result = eval_filterxml(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_filterxml_invalid_xpath() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let xml = ""; + let xpath = "[[[invalid"; + let args = vec![str_expr(xml), str_expr(xpath)]; + let result = eval_filterxml(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_filterxml_no_match() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let xml = "value"; + let xpath = "/root/nonexistent"; + let args = vec![str_expr(xml), str_expr(xpath)]; + let result = eval_filterxml(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_filterxml_wrong_args() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![str_expr("")]; + let result = eval_filterxml(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 2 arguments")), + _ => panic!("Expected Error result, got {:?}", result), + } 
+ } + + #[tokio::test] + async fn test_webservice_wrong_args() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args: Vec = vec![]; + let result = eval_webservice(ctx, "Sheet1", &args).await.unwrap(); + match result { + CellValue::Error(e) => assert!(e.contains("expects 1 argument")), + _ => panic!("Expected Error result, got {:?}", result), + } + } + + #[tokio::test] + async fn test_webservice_invalid_url() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let args = vec![str_expr("ftp://invalid.protocol.com")]; + let result = eval_webservice(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + + #[tokio::test] + async fn test_webservice_url_too_long() { + let engine = TestEngine::new(); + let ctx = engine.ctx(); + let long_url = format!("https://example.com/{}", "a".repeat(2048)); + let args = vec![str_expr(&long_url)]; + let result = eval_webservice(ctx, "Sheet1", &args).await.unwrap(); + assert_eq!(result, CellValue::Error("#VALUE!".to_string())); + } + } +} diff --git a/src/sheet/eval/engine/web/mod.rs b/crates/litchi-eval/src/engine/web/mod.rs similarity index 100% rename from src/sheet/eval/engine/web/mod.rs rename to crates/litchi-eval/src/engine/web/mod.rs diff --git a/crates/litchi-eval/src/lib.rs b/crates/litchi-eval/src/lib.rs new file mode 100644 index 0000000..54547dd --- /dev/null +++ b/crates/litchi-eval/src/lib.rs @@ -0,0 +1,592 @@ +#![allow(missing_docs)] +//! Formula evaluation engine shared across spreadsheet formats. +//! +//! This module provides a small, format-agnostic evaluation layer that works +//! on top of the unified `sheet` traits. It is intentionally conservative: +//! it prefers using cached values embedded in files and can be extended +//! over time to support more Excel semantics. 
+ +pub mod engine; +pub mod parser; + +use self::engine::{ReferenceResolver, ResolvedName}; +use self::parser::{RangeRef, parse_range_reference, parse_single_cell_reference}; +use litchi_core::sheet::{CellValue, Result, WorkbookTrait}; +use std::borrow::Cow; +use std::collections::{HashMap, HashSet}; +use tokio::sync::RwLock; + +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +struct CellRef { + sheet_idx: usize, + row: u32, + col: u32, +} + +struct EvalState { + cache: HashMap, + visiting: HashSet, +} + +use std::future::Future; +use std::pin::Pin; + +pub(crate) type BoxFuture<'a, T> = Pin + Send + 'a>>; + +/// Evaluation context used by the engine runtime. +pub(crate) trait EngineCtx: Send + Sync { + fn get_cell_value<'a>( + &'a self, + sheet_name: &'a str, + row: u32, + col: u32, + ) -> BoxFuture<'a, Result>; + + /// Returns the current evaluation position (sheet, row, col) if a formula is being + /// evaluated. This is primarily used by functions such as ROW() or COLUMN() that need + /// to know the location of the formula cell when no explicit reference is supplied. + fn current_position(&self) -> Option<(String, u32, u32)>; + + /// Returns the raw value stored in the workbook without triggering evaluation. + /// + /// This is useful for functions like ISFORMULA that need to inspect the cell's + /// original content rather than the evaluated result. + fn raw_cell_value<'a>( + &'a self, + sheet_name: &'a str, + row: u32, + col: u32, + ) -> BoxFuture<'a, Result>; + + /// Returns true if the workbook backing this context uses the 1904 date system. + fn is_1904_date_system(&self) -> bool; + + /// Returns a shared HTTP client for web functions. + #[cfg(feature = "web_functions")] + fn http_client(&self) -> &reqwest::Client; + + /// Returns the index of the given sheet (0-based). + fn get_sheet_index(&self, name: &str) -> Option; + + /// Returns the total number of sheets in the workbook. 
+ fn get_sheet_count(&self) -> usize; +} + +/// Simple formula evaluator operating on a `WorkbookTrait`. +/// +/// The initial implementation is intentionally basic: +/// - For non-formula cells, it returns the stored value. +/// - For formula cells, it returns the cached result if present. +/// - If no cached result is available, it returns an Error cell. +pub struct FormulaEvaluator<'a, W: WorkbookTrait + ?Sized> { + workbook: &'a W, + sheet_index: HashMap, + eval_state: RwLock, + names: HashMap, + local_names: HashMap<(String, String), String>, + tables: HashMap, + position_stack: RwLock>, + #[cfg(feature = "web_functions")] + http_client: reqwest::Client, +} + +#[derive(Clone)] +struct NamedTable { + sheet: String, + start_row: u32, + start_col: u32, + end_row: u32, + end_col: u32, + headers: HashMap, +} + +impl<'a, W: WorkbookTrait + Sync + Send + ?Sized> EngineCtx for FormulaEvaluator<'a, W> { + fn get_cell_value<'b>( + &'b self, + sheet_name: &'b str, + row: u32, + col: u32, + ) -> BoxFuture<'b, Result> { + Box::pin(async move { + let sheet_idx = *self + .sheet_index + .get(sheet_name) + .expect("Sheet name not found in index"); + let key = CellRef { + sheet_idx, + row, + col, + }; + + // Fast path: cached value + { + let state = self.eval_state.read().await; + if let Some(v) = state.cache.get(&key) { + return Ok(v.clone()); + } + + if state.visiting.contains(&key) { + // Circular reference detected. 
+ return Ok(CellValue::Error("Circular reference detected".to_string())); + } + } + + // Mark as visiting + { + let mut state = self.eval_state.write().await; + state.visiting.insert(key); + } + + // Load raw value from workbook + let sheet = self.workbook.worksheet_by_name(sheet_name)?; + let value: Cow<'_, CellValue> = sheet.cell_value(row, col)?; + let raw = value.into_owned(); + + // Evaluate value (handles formulas and cached results) + let result = self.evaluate_value(sheet_name, row, col, raw).await?; + + // Store in cache and clear visiting + { + let mut state = self.eval_state.write().await; + state.visiting.remove(&key); + state.cache.insert(key, result.clone()); + } + + Ok(result) + }) + } + + fn current_position(&self) -> Option<(String, u32, u32)> { + // NOTE: This now returns a default or requires a sync way to access. + // For simplicity in this migration, we might need a sync guard or just accept that + // functions needing position might need to be async or we use a different mechanism. + // For now, use blocking read as it's just a stack of strings/u32. 
+ self.position_stack.blocking_read().last().cloned() + } + + fn raw_cell_value<'b>( + &'b self, + sheet_name: &'b str, + row: u32, + col: u32, + ) -> BoxFuture<'b, Result> { + Box::pin(async move { + let sheet = self.workbook.worksheet_by_name(sheet_name)?; + let value: Cow<'_, CellValue> = sheet.cell_value(row, col)?; + Ok(value.into_owned()) + }) + } + + fn is_1904_date_system(&self) -> bool { + self.workbook.is_1904_date_system() + } + + #[cfg(feature = "web_functions")] + fn http_client(&self) -> &reqwest::Client { + &self.http_client + } + + fn get_sheet_index(&self, name: &str) -> Option { + self.sheet_index.get(name).copied() + } + + fn get_sheet_count(&self) -> usize { + self.workbook.worksheet_names().len() + } +} + +impl<'a, W: WorkbookTrait + ?Sized> ReferenceResolver for FormulaEvaluator<'a, W> { + fn resolve_name(&self, current_sheet: &str, name: &str) -> Result> { + let trimmed = name.trim(); + if trimmed.is_empty() { + return Ok(None); + } + + if let Some(resolved) = self.resolve_table_reference(current_sheet, trimmed)? { + return Ok(Some(resolved)); + } + + let norm = trimmed.to_uppercase(); + if let Some(reference) = self + .local_names + .get(&(current_sheet.to_string(), norm.clone())) + { + return Ok(self.resolve_reference_string(current_sheet, reference)); + } + + if let Some(reference) = self.names.get(&norm) { + return Ok(self.resolve_reference_string(current_sheet, reference)); + } + + Ok(None) + } +} + +pub struct TableConfig<'a> { + pub name: &'a str, + pub sheet_name: &'a str, + pub start_row: u32, + pub start_col: u32, + pub end_row: u32, + pub end_col: u32, + pub headers: &'a [String], +} + +impl<'a, W: WorkbookTrait + Sync + Send + ?Sized> FormulaEvaluator<'a, W> { + /// Create a new evaluator for the given workbook. 
+    ///
+    /// Builds a name-to-position index over `workbook.worksheet_names()` and
+    /// starts with empty caches, names, tables and position stack.
+    pub fn new(workbook: &'a W) -> Self {
+        let mut sheet_index = HashMap::new();
+        for (idx, name) in workbook.worksheet_names().iter().enumerate() {
+            sheet_index.insert(name.clone(), idx);
+        }
+        Self {
+            workbook,
+            sheet_index,
+            eval_state: RwLock::new(EvalState {
+                cache: HashMap::new(),
+                visiting: HashSet::new(),
+            }),
+            names: HashMap::new(),
+            local_names: HashMap::new(),
+            tables: HashMap::new(),
+            position_stack: RwLock::new(Vec::new()),
+            #[cfg(feature = "web_functions")]
+            http_client: reqwest::Client::new(),
+        }
+    }
+
+    /// Register a workbook-wide defined name.
+    ///
+    /// The name is stored trimmed and upper-cased, the reference trimmed.
+    /// Defining the same name again replaces the earlier reference.
+    pub fn define_name(&mut self, name: &str, reference: &str) {
+        self.names
+            .insert(name.trim().to_uppercase(), reference.trim().to_string());
+    }
+
+    /// Register a defined name that is local to `sheet_name`.
+    ///
+    /// The sheet name is used as given; the name is trimmed and upper-cased,
+    /// the reference trimmed.
+    pub fn define_name_local(&mut self, sheet_name: &str, name: &str, reference: &str) {
+        self.local_names.insert(
+            (sheet_name.to_string(), name.trim().to_uppercase()),
+            reference.trim().to_string(),
+        );
+    }
+
+    /// Register a named table for structured references.
+    ///
+    /// Headers are assigned to consecutive columns starting at
+    /// `config.start_col`; headers that would fall past `config.end_col` are
+    /// ignored, as are headers that are empty after trimming. Header and table
+    /// names are stored trimmed and upper-cased.
+    pub fn define_table(&mut self, config: TableConfig) {
+        let mut header_map = HashMap::new();
+        for (i, h) in config.headers.iter().enumerate() {
+            // Stop (rather than truncate or overflow) when the column number
+            // cannot be represented as `u32`.
+            let Some(col) = u32::try_from(i)
+                .ok()
+                .and_then(|offset| config.start_col.checked_add(offset))
+            else {
+                break;
+            };
+            if col > config.end_col {
+                break;
+            }
+            let key = h.trim().to_uppercase();
+            if !key.is_empty() {
+                header_map.insert(key, col);
+            }
+        }
+        self.tables.insert(
+            config.name.trim().to_uppercase(),
+            NamedTable {
+                sheet: config.sheet_name.to_string(),
+                start_row: config.start_row,
+                start_col: config.start_col,
+                end_row: config.end_row,
+                end_col: config.end_col,
+                headers: header_map,
+            },
+        );
+    }
+
+    /// Parse a stored reference string, trying a range first and a single
+    /// cell second. Returns `None` when neither parser accepts it.
+    fn resolve_reference_string(
+        &self,
+        current_sheet: &str,
+        reference: &str,
+    ) -> Option<ResolvedName> {
+        if let Some(range) = parse_range_reference(current_sheet, reference) {
+            return Some(ResolvedName::Range(range));
+        }
+        if let Some((sheet, row, col)) = parse_single_cell_reference(current_sheet, reference) {
+            return Some(ResolvedName::Cell { sheet, row, col });
+        }
+        None
+    }
+
+    /// Resolve a structured table reference to a range.
+    ///
+    /// Returns `Ok(None)` when `name` is not a structured reference or names
+    /// an unknown table.
+    ///
+    /// # Errors
+    ///
+    /// Fails for "this row" references (no row context is available here) and
+    /// for column names that are not headers of the table.
+    fn resolve_table_reference(
+        &self,
+        _current_sheet: &str,
+        name: &str,
+    ) -> Result<Option<ResolvedName>> {
+        use self::parser::StructuredReference;
+
+        let structured_ref = match parser::parse_structured_reference(name) {
+            Some(r) => r,
+            None => return Ok(None),
+        };
+
+        let table_name = match &structured_ref {
+            StructuredReference::WholeTable { table_name }
+            | StructuredReference::DataOnly { table_name }
+            | StructuredReference::Headers { table_name }
+            | StructuredReference::Totals { table_name }
+            | StructuredReference::All { table_name }
+            | StructuredReference::ThisRow { table_name }
+            | StructuredReference::Column { table_name, .. }
+            | StructuredReference::ColumnThisRow { table_name, .. }
+            | StructuredReference::ColumnRange { table_name, .. }
+            | StructuredReference::HeaderColumn { table_name, .. }
+            | StructuredReference::TotalsColumn { table_name, .. } => table_name,
+        };
+
+        let table = match self.tables.get(&table_name.to_uppercase()) {
+            Some(t) => t,
+            None => return Ok(None),
+        };
+
+        // Range on the table's sheet with the given corners.
+        let span = |start_row: u32, start_col: u32, end_row: u32, end_col: u32| RangeRef {
+            sheet: table.sheet.clone(),
+            start_row,
+            start_col,
+            end_row,
+            end_col,
+        };
+        // Case-insensitive header lookup shared by all column-based variants.
+        let column_index = |column_name: &str| -> Result<u32> {
+            table
+                .headers
+                .get(&column_name.to_uppercase())
+                .copied()
+                .ok_or_else(|| format!("Column '{}' not found in table", column_name).into())
+        };
+        // Data starts below the header row, unless the table is a single row.
+        let data_start = if table.start_row < table.end_row {
+            table.start_row + 1
+        } else {
+            table.start_row
+        };
+
+        let range = match structured_ref {
+            StructuredReference::WholeTable { .. } | StructuredReference::All { .. } => span(
+                table.start_row,
+                table.start_col,
+                table.end_row,
+                table.end_col,
+            ),
+            StructuredReference::DataOnly { .. } => {
+                span(data_start, table.start_col, table.end_row, table.end_col)
+            },
+            StructuredReference::Headers { .. } => span(
+                table.start_row,
+                table.start_col,
+                table.start_row,
+                table.end_col,
+            ),
+            StructuredReference::Totals { .. } => {
+                span(table.end_row, table.start_col, table.end_row, table.end_col)
+            },
+            StructuredReference::ThisRow { .. } => {
+                return Err("[@] this row references require row context".into());
+            },
+            StructuredReference::Column { column_name, .. } => {
+                let col = column_index(&column_name)?;
+                span(data_start, col, table.end_row, col)
+            },
+            StructuredReference::ColumnThisRow { column_name, .. } => {
+                // Check the column first so an unknown column is reported as such.
+                column_index(&column_name)?;
+                return Err(
+                    format!("[@{}] this row references require row context", column_name).into(),
+                );
+            },
+            StructuredReference::ColumnRange {
+                start_column,
+                end_column,
+                ..
+            } => {
+                let first = column_index(&start_column)?;
+                let last = column_index(&end_column)?;
+                // Accept the columns in either order so the range is never inverted.
+                span(data_start, first.min(last), table.end_row, first.max(last))
+            },
+            StructuredReference::HeaderColumn { column_name, .. } => {
+                let col = column_index(&column_name)?;
+                span(table.start_row, col, table.start_row, col)
+            },
+            StructuredReference::TotalsColumn { column_name, .. } => {
+                let col = column_index(&column_name)?;
+                span(table.end_row, col, table.end_row, col)
+            },
+        };
+
+        Ok(Some(ResolvedName::Range(range)))
+    }
+
+    /// Evaluate a single cell in the given worksheet.
+    ///
+    /// Row and column are 1-based, consistent with the `Worksheet` trait.
+    pub async fn evaluate_cell(&self, sheet_name: &str, row: u32, col: u32) -> Result<CellValue> {
+        self.get_cell_value(sheet_name, row, col).await
+    }
+
+    /// Evaluate all cells in a worksheet and return a dense 2D grid
+    /// covering the sheet's declared dimensions.
+    ///
+    /// Returns an empty grid when the sheet declares no dimensions. The
+    /// first cell that fails to evaluate aborts the whole call.
+    pub async fn evaluate_sheet(&self, sheet_name: &str) -> Result<Vec<Vec<CellValue>>> {
+        let sheet = self.workbook.worksheet_by_name(sheet_name)?;
+        let dims = match sheet.dimensions() {
+            Some(d) => d,
+            None => return Ok(Vec::new()),
+        };
+
+        let (min_row, min_col, max_row, max_col) = dims;
+        let mut rows = Vec::new();
+
+        for row in min_row..=max_row {
+            let mut out_row = Vec::new();
+            for col in min_col..=max_col {
+                out_row.push(self.get_cell_value(sheet_name, row, col).await?);
+            }
+            rows.push(out_row);
+        }
+
+        Ok(rows)
+    }
+
+    /// Core evaluation routine for a single cell value.
+    ///
+    /// Non-formula values are returned unchanged. For a formula, the cached
+    /// result embedded in the file is preferred; only when there is none is
+    /// the formula text handed to `evaluate_formula`.
+    async fn evaluate_value(
+        &self,
+        sheet_name: &str,
+        row: u32,
+        col: u32,
+        value: CellValue,
+    ) -> Result<CellValue> {
+        let result = match value {
+            CellValue::Formula {
+                formula,
+                cached_value,
+                ..
+            } => {
+                if let Some(cached) = cached_value {
+                    // Prefer the cached result embedded in the file.
+                    (*cached).clone()
+                } else {
+                    // No cached value – perform a minimal evaluation of the
+                    // formula text. Any parsing/semantic issues are reported as
+                    // CellValue::Error rather than hard failures.
+                    self.evaluate_formula(sheet_name, row, col, &formula)
+                        .await?
+                }
+            },
+            other => other,
+        };
+
+        Ok(result)
+    }
+
+    /// Evaluate formula text for the cell at (`_row`, `_col`) on `sheet_name`.
+    ///
+    /// A leading `=` is optional. Empty and unparsable formulas yield a
+    /// `CellValue::Error` instead of an `Err`. While the expression runs, the
+    /// cell's position is kept on the evaluator's position stack.
+    ///
+    /// Despite the underscore prefix, `_row` and `_col` are used: they are
+    /// what gets pushed onto the position stack.
+    async fn evaluate_formula(
+        &self,
+        sheet_name: &str,
+        _row: u32,
+        _col: u32,
+        expr: &str,
+    ) -> Result<CellValue> {
+        let s = expr.trim();
+        if s.is_empty() {
+            return Ok(CellValue::Error("Empty formula".to_string()));
+        }
+
+        let body = s.strip_prefix('=').unwrap_or(s);
+        if body.is_empty() {
+            return Ok(CellValue::Error("Empty formula".to_string()));
+        }
+
+        /// Pushes a position on creation and pops one again when dropped.
+        struct PositionGuard<'a> {
+            stack: &'a RwLock<Vec<(String, u32, u32)>>,
+        }
+
+        impl<'a> PositionGuard<'a> {
+            async fn new(
+                stack: &'a RwLock<Vec<(String, u32, u32)>>,
+                sheet: &str,
+                row: u32,
+                col: u32,
+            ) -> Self {
+                stack.write().await.push((sheet.to_string(), row, col));
+                PositionGuard { stack }
+            }
+        }
+
+        impl<'a> Drop for PositionGuard<'a> {
+            fn drop(&mut self) {
+                // NOTE(review): `drop` cannot await, so this only pops when the
+                // lock is free right now. If `try_write` fails, the entry stays
+                // on the stack — confirm that cannot happen in practice.
+                if let Ok(mut guard) = self.stack.try_write() {
+                    guard.pop();
+                }
+            }
+        }
+
+        let _position_guard =
+            PositionGuard::new(&self.position_stack, sheet_name, _row, _col).await;
+
+        // General expression (e.g., A1+2, 1+2*3, CONCAT("a","b"),
+        // TEXTJOIN("-",TRUE,A1:A3)). This uses the small expression parser
+        // and runtime engine. If parsing fails, fall back to returning an
+        // Error cell rather than panicking.
+        if let Some(expr) = parser::parse_expression(sheet_name, body) {
+            return engine::evaluate_expression(self, sheet_name, &expr).await;
+        }
+
+        // Unsupported or unrecognized formula in this MVP implementation.
+        Ok(CellValue::Error(format!(
+            "Unsupported formula for MVP evaluator: {}",
+            s
+        )))
+    }
+}
diff --git a/src/sheet/eval/parser/ast.rs b/crates/litchi-eval/src/parser/ast.rs
similarity index 96%
rename from src/sheet/eval/parser/ast.rs
rename to crates/litchi-eval/src/parser/ast.rs
index ede3d05..370f609 100644
--- a/src/sheet/eval/parser/ast.rs
+++ b/crates/litchi-eval/src/parser/ast.rs
@@ -1,6 +1,6 @@
 //! AST types for formula expressions.
 
-use crate::sheet::CellValue;
+use litchi_core::sheet::CellValue;
 
 /// Binary operators supported by the expression parser.
 #[derive(Debug, Clone, Copy)]
diff --git a/src/sheet/eval/parser/expr.rs b/crates/litchi-eval/src/parser/expr.rs
similarity index 100%
rename from src/sheet/eval/parser/expr.rs
rename to crates/litchi-eval/src/parser/expr.rs
diff --git a/src/sheet/eval/parser/literal.rs b/crates/litchi-eval/src/parser/literal.rs
similarity index 98%
rename from src/sheet/eval/parser/literal.rs
rename to crates/litchi-eval/src/parser/literal.rs
index c1107dc..e5823a7 100644
--- a/src/sheet/eval/parser/literal.rs
+++ b/crates/litchi-eval/src/parser/literal.rs
@@ -1,6 +1,6 @@
 //! Literal parsing for formula expressions.
 
-use crate::sheet::CellValue;
+use litchi_core::sheet::CellValue;
 
 /// Try to parse a literal formula expression into a `CellValue`.
/// diff --git a/src/sheet/eval/parser/mod.rs b/crates/litchi-eval/src/parser/mod.rs similarity index 100% rename from src/sheet/eval/parser/mod.rs rename to crates/litchi-eval/src/parser/mod.rs diff --git a/src/sheet/eval/parser/reference.rs b/crates/litchi-eval/src/parser/reference.rs similarity index 100% rename from src/sheet/eval/parser/reference.rs rename to crates/litchi-eval/src/parser/reference.rs diff --git a/src/sheet/eval/parser/structured_ref.rs b/crates/litchi-eval/src/parser/structured_ref.rs similarity index 100% rename from src/sheet/eval/parser/structured_ref.rs rename to crates/litchi-eval/src/parser/structured_ref.rs diff --git a/crates/litchi-fonts/Cargo.toml b/crates/litchi-fonts/Cargo.toml new file mode 100644 index 0000000..8f84100 --- /dev/null +++ b/crates/litchi-fonts/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "litchi-fonts" +description = "Font discovery, loading, and subsetting for the Litchi office-formats library." +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +allsorts = { workspace = true } +font-kit = { workspace = true } +litchi-core = { workspace = true } +roaring = { workspace = true } +thiserror = { workspace = true } diff --git a/crates/litchi-fonts/README.md b/crates/litchi-fonts/README.md new file mode 100644 index 0000000..e66cfd8 --- /dev/null +++ b/crates/litchi-fonts/README.md @@ -0,0 +1,49 @@ +# litchi-fonts + +Font discovery, loading, and subsetting for the Litchi office-formats library. + +## Overview + +`litchi-fonts` provides the font-handling layer used when generating Office +documents that need to embed or subset typefaces. It wraps `font-kit` for +system font enumeration, `allsorts` for OpenType table parsing, and +`roaring` bitmaps for compact glyph-coverage tracking. 
It is consumed by
+the OOXML writer (`docx`/`pptx` font embedding) inside the
+[Litchi](https://github.com/DevExzh/litchi) workspace.
+
+## System Dependencies
+
+On Ubuntu/Debian, install the FreeType and Fontconfig development packages:
+
+```bash
+sudo apt install pkg-config libfreetype6-dev libfontconfig1-dev
+```
+
+## Usage
+
+```toml
+[dependencies]
+litchi-fonts = "0.0.1"
+```
+
+```rust
+use litchi_fonts::{FontLoader, FontError};
+
+fn load(family: &str) -> Result<usize, FontError> {
+    let loader = FontLoader::new();
+    let font = loader.load_system_font(family)?;
+    Ok(font.data.len())
+}
+```
+
+## Features
+
+- System font discovery via `FontLoader`
+- OpenType property extraction (panose, charset, family, pitch, Unicode signature)
+- Glyph-set collection through the `CollectGlyphs` trait, backed by `RoaringBitmap`
+- Pluggable font subsetting via the `FontSubsetter` trait
+
+## License
+
+Licensed under the Apache License, Version 2.0. Part of the
+[Litchi](https://github.com/DevExzh/litchi) workspace.
diff --git a/crates/litchi-fonts/examples/load_font.rs b/crates/litchi-fonts/examples/load_font.rs
new file mode 100644
index 0000000..578e718
--- /dev/null
+++ b/crates/litchi-fonts/examples/load_font.rs
@@ -0,0 +1,82 @@
+//! Load a system font and print its metadata.
+//!
+//! Run with:
+//!
+//! ```bash
+//! cargo run -p litchi-fonts --example load_font
+//! ```
+//!
+//! The example tries a list of common font families and stops at the first one
+//! it can resolve on the host system. If none of the candidates exist, it
+//! exits cleanly with an explanatory message rather than panicking — useful
+//! for CI environments that may not have all fonts installed.
+
+use litchi_fonts::{FontData, FontError, FontLoader};
+
+/// Common font families that are usually available on at least one of the
+/// major desktop / CI platforms (macOS, Windows, Linux distros with the
+/// `fontconfig`/`liberation`/`dejavu` packages).
+const CANDIDATE_FAMILIES: &[&str] = &[
+    "Arial",
+    "Helvetica",
+    "DejaVu Sans",
+    "Liberation Sans",
+    "Times New Roman",
+    "Sans",
+];
+
+/// Return the first family in `families` that the loader can resolve.
+///
+/// Every failure is reported on stdout and the next candidate is tried;
+/// `None` means no candidate could be loaded.
+fn try_load(loader: &FontLoader, families: &[&str]) -> Option<FontData> {
+    for family in families {
+        match loader.load_system_font(family) {
+            Ok(font) => {
+                println!("Loaded font family: {family}");
+                return Some(font);
+            },
+            Err(FontError::NotFound(name)) => {
+                println!(" - '{name}' not found, trying next candidate...");
+            },
+            Err(err) => {
+                println!(" - error loading '{family}': {err}");
+            },
+        }
+    }
+    None
+}
+
+/// Load the first available candidate font and print a short report.
+///
+/// Exits successfully (after an explanatory message) when no candidate font
+/// is installed.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let loader = FontLoader::new();
+
+    let Some(font) = try_load(&loader, CANDIDATE_FAMILIES) else {
+        println!(
+            "No candidate fonts could be loaded on this system. \
+             Tried: {CANDIDATE_FAMILIES:?}"
+        );
+        println!("Skipping report. (This is non-fatal — useful for CI without fonts.)");
+        return Ok(());
+    };
+
+    println!();
+    println!("Font report");
+    println!("===========");
+    println!("name : {}", font.name);
+    println!("data length: {} bytes", font.data.len());
+    println!("face index : {}", font.index);
+
+    match &font.properties {
+        Some(props) => {
+            println!("properties :");
+            println!(" panose : {:?}", props.panose);
+            println!(" charset: {:?}", props.charset);
+            println!(" family : {:?}", props.family);
+            println!(" pitch : {:?}", props.pitch);
+            if let Some((u0, u1, u2, u3, c0, c1)) = &props.sig {
+                println!(" sig : usb=({u0} {u1} {u2} {u3}) csb=({c0} {c1})");
+            }
+        },
+        None => {
+            // Say so explicitly instead of printing a bare label.
+            println!("properties : <none>");
+        },
+    }
+
+    Ok(())
+}
diff --git a/crates/litchi-fonts/examples/subset_glyphs.rs b/crates/litchi-fonts/examples/subset_glyphs.rs
new file mode 100644
index 0000000..fe71921
--- /dev/null
+++ b/crates/litchi-fonts/examples/subset_glyphs.rs
@@ -0,0 +1,130 @@
+//! Demonstrate glyph collection and font subsetting.
+//!
+//! Run with:
+//!
+//! ```bash
+//! cargo run -p litchi-fonts --example subset_glyphs
+//! ```
+//!
+//! This example:
+//! 1.
Loads a system font (with graceful fallbacks),
+//! 2. Implements `CollectGlyphs` on a tiny in-memory document type to
+//!    show how callers integrate with the trait,
+//! 3. Builds one `RoaringBitmap` per font name from the code points used
+//!    in the sample text runs,
+//! 4. Keeps a fixed handful of low glyph IDs and runs the concrete
+//!    `AllsortsSubsetter` on them to produce a smaller font blob.
+//!
+//! Like `load_font.rs`, the example exits cleanly when no candidate font
+//! is available on the host so it remains usable in CI.
+
+use std::collections::HashMap;
+
+use litchi_fonts::{
+    AllsortsSubsetter, CollectGlyphs, FontData, FontError, FontLoader, FontSubsetter,
+};
+use roaring::RoaringBitmap;
+
+/// Font families tried in order by `try_load`.
+const CANDIDATE_FAMILIES: &[&str] = &[
+    "Arial",
+    "Helvetica",
+    "DejaVu Sans",
+    "Liberation Sans",
+    "Times New Roman",
+    "Sans",
+];
+
+/// A trivial document representation: each entry is a `(font name, text run)`
+/// pair. Implementing `CollectGlyphs` shows the shape of the trait that
+/// real document parsers (docx, pptx, ...) provide.
+struct SimpleDocument<'a> {
+    runs: Vec<(&'a str, &'a str)>,
+}
+
+impl CollectGlyphs for SimpleDocument<'_> {
+    /// Merge the code points of every run into one bitmap per font name.
+    fn collect_glyphs(&self) -> HashMap<String, RoaringBitmap> {
+        let mut out: HashMap<String, RoaringBitmap> = HashMap::new();
+        for (font_name, text) in &self.runs {
+            let bitmap = out.entry((*font_name).to_string()).or_default();
+            for ch in text.chars() {
+                bitmap.insert(u32::from(ch));
+            }
+        }
+        out
+    }
+}
+
+/// Return the first family in `families` that the loader can resolve.
+///
+/// Missing families are skipped silently; other errors are printed before
+/// the next candidate is tried.
+fn try_load(loader: &FontLoader, families: &[&str]) -> Option<FontData> {
+    for family in families {
+        match loader.load_system_font(family) {
+            Ok(font) => {
+                println!("Loaded font family: {family}");
+                return Some(font);
+            },
+            Err(FontError::NotFound(_)) => continue,
+            Err(err) => {
+                println!(" - error loading '{family}': {err}");
+            },
+        }
+    }
+    None
+}
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // 1. Demonstrate `CollectGlyphs` on a fake document, regardless of
+    //    whether any system font is installed. This part always runs.
+    let doc = SimpleDocument {
+        runs: vec![
+            ("BodyFont", "The quick brown fox jumps over the lazy dog."),
+            ("BodyFont", "Sphinx of black quartz, judge my vow."),
+            ("HeadingFont", "Hello, world!"),
+        ],
+    };
+    let glyph_map = doc.collect_glyphs();
+    println!("CollectGlyphs result:");
+    for (font_name, bitmap) in &glyph_map {
+        println!(
+            " font '{font_name}' -> {} unique code points",
+            bitmap.len()
+        );
+    }
+
+    // 2. Try to load a real system font for the subsetting demo.
+    let loader = FontLoader::new();
+    let Some(font) = try_load(&loader, CANDIDATE_FAMILIES) else {
+        println!();
+        println!(
+            "No candidate fonts could be loaded on this system; \
+             skipping subsetting demo. Tried: {CANDIDATE_FAMILIES:?}"
+        );
+        return Ok(());
+    };
+
+    println!();
+    println!("Original font data: {} bytes", font.data.len());
+
+    // 3. Build a small set of glyph IDs to keep. We pick a tiny set
+    //    deliberately so the size reduction is obvious.
+    //    Glyph 0 is always `.notdef` and must be present in a valid font;
+    //    keep a handful of low IDs that exist in essentially every font.
+    let glyph_ids: Vec<u16> = (0u16..16).collect();
+
+    // 4. Run the concrete subsetter. The `Pdf` subset profile inside the
+    //    impl is permissive enough to work with most TrueType/OpenType
+    //    fonts; if it fails we report and exit gracefully.
+    let subsetter = AllsortsSubsetter::new();
+    match subsetter.subset(&font, &glyph_ids) {
+        Ok(subset_bytes) => {
+            println!(
+                "Subset font ({} glyph IDs): {} bytes ({:.1}% of original)",
+                glyph_ids.len(),
+                subset_bytes.len(),
+                100.0 * (subset_bytes.len() as f64) / (font.data.len() as f64),
+            );
+        },
+        Err(err) => {
+            println!("Subsetting failed (this can happen with CFF or unusual fonts): {err}");
+        },
+    }
+
+    Ok(())
+}
diff --git a/crates/litchi-fonts/src/lib.rs b/crates/litchi-fonts/src/lib.rs
new file mode 100644
index 0000000..6dbfffa
--- /dev/null
+++ b/crates/litchi-fonts/src/lib.rs
@@ -0,0 +1,58 @@
+//!
Font discovery, loading, and subsetting for the Litchi office-formats library.
+
+use std::collections::HashMap;
+
+use roaring::RoaringBitmap;
+
+pub mod loader;
+pub mod subsetter;
+
+pub use loader::*;
+pub use subsetter::*;
+
+/// Trait for document types that can collect all glyphs (characters) used in the document.
+///
+/// This is used to determine which fonts need to be embedded and which glyphs
+/// should be included in font subsets.
+///
+/// Uses `RoaringBitmap` instead of `HashSet` for better cache locality and memory efficiency.
+/// The bitmap stores Unicode code points (u32 values from chars).
+pub trait CollectGlyphs {
+    /// Returns a map of font names to the set of character code points used with that font.
+    fn collect_glyphs(&self) -> HashMap<String, RoaringBitmap>;
+}
+
+/// A loaded font together with optional embedding metadata.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct FontData {
+    /// Name of the font.
+    pub name: String,
+    /// Raw bytes of the font file.
+    pub data: Vec<u8>,
+    /// Index of the face inside `data`.
+    pub index: u32,
+    /// Properties used for Office font embedding, when available.
+    pub properties: Option<FontProperties>,
+}
+
+/// Font properties needed for Office font embedding
+// NOTE(review): the element types of `panose`, `charset`, `family` and
+// `pitch` are not recoverable from this patch; `String` is assumed by
+// analogy with `sig` — confirm against the loader.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct FontProperties {
+    pub panose: Option<String>,
+    pub charset: Option<String>,
+    pub family: Option<String>,
+    pub pitch: Option<String>,
+    /// Unicode signature (usb0, usb1, usb2, usb3, csb0, csb1)
+    pub sig: Option<(String, String, String, String, String, String)>,
+}
+
+/// Strategy for reducing a font to a chosen set of glyphs.
+pub trait FontSubsetter {
+    /// Build a subset of `font` that keeps the glyphs in `glyph_ids` and
+    /// return the bytes of the resulting font.
+    fn subset(&self, font: &FontData, glyph_ids: &[u16]) -> Result<Vec<u8>, FontError>;
+}
+
+/// Errors reported by font loading and subsetting.
+#[derive(Debug, thiserror::Error)]
+pub enum FontError {
+    /// No font matched the requested name.
+    #[error("Font not found: {0}")]
+    NotFound(String),
+    /// The font bytes could not be interpreted.
+    #[error("Invalid font data")]
+    InvalidData,
+    /// The subsetter failed; the payload carries the reason.
+    #[error("Subsetting failed: {0}")]
+    SubsettingFailed(String),
+    /// An underlying I/O operation failed.
+    #[error("IO error: {0}")]
+    Io(#[from] std::io::Error),
+}
diff --git a/src/fonts/loader.rs b/crates/litchi-fonts/src/loader.rs
similarity index 99%
rename from src/fonts/loader.rs
rename to crates/litchi-fonts/src/loader.rs
index 6648b48..55288dc 100644
--- a/src/fonts/loader.rs
+++ b/crates/litchi-fonts/src/loader.rs
@@ -3,7 +3,7 @@ use font_kit::handle::Handle;
 use
font_kit::properties::Properties; use font_kit::source::SystemSource; -use crate::fonts::{FontData, FontError, FontProperties}; +use crate::{FontData, FontError, FontProperties}; pub struct FontLoader { source: SystemSource, diff --git a/src/fonts/subsetter.rs b/crates/litchi-fonts/src/subsetter.rs similarity index 95% rename from src/fonts/subsetter.rs rename to crates/litchi-fonts/src/subsetter.rs index fdb9336..bd405e2 100644 --- a/src/fonts/subsetter.rs +++ b/crates/litchi-fonts/src/subsetter.rs @@ -4,7 +4,7 @@ use allsorts::{ subset::{CmapTarget, SubsetProfile, subset}, }; -use crate::fonts::{FontData, FontError, FontSubsetter}; +use crate::{FontData, FontError, FontSubsetter}; pub struct AllsortsSubsetter; diff --git a/crates/litchi-formula/Cargo.toml b/crates/litchi-formula/Cargo.toml new file mode 100644 index 0000000..1783a13 --- /dev/null +++ b/crates/litchi-formula/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "litchi-formula" +description = "MathType/MathML to LaTeX formula conversion for the Litchi office-formats library." +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +atoi_simd = { workspace = true } +bumpalo = { workspace = true } +fast-float2 = { workspace = true } +memchr = { workspace = true } +phf = { workspace = true } +quick-xml = { workspace = true } +rowan = { workspace = true } +smallvec = { workspace = true } +zerocopy = { workspace = true } +zerocopy-derive = { workspace = true } diff --git a/crates/litchi-formula/README.md b/crates/litchi-formula/README.md new file mode 100644 index 0000000..a2a95b8 --- /dev/null +++ b/crates/litchi-formula/README.md @@ -0,0 +1,43 @@ +# litchi-formula + +Mathematical formula parsing and conversion between OMML, MTEF, and LaTeX. 
+ +## Overview + +`litchi-formula` provides parsers and converters for the math formats +encountered in Office documents: **OMML** (Office Math Markup Language, +used in `.docx`/`.pptx`), **MTEF** (the binary MathType Equation Format +embedded in legacy `.doc`/`.ppt` files), and **LaTeX** as the canonical +output. Internally it uses an arena-allocated AST so OMML and MTEF input +share the same conversion path. It is part of the +[Litchi](https://github.com/DevExzh/litchi) workspace. + +## Usage + +```toml +[dependencies] +litchi-formula = "0.0.1" +``` + +```rust +use litchi_formula::{omml_to_latex, mtef_to_latex, FormulaError}; + +fn convert_examples(mtef_bytes: &[u8]) -> Result<(String, String), FormulaError> { + let from_omml = omml_to_latex("x")?; + let from_mtef = mtef_to_latex(mtef_bytes)?; + Ok((from_omml, from_mtef)) +} +``` + +## Features + +- OMML parser (`OmmlParser`) for modern Office math markup +- MTEF parser (`MtefParser`) for legacy MathType binary streams +- LaTeX writer (`LatexConverter`) over a shared arena-allocated AST +- One-shot helpers: `omml_to_latex`, `mtef_to_latex` +- Unified `FormulaError` covering all parser/converter failures + +## License + +Licensed under the Apache License, Version 2.0. Part of the +[Litchi](https://github.com/DevExzh/litchi) workspace. diff --git a/crates/litchi-formula/examples/build_ast_manually.rs b/crates/litchi-formula/examples/build_ast_manually.rs new file mode 100644 index 0000000..4498a6c --- /dev/null +++ b/crates/litchi-formula/examples/build_ast_manually.rs @@ -0,0 +1,86 @@ +//! Build a small formula AST by hand and convert it to LaTeX. +//! +//! Run with: +//! +//! ```bash +//! cargo run -p litchi-formula --example build_ast_manually --all-features +//! ``` +//! +//! This example skips the OMML/MTEF parsers entirely. It uses +//! [`Formula::new`] together with [`FormulaBuilder`] (and the [`MathNode`] +//! enum directly) to construct a small AST in code, then converts it to +//! 
LaTeX with [`LatexConverter`].
+//!
+//! The two formulas built are:
+//! 1. `x^2 + 1` (a simple superscript followed by an addition and a
+//!    number literal).
+//! 2. `(a + b) / 2` (a fraction whose numerator is itself a small
+//!    sub-expression).
+
+use litchi_formula::{Formula, FormulaBuilder, LatexConverter, MathNode, Operator};
+
+/// Build both formulas by hand and print their LaTeX form.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let mut converter = LatexConverter::new();
+
+    // ---------------------------------------------------------------
+    // Formula 1: x^2 + 1
+    // ---------------------------------------------------------------
+    //
+    // We use two `Formula` values: the first owns the arena that backs
+    // the AST nodes, and the second is what we hand to `LatexConverter`.
+    // This split mirrors what `omml_to_latex()` does internally and is
+    // needed because `FormulaBuilder` holds a shared borrow of the arena
+    // for the whole construction phase, which prevents calling
+    // `set_root` (a mutable borrow) on the same `Formula`.
+    let arena_owner = Formula::new();
+    let nodes = {
+        let builder = FormulaBuilder::new(arena_owner.arena());
+
+        // x^2 -- a Power node whose base is `x` and exponent is `2`.
+        let power = builder.power(vec![builder.text("x")], vec![builder.number("2")]);
+
+        // The trailing "+ 1" is rendered as an Operator followed by a
+        // number literal. Using `MathNode::Operator` (rather than raw
+        // `Text`) lets the LaTeX backend pick the right spacing.
+        let plus = MathNode::Operator(Operator::Plus);
+        let one = builder.number("1");
+
+        vec![power, plus, one]
+    };
+    let mut formula = Formula::new();
+    formula.set_root(nodes);
+    println!("Formula 1 : x^2 + 1");
+    println!("LaTeX : {}", converter.convert(&formula)?);
+    println!();
+
+    // ---------------------------------------------------------------
+    // Formula 2: (a + b) / 2
+    // ---------------------------------------------------------------
+    let arena_owner = Formula::new();
+    let nodes = {
+        let builder = FormulaBuilder::new(arena_owner.arena());
+
+        // Numerator: `a + b`, written as three nodes that flow inline.
+        let numerator: Vec<MathNode> = vec![
+            builder.text("a"),
+            MathNode::Operator(Operator::Plus),
+            builder.text("b"),
+        ];
+
+        // Denominator: just a single number.
+        let denominator: Vec<MathNode> = vec![builder.number("2")];
+
+        vec![builder.frac(numerator, denominator)]
+    };
+    let mut formula = Formula::new();
+    formula.set_root(nodes);
+    println!("Formula 2 : (a + b) / 2");
+    println!("LaTeX : {}", converter.convert(&formula)?);
+
+    // `arena_owner` was shadowed for formula 2, so this drops only the
+    // second arena explicitly; the first one is released when `main`
+    // returns. Both therefore stay alive for every `convert` call above.
+    drop(arena_owner);
+
+    Ok(())
+}
diff --git a/crates/litchi-formula/examples/mtef_to_latex.rs b/crates/litchi-formula/examples/mtef_to_latex.rs
new file mode 100644
index 0000000..3ef8276
--- /dev/null
+++ b/crates/litchi-formula/examples/mtef_to_latex.rs
@@ -0,0 +1,107 @@
+//! Convert MTEF (MathType Equation Format) binary data to LaTeX.
+//!
+//! Run with:
+//!
+//! ```bash
+//! # use the bundled inline sample bytes
+//! cargo run -p litchi-formula --example mtef_to_latex --all-features
+//!
+//! # or pass a path to a file containing raw MTEF bytes
+//! cargo run -p litchi-formula --example mtef_to_latex --all-features -- /path/to/equation.mtef
+//! ```
+//!
+//! When invoked without arguments, this example feeds a minimal valid MTEF
+//!
header (copied from the `litchi-formula` test suite) through
+//! [`MtefParser`] to demonstrate version-info extraction and end-to-end
+//! conversion via [`mtef_to_latex`]. When a path is provided, the bytes at
+//! that path are read and converted instead.
+
+use std::fs;
+use std::path::PathBuf;
+
+use litchi_formula::{Formula, MtefParser, mtef_to_latex};
+
+/// A minimal but structurally valid MTEF byte sequence.
+///
+/// Lifted from `crates/litchi-formula/src/mtef/mod.rs::tests::test_mtef_parser_with_valid_header`.
+/// It contains the 28-byte OLE wrapper, an MTEF v5 header identifying
+/// MathType on Windows, and just enough body (a SIZE record followed by
+/// an END record) to terminate parsing cleanly.
+const SAMPLE_MTEF: &[u8] = &[
+    // OLE header (28 bytes)
+    0x1C, 0x00, // cb_hdr = 28
+    0x00, 0x00, 0x02, 0x00, // version = 0x00020000 (little endian)
+    0xD3, 0xC2, // format = 0xC2D3
+    0x0B, 0x00, 0x00, 0x00, // size = 11 (MTEF header + minimal content)
+    0x00, 0x00, 0x00, 0x00, // reserved[0]
+    0x00, 0x00, 0x00, 0x00, // reserved[1]
+    0x00, 0x00, 0x00, 0x00, // reserved[2]
+    0x00, 0x00, 0x00, 0x00, // reserved[3]
+    // MTEF header with signature
+    0x28, 0x04, 0x6D, 0x74, // signature "(\x04mt"
+    0x05, // version = 5
+    0x01, // platform = 1 (Windows)
+    0x01, // product = 1 (MathType)
+    0x01, // version = 1
+    0x00, // version_sub = 0
+    0x00, // application_key (empty null-terminated string)
+    0x00, // inline = 0
+    // Minimal MTEF content (SIZE + END tags)
+    0x09, // SIZE tag
+    0x00, // END tag
+];
+
+/// Convert the file named by the first argument, or the inline sample when
+/// no argument is given, and print what the parser and converter report.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let arg = std::env::args().nth(1).map(PathBuf::from);
+
+    let (source_label, bytes): (String, Vec<u8>) = match arg {
+        Some(path) => {
+            let bytes = fs::read(&path)?;
+            if bytes.is_empty() {
+                println!("no input: file `{}` is empty", path.display());
+                return Ok(());
+            }
+            (format!("file `{}`", path.display()), bytes)
+        },
+        None => ("inline sample".to_string(), SAMPLE_MTEF.to_vec()),
+    };
+
+    println!("Source : {source_label}");
+    println!("Byte length : {}", bytes.len());
+
+    // Inspect the parser without running a full conversion first. The
+    // arena-tied lifetime means the parser borrows from `bytes`, so we
+    // create a dedicated `Formula` for this scope.
+    {
+        let formula = Formula::new();
+        let parser = MtefParser::new(formula.arena(), &bytes);
+        println!("Parser valid : {}", parser.is_valid());
+        if let Some((mtef_version, platform, product, version, sub)) = parser.version_info() {
+            println!(
+                "MTEF header : version={mtef_version}, platform={platform}, \
+                 product={product}, app_version={version}.{sub}"
+            );
+        } else {
+            println!("MTEF header : (unavailable - data did not pass validation)");
+        }
+    }
+
+    // Now run the full helper to produce a LaTeX string. The minimal
+    // sample contains no glyphs, so the output for it is essentially an
+    // empty display-style block - that is intentional and demonstrates
+    // that the pipeline runs end-to-end without errors.
+    match mtef_to_latex(&bytes) {
+        Ok(latex) => {
+            if latex.is_empty() {
+                println!("LaTeX : <empty>");
+            } else {
+                println!("LaTeX : {latex}");
+            }
+        },
+        Err(e) => {
+            // Report the failure instead of discarding the error; the example
+            // still exits successfully.
+            println!("LaTeX : <conversion failed: {e}>");
+        },
+    }
+
+    Ok(())
+}
diff --git a/crates/litchi-formula/examples/omml_to_latex.rs b/crates/litchi-formula/examples/omml_to_latex.rs
new file mode 100644
index 0000000..b4858ee
--- /dev/null
+++ b/crates/litchi-formula/examples/omml_to_latex.rs
@@ -0,0 +1,122 @@
+//! Convert several OMML (Office Math Markup Language) snippets to LaTeX.
+//!
+//! Run with:
+//!
+//! ```bash
+//! cargo run -p litchi-formula --example omml_to_latex --all-features
+//! ```
+//!
+//! The example feeds a handful of representative formulas (a Pythagorean
+//! identity, a fraction, an integral, and a square root) through
+//! [`OmmlParser`] and then through [`LatexConverter`], printing both the
+//! input markup and the resulting LaTeX for each one.
+
+use litchi_formula::{Formula, LatexConverter, OmmlParser, omml_to_latex};
+
+/// Convert every sample twice: once through the `omml_to_latex` helper and
+/// once through `OmmlParser` + `LatexConverter` directly.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // A handful of representative OMML fragments.
+    //
+    // Each fragment is wrapped in a `<m:oMath>` root with the standard math
+    // namespace declared, which is what the parser expects to see at the top
+    // level of an OMML island inside an OOXML document.
+    //
+    // NOTE(review): the XML markup inside the raw strings below appears to
+    // have been stripped from this copy of the patch; the payloads are kept
+    // exactly as found and need to be restored from the original file.
+    let samples: &[(&str, &str)] = &[
+        (
+            "x^2 + y^2 = z^2",
+            r#" + + x + 2 + + + + + y + 2 + + = + + z + 2 + + "#,
+        ),
+        (
+            "1/2",
+            r#" + + 1 + 2 + + "#,
+        ),
+        (
+            "definite integral of x dx from 0 to 1",
+            r#" + + + 0 + 1 + xdx + + "#,
+        ),
+        (
+            "square root of (a + b)",
+            r#" + + + + + a + + + b + + + "#,
+        ),
+    ];
+
+    println!("== Using the high-level helper `omml_to_latex` ==\n");
+    for (label, omml) in samples {
+        println!("Formula : {label}");
+        println!("OMML : {}", oneline(omml));
+        match omml_to_latex(omml) {
+            Ok(latex) => println!("LaTeX : {latex}"),
+            Err(e) => println!("error : {e}"),
+        }
+        println!();
+    }
+
+    // The same conversion done by hand, showing the building blocks the
+    // helper composes internally. This is useful when you want to reuse a
+    // single converter across many formulas to amortize allocations.
+    println!("== Using `OmmlParser` and `LatexConverter` directly ==\n");
+    let mut converter = LatexConverter::new();
+    for (label, omml) in samples {
+        let formula = Formula::new();
+        let parser = OmmlParser::new(formula.arena());
+        let nodes = match parser.parse(omml) {
+            Ok(nodes) => nodes,
+            Err(e) => {
+                println!("Formula : {label}");
+                println!("error : {e}");
+                println!();
+                continue;
+            },
+        };
+
+        let mut formula = Formula::new();
+        formula.set_root(nodes);
+
+        let latex = converter.convert(&formula)?;
+        println!("Formula : {label}");
+        println!("LaTeX : {latex}");
+        println!();
+    }
+
+    Ok(())
+}
+
+/// Squash whitespace so OMML prints on a single line in the demo output.
+fn oneline(s: &str) -> String { + s.split_whitespace().collect::>().join(" ") +} diff --git a/crates/litchi-formula/fuzz/.gitignore b/crates/litchi-formula/fuzz/.gitignore new file mode 100644 index 0000000..1a45eee --- /dev/null +++ b/crates/litchi-formula/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/crates/litchi-formula/fuzz/Cargo.toml b/crates/litchi-formula/fuzz/Cargo.toml new file mode 100644 index 0000000..6d154b5 --- /dev/null +++ b/crates/litchi-formula/fuzz/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "litchi-formula-fuzz" +version = "0.0.0" +edition = "2024" +publish = false +authors = ["Ryker Zhu "] +license = "Apache-2.0" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +litchi-formula = { path = ".." } + +[[bin]] +name = "convert_formula" +path = "fuzz_targets/convert_formula.rs" +test = false +doc = false +bench = false + +[profile.release] +debug = 1 +codegen-units = 1 +lto = "thin" + +[workspace] diff --git a/crates/litchi-formula/fuzz/fuzz_targets/convert_formula.rs b/crates/litchi-formula/fuzz/fuzz_targets/convert_formula.rs new file mode 100644 index 0000000..1697f6f --- /dev/null +++ b/crates/litchi-formula/fuzz/fuzz_targets/convert_formula.rs @@ -0,0 +1,8 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + // Primary target: binary MTEF -> LaTeX (takes &[u8] directly). 
+ let _ = litchi_formula::mtef_to_latex(data); +}); diff --git a/crates/litchi-formula/src/ast/builder.rs b/crates/litchi-formula/src/ast/builder.rs new file mode 100644 index 0000000..7be7c3b --- /dev/null +++ b/crates/litchi-formula/src/ast/builder.rs @@ -0,0 +1,97 @@ +use super::node::MathNode; +use bumpalo::Bump; +use std::borrow::Cow; + +/// Builder for constructing formula nodes efficiently +pub struct FormulaBuilder<'arena> { + arena: &'arena Bump, +} + +impl<'arena> FormulaBuilder<'arena> { + /// Create a new builder with the given arena + pub fn new(arena: &'arena Bump) -> Self { + Self { arena } + } + + /// Allocate a string in the arena + pub fn alloc_str(&self, s: &str) -> &'arena str { + self.arena.alloc_str(s) + } + + /// Create a text node + pub fn text(&self, text: impl Into>) -> MathNode<'arena> { + MathNode::Text(text.into()) + } + + /// Create a number node + pub fn number(&self, num: impl Into>) -> MathNode<'arena> { + MathNode::Number(num.into()) + } + + /// Create a fraction node + pub fn frac( + &self, + numerator: Vec>, + denominator: Vec>, + ) -> MathNode<'arena> { + MathNode::Frac { + numerator, + denominator, + line_thickness: None, + frac_type: None, + } + } + + /// Create a square root node + pub fn sqrt(&self, base: Vec>) -> MathNode<'arena> { + MathNode::Root { base, index: None } + } + + /// Create an nth root node + pub fn root( + &self, + base: Vec>, + index: Vec>, + ) -> MathNode<'arena> { + MathNode::Root { + base, + index: Some(index), + } + } + + /// Create a power node + pub fn power( + &self, + base: Vec>, + exponent: Vec>, + ) -> MathNode<'arena> { + MathNode::Power { base, exponent } + } + + /// Create a subscript node + pub fn sub( + &self, + base: Vec>, + subscript: Vec>, + ) -> MathNode<'arena> { + MathNode::Sub { base, subscript } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ast::Formula; + + #[test] + fn test_builder() { + let formula = Formula::new(); + let builder = 
FormulaBuilder::new(formula.arena()); + + let node = builder.text("x"); + match node { + MathNode::Text(ref text) => assert_eq!(text, "x"), + _ => panic!("Expected text node"), + } + } +} diff --git a/src/formula/ast/mod.rs b/crates/litchi-formula/src/ast/mod.rs similarity index 100% rename from src/formula/ast/mod.rs rename to crates/litchi-formula/src/ast/mod.rs diff --git a/src/formula/ast/node.rs b/crates/litchi-formula/src/ast/node.rs similarity index 100% rename from src/formula/ast/node.rs rename to crates/litchi-formula/src/ast/node.rs diff --git a/src/formula/ast/types.rs b/crates/litchi-formula/src/ast/types.rs similarity index 100% rename from src/formula/ast/types.rs rename to crates/litchi-formula/src/ast/types.rs diff --git a/crates/litchi-formula/src/latex/conv/converter.rs b/crates/litchi-formula/src/latex/conv/converter.rs new file mode 100644 index 0000000..c216d4c --- /dev/null +++ b/crates/litchi-formula/src/latex/conv/converter.rs @@ -0,0 +1,166 @@ +// LaTeX Converter Implementation +// +// This module contains the LatexConverter struct and its core methods. + +use super::error::LatexError; +use super::utils::estimate_nodes_size; +use super::utils::extend_buffer_with_capacity; +use crate::ast::{Formula, MathNode}; +use crate::latex::{LatexConversionStats, LatexStringCache}; +use smallvec::SmallVec; + +/// LaTeX converter that converts formula AST to LaTeX strings +/// +/// Uses optimized string building and memory management for high performance. +/// Includes SIMD optimizations and efficient buffer management. 
+pub struct LatexConverter { + /// Buffer for building the LaTeX output with pre-allocated capacity + pub(super) buffer: String, + /// Temporary buffer for complex conversions to avoid allocations + pub(super) temp_buffer: SmallVec<[String; 4]>, + /// String cache for repeated LaTeX commands + pub(super) string_cache: LatexStringCache, + /// Performance statistics + pub(super) stats: LatexConversionStats, +} + +impl LatexConverter { + /// Create a new LaTeX converter with optimized initial capacity + pub fn new() -> Self { + Self { + buffer: String::with_capacity(2048), // Larger initial capacity for better performance + temp_buffer: SmallVec::new(), + string_cache: LatexStringCache::new(), + stats: LatexConversionStats::default(), + } + // NOTE: Cache initialization removed - it was O(n²) and never used. + // The cache will be populated lazily during conversion as needed. + } + + /// Create a new LaTeX converter with custom initial capacity + pub fn with_capacity(capacity: usize) -> Self { + Self { + buffer: String::with_capacity(capacity), + temp_buffer: SmallVec::new(), + string_cache: LatexStringCache::new(), + stats: LatexConversionStats::default(), + } + // NOTE: Cache initialization removed - lazy population is more efficient + } + + // NOTE: initialize_cache() removed - was O(n²) complexity with 150+ string allocations. + // The cache now populated lazily during conversion, which is more efficient since: + // 1. Avoids upfront cost when converter is created but not used + // 2. Only caches strings that are actually needed + // 3. Eliminates wasteful linear search through growing cache (O(n²) → O(1) with lazy) + + /// Convert a formula to LaTeX + /// + /// Returns a reference to avoid unnecessary string cloning. 
+ /// + /// # Example + /// ```ignore + /// let converter = LatexConverter::new(); + /// let latex = converter.convert(&formula)?; + /// ``` + pub fn convert(&mut self, formula: &Formula) -> Result<&str, LatexError> { + self.reset(); + + // Reserve additional capacity based on estimated formula size + let estimated_size = super::utils::estimate_formula_size(formula.root()); + extend_buffer_with_capacity(&mut self.buffer, "", estimated_size); + + // Add display style delimiters + if formula.display_style() { + self.buffer.push_str("\\["); + } else { + self.buffer.push_str("\\("); + } + + // Convert all root nodes + for node in formula.root() { + self.convert_node(node)?; + } + + // Close delimiters + if formula.display_style() { + self.buffer.push_str("\\]"); + } else { + self.buffer.push_str("\\)"); + } + + Ok(&self.buffer) + } + + /// Convert nodes without wrapping delimiters + /// + /// Returns a reference to avoid unnecessary string cloning. + pub fn convert_nodes(&mut self, nodes: &[MathNode]) -> Result<&str, LatexError> { + self.reset(); + + // Reserve capacity + let estimated_size = estimate_nodes_size(nodes); + extend_buffer_with_capacity(&mut self.buffer, "", estimated_size); + + for node in nodes { + self.convert_node(node)?; + } + + Ok(&self.buffer) + } + + /// Get the current buffer content without clearing + #[inline] + pub fn buffer(&self) -> &str { + &self.buffer + } + + /// Reset the converter state for a new conversion + #[inline] + pub fn reset(&mut self) { + self.buffer.clear(); + self.temp_buffer.clear(); + self.string_cache.clear(); + // Keep stats for performance monitoring + } + + /// Clear the internal buffers (legacy method) + #[inline] + pub fn clear(&mut self) { + self.reset(); + } + + /// Get performance statistics + #[inline] + pub fn stats(&self) -> &LatexConversionStats { + &self.stats + } + + /// Get a cached LaTeX command string to avoid repeated allocations + #[inline] + #[allow(dead_code)] + fn get_cached_command(&mut self, cmd: 
&str) -> &str { + let index = self.string_cache.get_or_insert(cmd); + self.string_cache.get(index) + } + + /// Efficiently append a cached LaTeX command to the buffer + #[inline] + pub fn append_cached_command(&mut self, cmd: &str) { + let index = self.string_cache.get_or_insert(cmd); + let cached = self.string_cache.get(index); + self.buffer.push_str(cached); + } +} + +impl Default for LatexConverter { + fn default() -> Self { + Self::new() + } +} + +impl AsRef for LatexConverter { + fn as_ref(&self) -> &str { + &self.buffer + } +} diff --git a/crates/litchi-formula/src/latex/conv/error.rs b/crates/litchi-formula/src/latex/conv/error.rs new file mode 100644 index 0000000..1e8a6ed --- /dev/null +++ b/crates/litchi-formula/src/latex/conv/error.rs @@ -0,0 +1,44 @@ +// Error definitions for LaTeX conversion + +/// Errors that can occur during LaTeX conversion +#[derive(Debug)] +#[non_exhaustive] +pub enum LatexError { + FormatError(String), + InvalidNode(String), +} + +impl std::fmt::Display for LatexError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LatexError::FormatError(msg) => write!(f, "Format error: {}", msg), + LatexError::InvalidNode(msg) => write!(f, "Invalid node: {}", msg), + } + } +} + +impl std::error::Error for LatexError {} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_latex_error_display() { + let err = LatexError::FormatError("test format".to_string()); + assert!(err.to_string().contains("Format error")); + assert!(err.to_string().contains("test format")); + + let err = LatexError::InvalidNode("test node".to_string()); + assert!(err.to_string().contains("Invalid node")); + assert!(err.to_string().contains("test node")); + } + + #[test] + fn test_latex_error_debug() { + let err = LatexError::FormatError("test".to_string()); + let debug_str = format!("{:?}", err); + assert!(debug_str.contains("FormatError")); + assert!(debug_str.contains("test")); + } +} diff --git 
a/crates/litchi-formula/src/latex/conv/matrix.rs b/crates/litchi-formula/src/latex/conv/matrix.rs new file mode 100644 index 0000000..6b09fd0 --- /dev/null +++ b/crates/litchi-formula/src/latex/conv/matrix.rs @@ -0,0 +1,108 @@ +// Matrix conversion logic for LaTeX conversion +// +// This module contains specialized matrix conversion functionality. + +use super::converter::LatexConverter; +use super::error::LatexError; +use super::utils::estimate_matrix_capacity; +use crate::ast::{Alignment, MatrixFence, MatrixProperties}; +use crate::latex::matrix::matrix_fence_to_env; +use std::fmt::Write; + +/// Convert matrix with optimized performance (no temporary converters) +pub fn convert_matrix_optimized_internal( + converter: &mut LatexConverter, + rows: &[Vec>], + fence_type: MatrixFence, + properties: Option<&MatrixProperties>, +) -> Result<(), LatexError> { + if rows.is_empty() { + return Ok(()); + } + + let use_array_env = properties.as_ref().and_then(|p| p.base_alignment).is_some(); + let env = if use_array_env { + "array" + } else { + matrix_fence_to_env(fence_type) + }; + + let mut estimated_capacity = estimate_matrix_capacity(rows); + if use_array_env { + estimated_capacity += 20; + } + converter.buffer.reserve(estimated_capacity); + + if use_array_env { + if let Some(props) = properties { + if let Some(alignment) = props.base_alignment { + write!(converter.buffer, "\\begin{{{}}}", env) + .map_err(|e| LatexError::FormatError(e.to_string()))?; + converter.buffer.push('{'); + let align_char = match alignment { + Alignment::Left => 'l', + Alignment::Center => 'c', + Alignment::Right => 'r', + _ => 'c', + }; + if let Some(num_cols) = rows.first().map(|r| r.len()) { + for _ in 0..num_cols { + converter.buffer.push(align_char); + } + } + converter.buffer.push('}'); + + match fence_type { + MatrixFence::Paren => converter.buffer.push_str("\\left("), + MatrixFence::Bracket => converter.buffer.push_str("\\left["), + MatrixFence::Brace => 
converter.buffer.push_str("\\left\\{"), + MatrixFence::Pipe => converter.buffer.push_str("\\left|"), + MatrixFence::DoublePipe => converter.buffer.push_str("\\left\\|"), + MatrixFence::None => {}, + } + } else { + let num_cols = rows.first().map(|r| r.len()).unwrap_or(1); + write!(converter.buffer, "\\begin{{{}}}", env) + .map_err(|e| LatexError::FormatError(e.to_string()))?; + converter.buffer.push('{'); + for _ in 0..num_cols { + converter.buffer.push('c'); + } + converter.buffer.push('}'); + } + } + } else { + write!(converter.buffer, "\\begin{{{}}}", env) + .map_err(|e| LatexError::FormatError(e.to_string()))?; + } + + for (i, row) in rows.iter().enumerate() { + if i > 0 { + converter.buffer.push_str(" \\\\ "); + } + for (j, cell) in row.iter().enumerate() { + if j > 0 { + converter.buffer.push_str(" & "); + } + for node in cell { + converter.convert_node(node)?; + } + } + } + + if use_array_env { + match fence_type { + MatrixFence::Paren => converter.buffer.push_str("\\right)"), + MatrixFence::Bracket => converter.buffer.push_str("\\right]"), + MatrixFence::Brace => converter.buffer.push_str("\\right\\}"), + MatrixFence::Pipe => converter.buffer.push_str("\\right|"), + MatrixFence::DoublePipe => converter.buffer.push_str("\\right\\|"), + MatrixFence::None => {}, + } + } + + write!(converter.buffer, "\\end{{{}}}", env) + .map_err(|e| LatexError::FormatError(e.to_string()))?; + + Ok(()) +} diff --git a/src/formula/latex/conv/mod.rs b/crates/litchi-formula/src/latex/conv/mod.rs similarity index 100% rename from src/formula/latex/conv/mod.rs rename to crates/litchi-formula/src/latex/conv/mod.rs diff --git a/src/formula/latex/conv/node.rs b/crates/litchi-formula/src/latex/conv/node.rs similarity index 98% rename from src/formula/latex/conv/node.rs rename to crates/litchi-formula/src/latex/conv/node.rs index e4981af..6a5ff9d 100644 --- a/src/formula/latex/conv/node.rs +++ b/crates/litchi-formula/src/latex/conv/node.rs @@ -5,16 +5,16 @@ use 
/// Check whether a string is a plain decimal number.
///
/// Accepted form: an optional single leading `-`, followed by ASCII digits
/// with at most one `.` anywhere among them, and at least one digit overall.
/// So `"42"`, `"-42"`, `"3.14"`, `"-.5"` and `"5."` are accepted, while `""`,
/// `"-"`, `"."`, `"1.2.3"`, `"1-2"`, `"--5"` and `"5-"` are rejected.
///
/// Exponents, a leading `+`, digit separators and surrounding whitespace are
/// not accepted.
#[inline]
#[allow(dead_code)]
pub fn is_valid_number_fast(s: &str) -> bool {
    // A minus sign is only meaningful as the very first character; strip it
    // once so that any later `-` is rejected like every other non-digit.
    let unsigned = s.strip_prefix('-').unwrap_or(s);

    let mut has_digit = false;
    let mut has_dot = false;

    for byte in unsigned.bytes() {
        match byte {
            b'0'..=b'9' => has_digit = true,
            // Only the first decimal point is allowed.
            b'.' if !has_dot => has_dot = true,
            _ => return false,
        }
    }

    // Rejects "", "-", "." and "-." which contain no digit at all.
    has_digit
}
spaces + if memchr(b' ', text.as_bytes()).is_some() { + return true; + } + + // Check for other special characters + contains_latex_special_simd(text) +} + +/// Check if base needs grouping for scripts (subscript/superscript) +#[inline] +#[allow(dead_code)] +pub fn needs_grouping_for_scripts(nodes: &[MathNode]) -> bool { + nodes.len() > 1 +} + +/// Estimate the output size of a formula for buffer pre-allocation +pub fn estimate_formula_size(nodes: &[MathNode]) -> usize { + estimate_nodes_size(nodes) + 10 // Add space for delimiters +} + +/// Estimate the output size of nodes for buffer pre-allocation +pub fn estimate_nodes_size(nodes: &[MathNode]) -> usize { + nodes.iter().map(estimate_node_size).sum() +} + +/// Estimate the output size of a single node +pub fn estimate_node_size(node: &MathNode) -> usize { + match node { + MathNode::Text(text) => { + if needs_latex_protection(text) { + text.len() + 10 // \text{} wrapper + } else { + text.len() + } + }, + MathNode::Number(num) => num.len(), + MathNode::Operator(_) => 5, // Average operator length + MathNode::Symbol(_) => 8, // Average symbol length with escapes + MathNode::Frac { + numerator, + denominator, + .. 
+ } => { + 6 + estimate_nodes_size(numerator) + estimate_nodes_size(denominator) // \frac{}{} + }, + MathNode::Root { base, index } => { + (if index.is_some() { 8 } else { 7 }) + estimate_nodes_size(base) + }, + MathNode::Power { base, exponent } => { + 2 + estimate_nodes_size(base) + estimate_nodes_size(exponent) // ^{} + }, + MathNode::Sub { base, subscript } => { + 2 + estimate_nodes_size(base) + estimate_nodes_size(subscript) // _{} + }, + MathNode::SubSup { + base, + subscript, + superscript, + } => { + 4 + estimate_nodes_size(base) + + estimate_nodes_size(subscript) + + estimate_nodes_size(superscript) // _{}^{} + }, + MathNode::Under { + base, + under, + position: _, + } => { + 10 + estimate_nodes_size(base) + estimate_nodes_size(under) // \underset{}{} + }, + MathNode::Over { + base, + over, + position: _, + } => { + 9 + estimate_nodes_size(base) + estimate_nodes_size(over) // \overset{}{} + }, + MathNode::UnderOver { + base, + under, + over, + position: _, + } => { + 20 + estimate_nodes_size(base) + estimate_nodes_size(under) + estimate_nodes_size(over) // \overset{}{\underset{}{}} + }, + MathNode::Fenced { + open: _, + content, + close: _, + separator: _, + } => { + 12 + estimate_nodes_size(content) // \left...\right... + }, + MathNode::LargeOp { + operator: _, + lower_limit, + upper_limit, + integrand, + hide_lower: _, + hide_upper: _, + } => { + 8 + // operator + lower_limit.as_ref().map_or(0, |l| 2 + estimate_nodes_size(l)) + + upper_limit.as_ref().map_or(0, |u| 2 + estimate_nodes_size(u)) + + integrand.as_ref().map_or(0, |i| 1 + estimate_nodes_size(i)) + }, + MathNode::Function { name, argument } => { + name.len() + 5 + estimate_nodes_size(argument) // \name{} + }, + MathNode::Matrix { rows, .. } => { + 20 + // \begin{matrix}\end{matrix} + rows.len() * 4 + // \\\\ between rows + rows.iter().flatten().flatten().count() * 3 // & between cells and content + }, + MathNode::Accent { base, .. 
} => { + 8 + estimate_nodes_size(base) // \accent{} + }, + MathNode::Space(_) => 5, // Space commands + MathNode::LineBreak => 2, // \\\\ + MathNode::Style { content, .. } => { + 8 + estimate_nodes_size(content) // \style{} + }, + MathNode::Row(nodes) => estimate_nodes_size(nodes), + MathNode::Phantom(content) => { + 9 + estimate_nodes_size(content) // \phantom{} + }, + MathNode::Error(msg) => { + 15 + msg.len() // \text{[Error: ...]} + }, + MathNode::PredefinedSymbol(_) => 8, // Average predefined symbol length + MathNode::PreSub { + base, + pre_subscript, + } => { + 3 + estimate_nodes_size(base) + estimate_nodes_size(pre_subscript) // \presub{}{} + }, + MathNode::PreSup { + base, + pre_superscript, + } => { + 3 + estimate_nodes_size(base) + estimate_nodes_size(pre_superscript) // \presup{}{} + }, + MathNode::PreSubSup { + base, + pre_subscript, + pre_superscript, + } => { + 5 + estimate_nodes_size(base) + + estimate_nodes_size(pre_subscript) + + estimate_nodes_size(pre_superscript) // \presubsup{}{}{} + }, + MathNode::Bar { base, .. } => { + 6 + estimate_nodes_size(base) // \bar{} + }, + MathNode::BorderBox { content, .. } => { + 12 + estimate_nodes_size(content) // \boxed{} + }, + MathNode::GroupChar { base, .. } => { + 12 + estimate_nodes_size(base) // \overbrace or similar + }, + MathNode::PredefinedFunction { argument, .. } => { + 8 + estimate_nodes_size(argument) // Average function length + }, + MathNode::EqArray { rows, .. } => { + 25 + // \begin{align}\end{align} + rows.len() * 4 + // \\\\ between rows + rows.iter().flatten().count() * 2 // Content size + }, + MathNode::Run { content, .. } => estimate_nodes_size(content), + MathNode::Limit { content, .. 
} => estimate_nodes_size(content), + MathNode::Degree(content) => estimate_nodes_size(content), + MathNode::Base(content) => estimate_nodes_size(content), + MathNode::Argument(content) => estimate_nodes_size(content), + MathNode::Numerator(content) => estimate_nodes_size(content), + MathNode::Denominator(content) => estimate_nodes_size(content), + MathNode::Integrand(content) => estimate_nodes_size(content), + MathNode::LowerLimit(content) => estimate_nodes_size(content), + MathNode::UpperLimit(content) => estimate_nodes_size(content), + } +} + +/// Estimate capacity needed for matrix conversion to avoid reallocations +pub fn estimate_matrix_capacity(rows: &[Vec>]) -> usize { + if rows.is_empty() { + return 0; + } + + let num_rows = rows.len(); + let num_cols = rows[0].len(); + + // Estimate: environment markers + row separators + column separators + content + let env_overhead = 20; // \begin{matrix}\end{matrix} + let row_separators = (num_rows.saturating_sub(1)) * 4; // " \\\\ " + let col_separators = num_rows * (num_cols.saturating_sub(1)) * 3; // " & " + + // Rough estimate for content (average 5 chars per node) + let content_estimate = rows.iter().flatten().flatten().count() * 5; + + env_overhead + row_separators + col_separators + content_estimate +} diff --git a/crates/litchi-formula/src/latex/matrix.rs b/crates/litchi-formula/src/latex/matrix.rs new file mode 100644 index 0000000..20a790f --- /dev/null +++ b/crates/litchi-formula/src/latex/matrix.rs @@ -0,0 +1,345 @@ +// Matrix conversion to LaTeX +// +// This module handles conversion of matrix nodes to LaTeX format with +// performance optimizations and proper fence handling. 
+ +use super::LatexError; +use crate::ast::{MathNode, MatrixFence}; + +/// Convert matrix fence type to LaTeX environment name +#[inline] +pub fn matrix_fence_to_env(fence_type: MatrixFence) -> &'static str { + match fence_type { + MatrixFence::None => "matrix", + MatrixFence::Paren => "pmatrix", + MatrixFence::Bracket => "bmatrix", + MatrixFence::Brace => "Bmatrix", + MatrixFence::Pipe => "vmatrix", + MatrixFence::DoublePipe => "Vmatrix", + } +} + +/// Convert matrix to LaTeX with optimized string building +/// +/// Uses pre-allocated capacity and efficient string operations for performance. +/// This function provides a public API for matrix conversion with a custom node converter. +/// +/// # Arguments +/// * `buffer` - The output buffer to write LaTeX to +/// * `rows` - Matrix rows containing cells with MathNode vectors +/// * `fence_type` - The type of fence to use (parentheses, brackets, etc.) +/// * `node_converter` - Function to convert individual MathNodes to LaTeX +/// +/// # Performance +/// Pre-allocates buffer capacity and uses efficient string operations. 
+#[allow(dead_code)] +pub fn convert_matrix( + buffer: &mut String, + rows: &[Vec>], + fence_type: MatrixFence, + node_converter: &dyn Fn(&mut String, &MathNode) -> Result<(), LatexError>, +) -> Result<(), LatexError> { + use std::fmt::Write; + + if rows.is_empty() { + return Ok(()); + } + + let env = matrix_fence_to_env(fence_type); + + // Pre-calculate approximate capacity needed for better performance + let estimated_capacity = estimate_matrix_capacity(rows); + buffer.reserve(estimated_capacity); + + // Begin environment + write!(buffer, "\\begin{{{}}}", env).map_err(|e| LatexError::FormatError(e.to_string()))?; + + // Convert each row + for (i, row) in rows.iter().enumerate() { + if i > 0 { + buffer.push_str(" \\\\ "); + } + + // Convert each cell in the row + for (j, cell) in row.iter().enumerate() { + if j > 0 { + buffer.push_str(" & "); + } + + // Convert all nodes in this cell using the provided converter + for node in cell { + node_converter(buffer, node)?; + } + } + } + + // End environment + write!(buffer, "\\end{{{}}}", env).map_err(|e| LatexError::FormatError(e.to_string()))?; + + Ok(()) +} + +/// Estimate capacity needed for matrix conversion to avoid reallocations +#[allow(dead_code)] +pub fn estimate_matrix_capacity(rows: &[Vec>]) -> usize { + if rows.is_empty() { + return 0; + } + + let num_rows = rows.len(); + let num_cols = rows[0].len(); + + // Estimate: environment markers + row separators + column separators + content + let env_overhead = 20; // \begin{matrix}\end{matrix} + let row_separators = (num_rows.saturating_sub(1)) * 4; // " \\\\ " + let col_separators = num_rows * (num_cols.saturating_sub(1)) * 3; // " & " + + // Rough estimate for content (average 5 chars per node) + let content_estimate = rows.iter().flatten().flatten().count() * 5; + + env_overhead + row_separators + col_separators + content_estimate +} + +/// Convert matrix with alignment specification for columns +/// +/// For matrices that require specific column alignment 
(left, center, right). +/// Uses LaTeX array environment instead of standard matrix environments when alignment is specified. +/// +/// # Arguments +/// * `buffer` - The output buffer to write LaTeX to +/// * `rows` - Matrix rows containing cells with MathNode vectors +/// * `fence_type` - The type of fence to use (parentheses, brackets, etc.) +/// * `alignments` - Optional column alignment specifications ('l', 'c', 'r') +/// * `node_converter` - Function to convert individual MathNodes to LaTeX +/// +/// # Performance +/// Pre-allocates buffer capacity and uses efficient string operations. +/// When alignments are specified, uses array environment for precise control. +#[allow(dead_code)] +pub fn convert_matrix_with_alignment( + buffer: &mut String, + rows: &[Vec>], + fence_type: MatrixFence, + alignments: Option<&[char]>, + node_converter: &dyn Fn(&mut String, &MathNode) -> Result<(), LatexError>, +) -> Result<(), LatexError> { + use std::fmt::Write; + + if rows.is_empty() { + return Ok(()); + } + + // Determine if we need alignment-specific environment + let use_array_env = alignments.is_some(); + + let env = if use_array_env { + // Use array environment for alignment control + "array" + } else { + matrix_fence_to_env(fence_type) + }; + + // Pre-calculate capacity with extra space for alignment specifications + let mut estimated_capacity = estimate_matrix_capacity(rows); + if use_array_env { + estimated_capacity += 20; // Extra for alignment spec + } + buffer.reserve(estimated_capacity); + + // Begin environment + if use_array_env { + // For array environment, we need alignment specification + if let Some(aligns) = alignments { + write!(buffer, "\\begin{{{}}}", env) + .map_err(|e| LatexError::FormatError(e.to_string()))?; + buffer.push('{'); + for &align in aligns { + buffer.push(align); + } + buffer.push('}'); + } else { + // Default to centered alignment if array but no alignments specified + let num_cols = rows[0].len(); + write!(buffer, "\\begin{{{}}}", env) 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::{MathNode, Operator};

    /// Test-only node converter: numbers are echoed verbatim, `+` and `-`
    /// are written literally, and every other node or operator becomes `?`.
    fn dummy_converter(buffer: &mut String, node: &MathNode) -> Result<(), LatexError> {
        match node {
            MathNode::Number(digits) => buffer.push_str(digits),
            MathNode::Operator(Operator::Plus) => buffer.push_str("+"),
            MathNode::Operator(Operator::Minus) => buffer.push_str("-"),
            _ => buffer.push('?'),
        }
        Ok(())
    }

    /// Builds a matrix cell holding a single number node.
    fn num(text: &'static str) -> Vec<MathNode<'static>> {
        vec![MathNode::Number(text.into())]
    }

    #[test]
    fn test_matrix_fence_to_env() {
        let expected = [
            (MatrixFence::None, "matrix"),
            (MatrixFence::Paren, "pmatrix"),
            (MatrixFence::Bracket, "bmatrix"),
            (MatrixFence::Brace, "Bmatrix"),
            (MatrixFence::Pipe, "vmatrix"),
            (MatrixFence::DoublePipe, "Vmatrix"),
        ];
        for (fence, env) in expected {
            assert_eq!(matrix_fence_to_env(fence), env);
        }
    }

    #[test]
    fn test_convert_simple_matrix() {
        let rows = vec![vec![num("1"), num("2")], vec![num("3"), num("4")]];
        let mut buffer = String::new();

        convert_matrix(&mut buffer, &rows, MatrixFence::Bracket, &dummy_converter).unwrap();

        assert_eq!(buffer, "\\begin{bmatrix}1 & 2 \\\\ 3 & 4\\end{bmatrix}");
    }

    #[test]
    fn test_convert_empty_matrix() {
        let rows: Vec<Vec<Vec<MathNode>>> = Vec::new();
        let mut buffer = String::new();

        convert_matrix(&mut buffer, &rows, MatrixFence::None, &dummy_converter).unwrap();

        // A matrix without rows must not emit anything.
        assert_eq!(buffer, "");
    }

    #[test]
    fn test_convert_matrix_with_alignment() {
        let rows = vec![
            vec![num("1"), num("2"), num("3")],
            vec![num("4"), num("5"), num("6")],
        ];
        let alignments = vec!['l', 'c', 'r'];
        let mut buffer = String::new();

        convert_matrix_with_alignment(
            &mut buffer,
            &rows,
            MatrixFence::Bracket,
            Some(&alignments),
            &dummy_converter,
        )
        .unwrap();

        // Explicit alignments switch the output to an `array` environment
        // with manually written fences.
        let fragments = [
            "\\begin{array}{lcr}",
            "\\left[",
            "\\right]",
            "\\end{array}",
            "1 & 2 & 3",
            "4 & 5 & 6",
        ];
        for fragment in fragments {
            assert!(buffer.contains(fragment));
        }
    }

    #[test]
    fn test_convert_matrix_with_alignment_default() {
        let rows = vec![vec![num("1"), num("2")]];
        let mut buffer = String::new();

        // Without alignments the regular fenced matrix environment is used.
        convert_matrix_with_alignment(
            &mut buffer,
            &rows,
            MatrixFence::Paren,
            None,
            &dummy_converter,
        )
        .unwrap();

        assert!(buffer.contains("\\begin{pmatrix}"));
        assert!(buffer.contains("\\end{pmatrix}"));
    }

    #[test]
    fn test_matrix_fence_to_env_comprehensive() {
        let fences = [
            MatrixFence::None,
            MatrixFence::Paren,
            MatrixFence::Bracket,
            MatrixFence::Brace,
            MatrixFence::Pipe,
            MatrixFence::DoublePipe,
        ];
        let envs = ["matrix", "pmatrix", "bmatrix", "Bmatrix", "vmatrix", "Vmatrix"];
        for (fence, env) in fences.into_iter().zip(envs) {
            assert_eq!(matrix_fence_to_env(fence), env);
        }
    }
}
index 1d80fdf..8d21b27 100644 --- a/src/formula/latex/symbols.rs +++ b/crates/litchi-formula/src/latex/symbols.rs @@ -5,7 +5,7 @@ // Uses efficient lookup tables and SIMD operations for performance. use super::LatexError; -use crate::formula::ast::Symbol; +use crate::ast::Symbol; // Static lookup table for common Greek letters and symbols static GREEK_SYMBOLS: phf::Map<&'static str, &'static str> = phf::phf_map! { @@ -276,7 +276,7 @@ pub fn convert_symbol(buffer: &mut String, symbol: &Symbol) -> Result<(), LatexE #[cfg(test)] mod tests { use super::*; - use crate::formula::ast::Symbol; + use crate::ast::Symbol; use std::borrow::Cow; #[test] diff --git a/crates/litchi-formula/src/latex/templates.rs b/crates/litchi-formula/src/latex/templates.rs new file mode 100644 index 0000000..b83a6f3 --- /dev/null +++ b/crates/litchi-formula/src/latex/templates.rs @@ -0,0 +1,34 @@ +// Template conversion helpers for LaTeX + +use crate::ast::MathNode; + +/// Check if base needs grouping for scripts (subscript/superscript) +#[inline] +pub fn needs_grouping_for_scripts(nodes: &[MathNode]) -> bool { + nodes.len() > 1 +} + +#[cfg(test)] +mod tests { + use super::*; + use std::borrow::Cow; + + #[test] + fn test_needs_grouping_for_scripts() { + // Empty slice - no grouping needed + let empty: &[MathNode] = &[]; + assert!(!needs_grouping_for_scripts(empty)); + + // Single element - no grouping needed + let single: &[MathNode<'_>] = &[MathNode::Text(Cow::Borrowed("x"))]; + assert!(!needs_grouping_for_scripts(single)); + + // Multiple elements - grouping needed + let multiple: &[MathNode<'_>] = &[ + MathNode::Text(Cow::Borrowed("x")), + MathNode::Operator(crate::ast::Operator::Plus), + MathNode::Text(Cow::Borrowed("y")), + ]; + assert!(needs_grouping_for_scripts(multiple)); + } +} diff --git a/src/formula/latex/utils.rs b/crates/litchi-formula/src/latex/utils.rs similarity index 100% rename from src/formula/latex/utils.rs rename to crates/litchi-formula/src/latex/utils.rs diff --git 
a/crates/litchi-formula/src/lib.rs b/crates/litchi-formula/src/lib.rs new file mode 100644 index 0000000..95e0d0f --- /dev/null +++ b/crates/litchi-formula/src/lib.rs @@ -0,0 +1,190 @@ +#![allow(missing_docs)] +// Formula Module - Mathematical Formula Parsing and Conversion +// +// This module provides comprehensive support for parsing and converting +// mathematical formulas between different formats: +// +// - **OMML** (Office Math Markup Language): XML-based format used in modern Office files +// - **LaTeX**: Standard mathematical typesetting format +// - **MTEF** (MathType Equation Format): Binary format used in legacy OLE files +// +// The module uses a common Abstract Syntax Tree (AST) representation to enable +// efficient conversion between formats. +// +// # Example +// +// ```ignore +// use litchi::formula::{Formula, OmmlParser, LatexConverter}; +// +// // Parse OMML +// let formula = Formula::new(); +// let parser = OmmlParser::new(formula.arena()); +// let nodes = parser.parse("x")?; +// +// // Convert to LaTeX +// let mut formula = Formula::new(); +// formula.set_root(nodes); +// let mut converter = LatexConverter::new(); +// let latex = converter.convert(&formula)?; +// ``` + +/// Abstract Syntax Tree for Mathematical Formulas +/// +/// This module defines a comprehensive AST for representing mathematical formulas +/// that can be parsed from OMML, LaTeX, and MTEF formats and converted between them. +/// +/// The design is inspired by the plurimath Ruby project but adapted for Rust's +/// type system and performance characteristics. +pub mod ast; +/// LaTeX Converter +/// +/// This module converts our formula AST to LaTeX format. +/// LaTeX is a widely-used typesetting system for mathematical formulas. +pub mod latex; +/// MTEF (MathType Equation Format) Parser +/// +/// This module parses the binary MathType Equation Format (MTEF) used in +/// legacy OLE documents (.doc, .ppt, etc.) into our formula AST. 
+/// +/// MTEF is a private data stream format developed by Design Science for +/// storing mathematical equations. +/// +/// References: +/// - http://rtf2latex2e.sourceforge.net/MTEF5.html +/// - rtf2latex2e source code +mod mtef; +/// OMML (Office Math Markup Language) Parser +/// +/// This module parses Microsoft Office Math Markup Language (OMML) into our AST. +/// OMML is used in modern Office documents (.docx, .pptx, etc.) to represent +/// mathematical formulas. +/// +/// This implementation provides comprehensive OMML parsing with: +/// - High-performance streaming XML parsing +/// - Modular element handlers for different OMML constructs +/// - Comprehensive attribute parsing +/// - Memory-efficient arena-based allocation +/// - Support for all OMML elements and properties +/// +/// Reference: https://devblogs.microsoft.com/math-in-office/officemath/ +mod omml; + +// Re-export public API +pub use ast::{ + AccentType, Fence, Formula, FormulaBuilder, LargeOperator, MathNode, MatrixFence, Operator, + SpaceType, StyleType, Symbol, +}; +pub use latex::{LatexConverter, LatexError}; +pub use mtef::{MtefError, MtefParser}; +pub use omml::{OmmlError, OmmlParser}; + +/// Conversion error that wraps all possible formula errors +#[derive(Debug)] +#[non_exhaustive] +pub enum FormulaError { + Omml(OmmlError), + Latex(LatexError), + Mtef(MtefError), +} + +impl std::fmt::Display for FormulaError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + FormulaError::Omml(e) => write!(f, "OMML error: {}", e), + FormulaError::Latex(e) => write!(f, "LaTeX error: {}", e), + FormulaError::Mtef(e) => write!(f, "MTEF error: {}", e), + } + } +} + +impl std::error::Error for FormulaError {} + +impl From for FormulaError { + fn from(e: OmmlError) -> Self { + FormulaError::Omml(e) + } +} + +impl From for FormulaError { + fn from(e: LatexError) -> Self { + FormulaError::Latex(e) + } +} + +impl From for FormulaError { + fn from(e: MtefError) -> Self { + 
FormulaError::Mtef(e) + } +} + +/// High-level conversion functions +/// Convert OMML to LaTeX +/// +/// # Example +/// ```ignore +/// let latex = omml_to_latex("x")?; +/// println!("LaTeX: {}", latex); +/// ``` +pub fn omml_to_latex(omml: &str) -> Result { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + let nodes = parser.parse(omml)?; + + let mut formula = Formula::new(); + formula.set_root(nodes); + + let mut converter = LatexConverter::new(); + Ok(converter.convert(&formula)?.to_string()) +} + +/// Convert MTEF binary data to LaTeX +/// +/// # Example +/// ```ignore +/// let latex = mtef_to_latex(mtef_data)?; +/// println!("LaTeX: {}", latex); +/// ``` +pub fn mtef_to_latex(mtef_data: &[u8]) -> Result { + let formula = Formula::new(); + let mut parser = MtefParser::new(formula.arena(), mtef_data); + let nodes = parser.parse()?; + + let mut formula = Formula::new(); + formula.set_root(nodes); + + let mut converter = LatexConverter::new(); + Ok(converter.convert(&formula)?.to_string()) +} + +/// Convert OMML to MTEF (not yet implemented) +/// +/// This function is planned for future implementation. +pub fn omml_to_mtef(_omml: &str) -> Result, FormulaError> { + unimplemented!("OMML to MTEF conversion is not yet implemented") +} + +/// Convert MTEF to OMML (not yet implemented) +/// +/// This function is planned for future implementation. 
+pub fn mtef_to_omml(_mtef_data: &[u8]) -> Result { + unimplemented!("MTEF to OMML conversion is not yet implemented") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_omml_to_latex() { + let omml = r#"x"#; + let result = omml_to_latex(omml); + assert!(result.is_ok()); + } + + #[test] + fn test_formula_creation() { + let formula = Formula::new(); + assert!(formula.root().is_empty()); + assert!(formula.display_style()); + } +} diff --git a/src/formula/mtef/binary/charset.rs b/crates/litchi-formula/src/mtef/binary/charset.rs similarity index 98% rename from src/formula/mtef/binary/charset.rs rename to crates/litchi-formula/src/mtef/binary/charset.rs index 23930c1..ec6dcd3 100644 --- a/src/formula/mtef/binary/charset.rs +++ b/crates/litchi-formula/src/mtef/binary/charset.rs @@ -7,7 +7,7 @@ //! (Greek, symbols, text, functions, etc.). This module maps typeface/character //! combinations to their LaTeX equivalents. -use crate::formula::mtef::constants::*; +use crate::mtef::constants::*; /// Character set attributes for typeface handling /// @@ -654,12 +654,12 @@ pub fn lookup_character(typeface: usize, character: u16, math_attr: i32) -> Opti // For special cases with math attribute variations (like spaces) // Following rtf2latex2e logic for mode-dependent character lookup - if typeface == 152 && math_attr == crate::formula::mtef::constants::MA_TEXT { + if typeface == 152 && math_attr == crate::mtef::constants::MA_TEXT { let text_key = format!("{}.{}{}", typeface, character, 't'); if let Some(result) = CHAR_LOOKUP_TABLE.get(&text_key) { return Some(*result); } - } else if typeface == 152 && math_attr == crate::formula::mtef::constants::MA_MATH { + } else if typeface == 152 && math_attr == crate::mtef::constants::MA_MATH { let math_key = format!("{}.{}{}", typeface, character, 'm'); if let Some(result) = CHAR_LOOKUP_TABLE.get(&math_key) { return Some(*result); diff --git a/crates/litchi-formula/src/mtef/binary/converter.rs 
b/crates/litchi-formula/src/mtef/binary/converter.rs new file mode 100644 index 0000000..e66d30a --- /dev/null +++ b/crates/litchi-formula/src/mtef/binary/converter.rs @@ -0,0 +1,1220 @@ +//! MTEF to AST conversion logic +//! +//! This module implements the conversion from parsed MTEF objects to formula AST nodes. +//! Based on rtf2latex2e Eqn_TranslateObjects and related conversion functions. +//! +//! The conversion process involves: +//! - Character translation using typeface lookup tables +//! - Template parsing and AST node construction +//! - Embellishment application +//! - Mode switching (math/text) based on typeface attributes + +use super::charset::*; +use super::objects::*; +use crate::ast::{Fence, LargeOperator, LineStyle, MathNode, MatrixFence}; +use crate::mtef::MtefError; +use crate::mtef::constants::*; +use crate::mtef::templates::{TemplateArgs, TemplateParser}; +use std::borrow::Cow; + +/// Type alias for subscript/superscript parsing result (base, subscript, superscript) +type SubSupResult<'a> = + Result<(Vec>, Vec>, Vec>), MtefError>; + +/// Type alias for large operator parsing result (lower_limit, upper_limit, integrand) +type LargeOpResult<'a> = Result< + ( + Option>>, + Option>>, + Vec>, + ), + MtefError, +>; + +/// Implementation of AST conversion methods for MtefBinaryParser +impl<'arena> super::parser::MtefBinaryParser<'arena> { + pub fn convert_objects_to_ast( + &self, + obj_list: &MtefObjectList, + ) -> Result>, MtefError> { + let mut nodes = Vec::new(); + let mut current = Some(obj_list); + + while let Some(obj) = current { + match obj.tag { + MtefRecordType::Char => { + if let Some(char_obj) = obj.obj_ptr.as_any().downcast_ref::() { + // Special handling based on rtf2latex2e Eqn_TranslateObjects logic + match char_obj.typeface { + 130 => { + // Function typeface - auto-recognize functions + let (node, skip_count) = self.convert_function_to_node(current)?; + nodes.push(node); + // Skip the consumed characters + for _ in 0..skip_count { 
+ current = current.and_then(|c| c.next.as_deref()); + } + continue; + }, + 129 if self.mode != crate::mtef::constants::EQN_MODE_TEXT => { + // Text in math mode + let (node, skip_count) = self.convert_text_run_to_node(current)?; + nodes.push(node); + // Skip the consumed characters + for _ in 0..skip_count { + current = current.and_then(|c| c.next.as_deref()); + } + continue; + }, + _ => { + // Regular character + nodes.push(self.convert_char_to_node(char_obj)?); + }, + } + } + }, + MtefRecordType::Tmpl => { + if let Some(tmpl_obj) = obj.obj_ptr.as_any().downcast_ref::() { + nodes.push(self.convert_template_to_node(tmpl_obj)?); + } + }, + MtefRecordType::Line => { + if let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? + { + nodes.extend(line_nodes); + } + }, + MtefRecordType::Pile => { + if let Some(pile_obj) = obj.obj_ptr.as_any().downcast_ref::() { + nodes.push(self.convert_pile_to_node(pile_obj)?); + } + }, + MtefRecordType::Matrix => { + if let Some(matrix_obj) = obj.obj_ptr.as_any().downcast_ref::() { + nodes.push(self.convert_matrix_to_node(matrix_obj)?); + } + }, + MtefRecordType::Font => { + // Font objects affect character rendering but don't generate output + // In a full implementation, this would update the current font context + }, + MtefRecordType::Size + | MtefRecordType::Full + | MtefRecordType::Sub + | MtefRecordType::Sub2 + | MtefRecordType::Sym + | MtefRecordType::SubSym => { + // Size objects affect character size but don't generate output + // In a full implementation, this would update the current size context + }, + _ => { + // Skip other record types for now + }, + } + current = obj.next.as_deref(); + } + + Ok(nodes) + } + + fn convert_char_to_node(&self, char_obj: &MtefChar) -> Result, MtefError> { + let text = self.convert_char_to_text(char_obj).map_err(|e| { + MtefError::ParseError(format!( + "Failed to convert character (typeface={}, char={}): {}", + 
char_obj.typeface, char_obj.character, e + )) + })?; + Ok(MathNode::Text(text)) + } + + /// Convert a function sequence to a MathNode (handles typeface 130 functions) + fn convert_function_to_node( + &self, + start_obj: Option<&MtefObjectList>, + ) -> Result<(MathNode<'arena>, usize), MtefError> { + use crate::mtef::binary::charset::lookup_function; + + let mut function_name = String::new(); + let mut current = start_obj; + let mut skip_count = 0; + + // Gather function name from consecutive characters with typeface 130 + while let Some(obj) = current { + if let MtefRecordType::Char = obj.tag + && let Some(char_obj) = obj.obj_ptr.as_any().downcast_ref::() + && char_obj.typeface == 130 + && (char_obj.character as u8).is_ascii_alphabetic() + && let Some(ch) = char::from_u32(char_obj.character as u32) + { + function_name.push(ch); + skip_count += 1; + current = obj.next.as_deref(); + continue; + } + break; + } + + if function_name.is_empty() { + return Err(MtefError::ParseError("Empty function name".to_string())); + } + + // Look up the function in the table + let latex_text = if let Some(func) = lookup_function(&function_name) { + Cow::Borrowed(func.trim_end()) // Remove trailing space + } else { + // Fallback: wrap in \mathrm{} + Cow::Owned(format!("\\mathrm{{{}}}", function_name)) + }; + + Ok((MathNode::Text(latex_text), skip_count)) + } + + /// Convert a text run to a MathNode (handles typeface 129 text in math) + fn convert_text_run_to_node( + &self, + start_obj: Option<&MtefObjectList>, + ) -> Result<(MathNode<'arena>, usize), MtefError> { + let mut text_run = String::new(); + let mut current = start_obj; + let mut skip_count = 0; + + // Gather text from consecutive characters with typeface 129, also skip SIZE objects + while let Some(obj) = current { + match obj.tag { + MtefRecordType::Char => { + if let Some(char_obj) = obj.obj_ptr.as_any().downcast_ref::() + && char_obj.typeface == 129 + && let Some(ch) = char::from_u32(char_obj.character as u32) + { + 
text_run.push(ch); + skip_count += 1; + current = obj.next.as_deref(); + continue; + } + break; + }, + MtefRecordType::Size + | MtefRecordType::Full + | MtefRecordType::Sub + | MtefRecordType::Sub2 + | MtefRecordType::Sym + | MtefRecordType::SubSym => { + // Skip size objects + skip_count += 1; + current = obj.next.as_deref(); + continue; + }, + _ => break, + } + } + + // Wrap text in \text{} for LaTeX + let latex_text = format!("\\text{{{}}}", text_run); + Ok((MathNode::Text(Cow::Owned(latex_text)), skip_count)) + } + + fn convert_char_to_text(&self, char_obj: &MtefChar) -> Result, MtefError> { + // Implement proper character translation based on rtf2latex2e Eqn_GetTexChar logic + let typeface = char_obj.typeface as usize; + let character = char_obj.character; + + let mut _math_attr = 0; // Default math attribute (MA_NONE) + let mut current_mode = self.mode; // Current mode for this character + + // Get base character representation + let mut base_text = if (129..129 + NUM_TYPEFACE_SLOTS).contains(&typeface) { + let charset_index = typeface - 129; + let charset_atts = get_charset_attributes(charset_index); + + _math_attr = charset_atts.math_attr; + + // Handle mode switching based on _math_attr (following rtf2latex2e logic) + let _mode_changed = match _math_attr { + MA_FORCE_TEXT => { + let old_mode = current_mode; + current_mode = EQN_MODE_TEXT; + Some(old_mode) + }, + MA_FORCE_MATH => { + let old_mode = current_mode; + // For forced math mode, use inline if equation is inline, otherwise display + current_mode = if self.inline != 0 { + EQN_MODE_INLINE + } else { + EQN_MODE_DISPLAY + }; + Some(old_mode) + }, + MA_TEXT | MA_MATH => { + // For special case: mode depends on variation (like spaces) + if typeface == 152 && _math_attr == MA_TEXT { + let old_mode = current_mode; + current_mode = EQN_MODE_TEXT; + Some(old_mode) + } else if typeface == 152 && _math_attr == MA_MATH { + let old_mode = current_mode; + current_mode = if self.inline != 0 { + EQN_MODE_INLINE + } 
else { + EQN_MODE_DISPLAY + }; + Some(old_mode) + } else { + None + } + }, + _ => None, + }; + + // Try character lookup first using PHF map + let lookup_result = if charset_atts.do_lookup { + // Special handling for typefaces with mode-dependent lookups + let lookup_math_attr = if typeface == 152 { + // Space characters have different meanings in math vs text + _math_attr + } else { + _math_attr + }; + + lookup_character(typeface, character, lookup_math_attr) + } else { + None + }; + + if let Some(latex_char) = lookup_result { + latex_char.to_string() + } else if charset_atts.use_codepoint { + self.convert_codepoint(character, typeface)?.to_string() + } else { + format!("\\char{}", character) + } + } else { + // Fallback for unknown typefaces + format!("\\char{}", character) + }; + + // Apply embellishments if present (following rtf2latex2e logic) + if let Some(embellishments) = &char_obj.embellishment_list { + self.apply_embellishments(&mut base_text, embellishments, current_mode)?; + } + + Ok(Cow::Owned(base_text)) + } + + fn apply_embellishments( + &self, + base_text: &mut String, + embellishments: &MtefEmbell, + mode: i32, + ) -> Result<(), MtefError> { + // Apply embellishments to the base character, following rtf2latex2e Eqn_GetTexChar logic + let mut current = Some(embellishments); + + while let Some(embell) = current { + if embell.embell > 0 + && usize::from(embell.embell) + < crate::mtef::binary::charset::EMBELLISHMENT_TEMPLATES.len() + { + let template = get_embellishment_template(embell.embell); + if !template.is_empty() { + // Split template on comma to get math and text versions + // Use appropriate version based on current mode + let template_part = if let Some(comma_pos) = template.find(',') { + if mode != EQN_MODE_TEXT { + &template[..comma_pos] // Math version + } else { + &template[comma_pos + 1..] 
// Text version + } + } else { + template // Whole template if no comma + }; + + // Replace %1 with the base character + let new_text = template_part.replace("%1", base_text); + *base_text = new_text; + } + } + current = embell.next.as_deref(); + } + + Ok(()) + } + + fn convert_codepoint( + &self, + character: u16, + typeface: usize, + ) -> Result, MtefError> { + // Handle special characters and formatting based on rtf2latex2e logic + if (32..=127).contains(&character) { + let ch = character as u8 as char; + + // Special handling for ampersand + if character == 38 { + // '&' + return Ok(Cow::Borrowed("\\&")); + } + + // Special handling for certain typefaces (like bold) + if typeface == 135 { + // Bold typeface - matches rtf2latex2e logic + return Ok(Cow::Owned(format!("\\mathbf{{{}}}", ch))); + } + + // Regular character + return Ok(Cow::Owned(ch.to_string())); + } + + // For non-ASCII characters, try to convert as Unicode + if let Some(c) = char::from_u32(character as u32) { + Ok(Cow::Owned(c.to_string())) + } else { + // Fallback for unmappable characters + Ok(Cow::Owned(format!("\\char{}", character))) + } + } + + fn convert_template_to_node( + &self, + tmpl_obj: &MtefTemplate, + ) -> Result, MtefError> { + // Handle templates based on selector type + // Some templates have specific AST representations, others use generic template parsing + match tmpl_obj.selector { + 0..=9 => { + // Fences (parentheses, brackets, braces, etc.) + self.convert_fence_template(tmpl_obj) + }, + 10 => { + // Root + self.convert_legacy_template(tmpl_obj) + }, + 11 => { + // Fraction + self.convert_legacy_template(tmpl_obj) + }, + 12..=13 => { + // Underline/overline + self.convert_decoration_template(tmpl_obj) + }, + 14 => { + // Arrows + self.convert_arrow_template(tmpl_obj) + }, + 15 | 21 => { + // Integrals + self.convert_legacy_template(tmpl_obj) + }, + 16..=20 => { + // Large operators (sum, product, etc.) 
+ self.convert_large_op_template(tmpl_obj) + }, + 22 => { + // Sum (alternate form) + self.convert_large_op_template(tmpl_obj) + }, + 23 => { + // Limit + self.convert_limit_template(tmpl_obj) + }, + 24..=25 => { + // Horizontal braces + self.convert_brace_template(tmpl_obj) + }, + 27..=29 => { + // Scripts (subscript, superscript, sub+sup) + self.convert_legacy_template(tmpl_obj) + }, + _ => { + // Try template table lookup for unknown templates + let variation = tmpl_obj.variation; + if let Some(template_def) = + TemplateParser::find_template(tmpl_obj.selector, variation) + { + // Parse subobjects into arguments + let args = if let Some(obj_list) = &tmpl_obj.subobject_list { + self.parse_template_arguments(obj_list)? + } else { + smallvec::SmallVec::new() + }; + + // Apply the template + Ok(TemplateParser::parse_template_arguments( + template_def.template, + &args, + )) + } else { + // Fallback for completely unknown templates + Ok(MathNode::Text(Cow::Owned(format!( + "\\unknown_template_{}_{{{}}}", + tmpl_obj.selector, tmpl_obj.variation + )))) + } + }, + } + } + + fn convert_legacy_template( + &self, + tmpl_obj: &MtefTemplate, + ) -> Result, MtefError> { + // Template handling based on MTEF selector values from rtf2latex2e + match tmpl_obj.selector { + 14 => { + // Fraction (ffract) + // Fraction template - should have numerator and denominator subobjects + if let Some(obj_list) = &tmpl_obj.subobject_list { + let (numerator, denominator) = self.parse_fraction_subobjects(obj_list)?; + Ok(TemplateParser::parse_fraction(numerator, denominator)) + } else { + Ok(MathNode::Text(Cow::Borrowed("\\frac{}{}"))) + } + }, + 13 => { + // Root (sqroot/nthroot) + // Root template - may have index and base + if let Some(obj_list) = &tmpl_obj.subobject_list { + let (base, index) = self.parse_root_subobjects(obj_list)?; + Ok(TemplateParser::parse_root( + base, + if index.is_empty() { None } else { Some(index) }, + )) + } else { + Ok(MathNode::Text(Cow::Borrowed("\\sqrt{}"))) + } 
+ }, + 15 => { + // Scripts (super, sub, subsup based on variation) + match tmpl_obj.variation { + 0 => { + // Superscript + if let Some(obj_list) = &tmpl_obj.subobject_list { + let (base, superscript) = + self.parse_superscript_subobjects(obj_list)?; + Ok(TemplateParser::parse_superscript(base, superscript)) + } else { + Ok(MathNode::Text(Cow::Borrowed("^{}"))) + } + }, + 1 => { + // Subscript + if let Some(obj_list) = &tmpl_obj.subobject_list { + let (base, subscript) = self.parse_subscript_subobjects(obj_list)?; + Ok(TemplateParser::parse_subscript(base, subscript)) + } else { + Ok(MathNode::Text(Cow::Borrowed("_{}"))) + } + }, + 2 => { + // Sub+Sup + if let Some(obj_list) = &tmpl_obj.subobject_list { + let (base, subscript, superscript) = + self.parse_subsup_subobjects(obj_list)?; + Ok(TemplateParser::parse_subsup(base, subscript, superscript)) + } else { + Ok(MathNode::Text(Cow::Borrowed("_{}^{}"))) + } + }, + _ => Ok(MathNode::Text(Cow::Borrowed("_{}^{}"))), // fallback + } + }, + 21 => { + // Integrals + // For now, just create a simple integral node + // This should be expanded to handle limits properly + if let Some(obj_list) = &tmpl_obj.subobject_list { + let integrand = self.parse_single_subobject(obj_list)?; + Ok(MathNode::LargeOp { + operator: crate::ast::LargeOperator::Integral, + lower_limit: None, + upper_limit: None, + integrand: Some(integrand), + hide_lower: true, + hide_upper: true, + }) + } else { + Ok(MathNode::Text(Cow::Borrowed("\\int "))) + } + }, + _ => { + // Unknown template - return as placeholder + Ok(MathNode::Text(Cow::Owned(format!( + "\\unknown_template_{}_{{{}}}", + tmpl_obj.selector, tmpl_obj.variation + )))) + }, + } + } + + fn parse_template_arguments( + &self, + obj_list: &MtefObjectList, + ) -> Result, MtefError> { + // Parse template arguments from subobjects + // This follows the rtf2latex2e pattern where arguments are separated by LINE objects + let mut args = TemplateArgs::new(); + let mut current_arg = 
smallvec::SmallVec::new(); + let mut current = Some(obj_list); + + while let Some(obj) = current { + match obj.tag { + MtefRecordType::Line => { + if let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? + { + current_arg.extend(line_nodes); + } + }, + MtefRecordType::Pile => { + // Piles can separate arguments + if !current_arg.is_empty() { + args.push(current_arg); + current_arg = smallvec::SmallVec::new(); + } + if let Some(pile_obj) = obj.obj_ptr.as_any().downcast_ref::() { + let pile_node = self.convert_pile_to_node(pile_obj)?; + current_arg.push(pile_node); + } + }, + _ => { + // Other objects go into current argument + let nodes = self.convert_single_object_to_ast(obj)?; + current_arg.extend(nodes); + }, + } + current = obj.next.as_deref(); + } + + // Add the last argument if not empty + if !current_arg.is_empty() { + args.push(current_arg); + } + + Ok(args) + } + + fn parse_fraction_subobjects( + &self, + obj_list: &MtefObjectList, + ) -> Result<(Vec>, Vec>), MtefError> { + // Parse LINE objects as numerator and denominator + let mut numerator = Vec::new(); + let mut denominator = Vec::new(); + let mut current = Some(obj_list); + + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? 
+ { + if numerator.is_empty() { + numerator = line_nodes; + } else { + denominator = line_nodes; + } + } + current = obj.next.as_deref(); + } + + Ok((numerator, denominator)) + } + + fn parse_root_subobjects( + &self, + obj_list: &MtefObjectList, + ) -> Result<(Vec>, Vec>), MtefError> { + // Parse LINE objects as index and base + let mut index = Vec::new(); + let mut base = Vec::new(); + let mut current = Some(obj_list); + + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? + { + if index.is_empty() { + index = line_nodes; + } else { + base = line_nodes; + } + } + current = obj.next.as_deref(); + } + + Ok((base, index)) + } + + fn parse_subscript_subobjects( + &self, + obj_list: &MtefObjectList, + ) -> Result<(Vec>, Vec>), MtefError> { + // Parse LINE objects as base and subscript + let mut base = Vec::new(); + let mut subscript = Vec::new(); + let mut current = Some(obj_list); + + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? + { + if base.is_empty() { + base = line_nodes; + } else { + subscript = line_nodes; + } + } + current = obj.next.as_deref(); + } + + Ok((base, subscript)) + } + + fn parse_superscript_subobjects( + &self, + obj_list: &MtefObjectList, + ) -> Result<(Vec>, Vec>), MtefError> { + // Parse LINE objects as base and superscript + let mut base = Vec::new(); + let mut superscript = Vec::new(); + let mut current = Some(obj_list); + + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? 
+ { + if base.is_empty() { + base = line_nodes; + } else { + superscript = line_nodes; + } + } + current = obj.next.as_deref(); + } + + Ok((base, superscript)) + } + + fn parse_subsup_subobjects(&self, obj_list: &MtefObjectList) -> SubSupResult<'arena> { + // Parse LINE objects as base, subscript, and superscript + let mut base = Vec::new(); + let mut subscript = Vec::new(); + let mut superscript = Vec::new(); + let mut current = Some(obj_list); + + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? + { + if base.is_empty() { + base = line_nodes; + } else if subscript.is_empty() { + subscript = line_nodes; + } else { + superscript = line_nodes; + } + } + current = obj.next.as_deref(); + } + + Ok((base, subscript, superscript)) + } + + fn parse_single_subobject( + &self, + obj_list: &MtefObjectList, + ) -> Result>, MtefError> { + // Parse a single subobject (typically for templates with one content area) + let mut current = Some(obj_list); + let mut result = Vec::new(); + + while let Some(obj) = current { + match obj.tag { + MtefRecordType::Line => { + if let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? 
+ { + result.extend(line_nodes); + } + }, + _ => { + // Convert other object types directly + let nodes = self.convert_single_object_to_ast(obj)?; + result.extend(nodes); + }, + } + current = obj.next.as_deref(); + } + + Ok(result) + } + + fn convert_single_object_to_ast( + &self, + obj: &MtefObjectList, + ) -> Result>, MtefError> { + // Convert a single object to AST nodes + let mut nodes = Vec::new(); + + match obj.tag { + MtefRecordType::Char => { + if let Some(char_obj) = obj.obj_ptr.as_any().downcast_ref::() { + nodes.push(self.convert_char_to_node(char_obj)?); + } + }, + MtefRecordType::Tmpl => { + if let Some(tmpl_obj) = obj.obj_ptr.as_any().downcast_ref::() { + nodes.push(self.convert_template_to_node(tmpl_obj)?); + } + }, + MtefRecordType::Pile => { + if let Some(pile_obj) = obj.obj_ptr.as_any().downcast_ref::() { + nodes.push(self.convert_pile_to_node(pile_obj)?); + } + }, + MtefRecordType::Matrix => { + if let Some(matrix_obj) = obj.obj_ptr.as_any().downcast_ref::() { + nodes.push(self.convert_matrix_to_node(matrix_obj)?); + } + }, + MtefRecordType::Font => { + // Font objects affect character rendering but don't generate output + }, + MtefRecordType::Size + | MtefRecordType::Full + | MtefRecordType::Sub + | MtefRecordType::Sub2 + | MtefRecordType::Sym + | MtefRecordType::SubSym => { + // Size objects affect character size but don't generate output + }, + _ => { + // Skip other record types for now + }, + } + + Ok(nodes) + } + + fn convert_line_to_nodes( + &self, + line_obj: &MtefLine, + ) -> Result>>, MtefError> { + if let Some(obj_list) = &line_obj.object_list { + Ok(Some(self.convert_objects_to_ast(obj_list)?)) + } else { + Ok(None) + } + } + + fn convert_pile_to_node(&self, pile_obj: &MtefPile) -> Result, MtefError> { + // Convert pile to appropriate AST node + // Piles are vertical stacks of elements, often used for fractions, limits, etc. 
+ if let Some(line_list) = &pile_obj.line_list { + let mut rows = Vec::new(); + let mut current: Option<&MtefObjectList> = Some(line_list); + + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + { + if let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? { + // Each line becomes a row in the pile + rows.push(vec![line_nodes]); + } else { + // Empty line - add empty row + rows.push(vec![Vec::new()]); + } + } + current = obj.next.as_deref(); + } + + if rows.len() == 1 { + // Single row - just return the content + Ok(MathNode::Row( + rows.into_iter().flatten().flatten().collect(), + )) + } else if rows.len() == 2 { + // Two rows - could be a fraction or other binary operation + // For now, represent as a simple vertical stack + Ok(MathNode::Matrix { + rows, + fence_type: MatrixFence::None, + properties: None, + }) + } else if !rows.is_empty() { + // Multiple rows - create a matrix structure + Ok(MathNode::Matrix { + rows, + fence_type: MatrixFence::None, + properties: None, + }) + } else { + Ok(MathNode::Text(Cow::Borrowed("\\pile"))) + } + } else { + Ok(MathNode::Text(Cow::Borrowed("\\pile"))) + } + } + + fn convert_matrix_to_node( + &self, + matrix_obj: &MtefMatrix, + ) -> Result, MtefError> { + // Convert matrix to proper matrix AST node + // MTEF matrices store elements in row-major order + if let Some(element_list) = &matrix_obj.element_list { + let mut rows = Vec::new(); + let mut current: Option<&MtefObjectList> = Some(element_list); + let mut cell_index = 0; + let total_cells = (matrix_obj.rows as usize) * (matrix_obj.cols as usize); + + // Initialize rows + for _ in 0..(matrix_obj.rows as usize) { + let mut row = Vec::new(); + for _ in 0..(matrix_obj.cols as usize) { + row.push(Vec::new()); // Initialize empty cells + } + rows.push(row); + } + + // Fill matrix cells + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = 
obj.obj_ptr.as_any().downcast_ref::() + && let Some(line_nodes) = self.convert_line_to_nodes(line_obj)? + { + // A malformed record can declare zero columns; such a matrix has no + // addressable cells, so stop filling instead of dividing by zero below + let cols = matrix_obj.cols as usize; + if cols == 0 { + break; + } + + // Calculate row and column from cell index + let row_idx = cell_index / cols; + let col_idx = cell_index % cols; + + if row_idx < rows.len() && col_idx < rows[row_idx].len() { + rows[row_idx][col_idx] = line_nodes; + } + cell_index += 1; + } + current = obj.next.as_deref(); + + // Safety check to prevent infinite loops + if cell_index >= total_cells { + break; + } + } + + // Determine fence type based on matrix properties + // This is a simplified approach - in a full implementation, + // this might be determined by context or additional MTEF data + let fence_type = match (matrix_obj.rows, matrix_obj.cols) { + (1, _) => MatrixFence::None, // Row vector + (_, 1) => MatrixFence::None, // Column vector + _ => MatrixFence::Paren, // General matrix with parentheses + }; + + Ok(MathNode::Matrix { + rows, + fence_type, + properties: None, + }) + } else { + // Empty matrix + Ok(MathNode::Matrix { + rows: Vec::new(), + fence_type: MatrixFence::None, + properties: None, + }) + } + } + + fn convert_fence_template( + &self, + tmpl_obj: &MtefTemplate, + ) -> Result, MtefError> { + // Convert fence templates (parentheses, brackets, braces, etc.) 
to Fence AST nodes + let fence_type = match tmpl_obj.selector { + 0 => match tmpl_obj.variation { + 1 | 2 => Fence::Angle, // left/right only or both + 3 => Fence::Angle, + _ => Fence::Angle, + }, + 1 => Fence::Paren, + 2 => Fence::Brace, + 3 => Fence::Bracket, + 4 => Fence::Pipe, + 5 => Fence::DoublePipe, + 6 => Fence::Floor, + 7 => Fence::Ceiling, + 8 => Fence::SquareBracket, + 9 => match tmpl_obj.variation { + 0 => Fence::SquareBracket, + 16 => Fence::Paren, + 17 => Fence::Paren, + 18 => Fence::Bracket, + 19 => Fence::Bracket, + 32 => Fence::Paren, + 33 => Fence::Paren, + 34 => Fence::SquareBracket, + 35 => Fence::SquareBracket, + 48 => Fence::Paren, + 49 => Fence::Paren, + 50 => Fence::Bracket, + 51 => Fence::Bracket, + _ => Fence::Paren, + }, + _ => Fence::Paren, + }; + + // Parse the content inside the fence + let content = if let Some(obj_list) = &tmpl_obj.subobject_list { + self.parse_single_subobject(obj_list)? + } else { + Vec::new() + }; + + Ok(TemplateParser::parse_fence(fence_type, content)) + } + + fn convert_decoration_template( + &self, + tmpl_obj: &MtefTemplate, + ) -> Result, MtefError> { + // Convert underline/overline templates + let content = if let Some(obj_list) = &tmpl_obj.subobject_list { + self.parse_single_subobject(obj_list)? 
+ } else { + Vec::new() + }; + + match tmpl_obj.selector { + 12 => { + // Underline + let underline_style = if tmpl_obj.variation == 1 { + LineStyle::Double + } else { + LineStyle::Single + }; + Ok(MathNode::Run { + content, + literal: None, + style: None, + font: None, + color: None, + underline: Some(underline_style), + overline: None, + strike_through: None, + double_strike_through: None, + }) + }, + 13 => { + // Overline + let overline_style = if tmpl_obj.variation == 1 { + LineStyle::Double + } else { + LineStyle::Single + }; + Ok(MathNode::Run { + content, + literal: None, + style: None, + font: None, + color: None, + underline: None, + overline: Some(overline_style), + strike_through: None, + double_strike_through: None, + }) + }, + _ => Ok(MathNode::Text(Cow::Borrowed("\\decoration"))), + } + } + + fn convert_arrow_template( + &self, + tmpl_obj: &MtefTemplate, + ) -> Result, MtefError> { + // Convert arrow templates to appropriate AST nodes + // For now, fall back to template parsing + let variation = tmpl_obj.variation; + if let Some(template_def) = TemplateParser::find_template(tmpl_obj.selector, variation) { + let args = if let Some(obj_list) = &tmpl_obj.subobject_list { + self.parse_template_arguments(obj_list)? + } else { + smallvec::SmallVec::new() + }; + Ok(TemplateParser::parse_template_arguments( + template_def.template, + &args, + )) + } else { + Ok(MathNode::Text(Cow::Borrowed("\\arrow"))) + } + } + + fn convert_large_op_template( + &self, + tmpl_obj: &MtefTemplate, + ) -> Result, MtefError> { + // Convert large operator templates (sum, product, etc.) 
+ let operator = match tmpl_obj.selector { + 16 | 22 => LargeOperator::Sum, + 17 => LargeOperator::Product, + 18 => LargeOperator::Coproduct, + 19 => LargeOperator::Union, + 20 => LargeOperator::Intersection, + _ => LargeOperator::Sum, + }; + + // Parse limits from subobjects + let (lower_limit, upper_limit, integrand) = if let Some(obj_list) = &tmpl_obj.subobject_list + { + self.parse_large_op_subobjects(obj_list)? + } else { + (None, None, Vec::new()) + }; + + Ok(TemplateParser::parse_large_op( + operator, + lower_limit.unwrap_or_default(), + upper_limit.unwrap_or_default(), + integrand, + )) + } + + fn convert_limit_template( + &self, + tmpl_obj: &MtefTemplate, + ) -> Result, MtefError> { + // Convert limit templates + // Parse the limit expression and the approaching value + let (function, approaching) = if let Some(obj_list) = &tmpl_obj.subobject_list { + self.parse_limit_subobjects(obj_list)? + } else { + (Vec::new(), Vec::new()) + }; + + // For now, create a simple limit node - combine function and approaching value + let mut content = function; + if !approaching.is_empty() { + content.push(MathNode::Text(Cow::Borrowed(" \\to "))); + content.extend(approaching); + } + + Ok(MathNode::Limit { + content: Box::new(content), + limit_type: crate::ast::LimitType::Upper, // Default to upper for general limits + }) + } + + fn convert_brace_template( + &self, + tmpl_obj: &MtefTemplate, + ) -> Result, MtefError> { + // Convert horizontal brace templates + let _is_upper = tmpl_obj.variation == 1; + + let (_content, _brace_text) = if let Some(obj_list) = &tmpl_obj.subobject_list { + self.parse_brace_subobjects(obj_list)? + } else { + (Vec::new(), Vec::new()) + }; + + // For now, fall back to template parsing + let variation = tmpl_obj.variation; + if let Some(template_def) = TemplateParser::find_template(tmpl_obj.selector, variation) { + let args = if let Some(obj_list) = &tmpl_obj.subobject_list { + self.parse_template_arguments(obj_list)? 
+ } else { + smallvec::SmallVec::new() + }; + Ok(TemplateParser::parse_template_arguments( + template_def.template, + &args, + )) + } else { + Ok(MathNode::Text(Cow::Borrowed("\\brace"))) + } + } + + fn parse_large_op_subobjects(&self, obj_list: &MtefObjectList) -> LargeOpResult<'arena> { + // Parse subobjects for large operators: lower_limit, upper_limit, integrand + let mut lower_limit = None; + let mut upper_limit = None; + let mut integrand = Vec::new(); + + // Large operators typically have integrand first, then limits + // This is a simplified parsing - real implementation would be more complex + let mut current = Some(obj_list); + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(nodes) = self.convert_line_to_nodes(line_obj)? + { + if integrand.is_empty() { + integrand = nodes; + } else if lower_limit.is_none() { + lower_limit = Some(nodes); + } else if upper_limit.is_none() { + upper_limit = Some(nodes); + } + } + current = obj.next.as_deref(); + } + + Ok((lower_limit, upper_limit, integrand)) + } + + fn parse_limit_subobjects( + &self, + obj_list: &MtefObjectList, + ) -> Result<(Vec>, Vec>), MtefError> { + // Parse subobjects for limits: function and approaching value + let mut function = Vec::new(); + let mut approaching = Vec::new(); + + let mut current = Some(obj_list); + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(nodes) = self.convert_line_to_nodes(line_obj)? 
+ { + if function.is_empty() { + function = nodes; + } else { + approaching = nodes; + } + } + current = obj.next.as_deref(); + } + + Ok((function, approaching)) + } + + fn parse_brace_subobjects( + &self, + obj_list: &MtefObjectList, + ) -> Result<(Vec>, Vec>), MtefError> { + // Parse subobjects for braces: content and brace symbol + let mut content = Vec::new(); + let mut brace_text = Vec::new(); + + let mut current = Some(obj_list); + while let Some(obj) = current { + if obj.tag == MtefRecordType::Line + && let Some(line_obj) = obj.obj_ptr.as_any().downcast_ref::() + && let Some(nodes) = self.convert_line_to_nodes(line_obj)? + { + if content.is_empty() { + content = nodes; + } else { + brace_text = nodes; + } + } + current = obj.next.as_deref(); + } + + Ok((content, brace_text)) + } +} diff --git a/src/formula/mtef/binary/headers.rs b/crates/litchi-formula/src/mtef/binary/headers.rs similarity index 100% rename from src/formula/mtef/binary/headers.rs rename to crates/litchi-formula/src/mtef/binary/headers.rs diff --git a/src/formula/mtef/binary/mod.rs b/crates/litchi-formula/src/mtef/binary/mod.rs similarity index 100% rename from src/formula/mtef/binary/mod.rs rename to crates/litchi-formula/src/mtef/binary/mod.rs diff --git a/src/formula/mtef/binary/objects.rs b/crates/litchi-formula/src/mtef/binary/objects.rs similarity index 100% rename from src/formula/mtef/binary/objects.rs rename to crates/litchi-formula/src/mtef/binary/objects.rs diff --git a/crates/litchi-formula/src/mtef/binary/parser.rs b/crates/litchi-formula/src/mtef/binary/parser.rs new file mode 100644 index 0000000..00b280b --- /dev/null +++ b/crates/litchi-formula/src/mtef/binary/parser.rs @@ -0,0 +1,844 @@ +//! MTEF Binary Parser - Main parsing logic +//! +//! This module implements the core MTEF binary parsing functionality. +//! Based on rtf2latex2e Eqn_GetObjectList and related parsing functions. +//! +//! The parser reads MTEF binary data sequentially, parsing different record types +//! 
(characters, templates, lines, etc.) and building a linked list of objects +//! that can be converted to AST nodes. + +use super::objects::*; +use crate::mtef::MtefError; +use crate::mtef::constants::*; +use zerocopy::{FromBytes, I16, LE, U16}; + +/// Binary MTEF parser +/// +/// Maintains parser state including current position, version information, +/// and math/text mode context. +pub struct MtefBinaryParser<'arena> { + /// Arena allocator for lifetime-managed memory + /// Part of parser infrastructure, kept for future arena-based node allocation + #[allow(dead_code)] + arena: &'arena bumpalo::Bump, + /// MTEF binary data being parsed + data: &'arena [u8], + /// Current position in the data stream + pos: usize, + /// MTEF version number (typically 5) + pub mtef_version: u8, + /// Platform identifier (0=Mac, 1=Windows) + pub platform: u8, + /// Product identifier + pub product: u8, + /// Product version + pub version: u8, + /// Product sub-version + pub version_sub: u8, + /// Inline mode flag (0=display, 1=inline) + pub inline: u8, + /// Current math/text mode (EQN_MODE_TEXT, EQN_MODE_INLINE, EQN_MODE_DISPLAY) + pub mode: i32, +} + +impl<'arena> MtefBinaryParser<'arena> { + /// Get attribute byte(s) according to MTEF version (matches rtf2latex2e GetAttribute) + fn get_attribute(&mut self) -> Result { + if self.mtef_version < 5 { + // For MTEF < 5, attribute is in high nibble of current byte + let byte = self.read_u8()?; + Ok((byte & 0xF0) >> 4) // HiNibble equivalent + } else { + // For MTEF >= 5, attribute is the next byte + self.read_u8() + } + } + + /// Get nudge values (matches rtf2latex2e GetNudge) + fn get_nudge(&mut self) -> Result<(i16, i16), MtefError> { + let b1 = self.read_u8()?; + let b2 = self.read_u8()?; + + if b1 == 128 && b2 == 128 { + // Extended nudge format + let x = self.read_i16()?; + let y = self.read_i16()?; + Ok((x, y)) + } else { + // Simple nudge format + Ok((b1 as i16, b2 as i16)) + } + } + + /// Create a new MTEF binary parser + pub fn 
new(arena: &'arena bumpalo::Bump, data: &'arena [u8]) -> Result { + if data.len() < 28 { + return Err(MtefError::InvalidFormat( + "Data too short for OLE header".to_string(), + )); + } + + // Parse OLE header manually for better compatibility + // Read fields in little-endian order + let cb_hdr = u16::from_le_bytes([data[0], data[1]]); + let version = u32::from_le_bytes([data[2], data[3], data[4], data[5]]); + + if cb_hdr != 28 { + return Err(MtefError::InvalidFormat(format!( + "Invalid OLE header length: {}", + cb_hdr + ))); + } + + // Accept both 0x00020000 and 0x00000200 as valid versions (observed in real files) + if version != 0x00020000 && version != 0x00000200 { + return Err(MtefError::InvalidFormat(format!( + "Invalid OLE version: 0x{:08X}", + version + ))); + } + + // Note: The clipboard format can vary (0xC2D3, 0xC1B0, 0xC1E1, 0xC1AE, etc.) + // so we don't validate it strictly. The MTEF signature check below is sufficient. + + let mut parser = Self { + arena, + data, + pos: 28, + mtef_version: 0, + platform: 0, + product: 0, + version: 0, + version_sub: 0, + inline: 0, + mode: EQN_MODE_DISPLAY, // Default to display mode + }; + + parser.read_mtef_header()?; + Ok(parser) + } + + fn read_mtef_header(&mut self) -> Result<(), MtefError> { + if self.data.len() < self.pos + 5 { + return Err(MtefError::UnexpectedEof); + } + + // Check if we have the full MTEF signature "(\x04mt" (0x28 0x04 0x6D 0x74) + // or if this is a headerless/embedded format that starts directly with the version + let has_signature = self.pos + 4 <= self.data.len() + && self.data[self.pos] == 0x28 + && self.data[self.pos + 1] == 0x04 + && self.data[self.pos + 2] == 0x6D + && self.data[self.pos + 3] == 0x74; + + if has_signature { + // Full format with signature + self.pos += 4; + self.mtef_version = self.read_u8()?; + } else { + // Headerless/embedded format - starts directly with version byte + // This format is used in some embedded equations + self.mtef_version = self.read_u8()?; + } + 
+ // Handle different MTEF versions + match self.mtef_version { + 0 => { + self.mtef_version = 5; + self.platform = 0; + self.product = 0; + self.version = 0; + self.version_sub = 0; + }, + 1 | 101 => { + self.platform = if self.mtef_version == 101 { 1 } else { 0 }; + self.product = 0; + self.version = 1; + self.version_sub = 0; + }, + 2..=4 => { + self.platform = self.read_u8()?; + self.product = self.read_u8()?; + self.version = self.read_u8()?; + self.version_sub = self.read_u8()?; + }, + 5 => { + self.platform = self.read_u8()?; + self.product = self.read_u8()?; + self.version = self.read_u8()?; + self.version_sub = self.read_u8()?; + + // Application key (null-terminated string) + while self.pos < self.data.len() && self.data[self.pos] != 0 { + self.pos += 1; + } + if self.pos >= self.data.len() { + return Err(MtefError::UnexpectedEof); + } + self.pos += 1; // Skip null terminator + + self.inline = self.read_u8()?; + }, + _ => { + return Err(MtefError::InvalidFormat(format!( + "Unsupported MTEF version: {}", + self.mtef_version + ))); + }, + } + + Ok(()) + } + + /// Parse the MTEF equation into AST nodes + pub fn parse(&mut self) -> Result>, MtefError> { + let object_list = self.parse_object_list(2)?; // Expect at least 2 objects (SIZE + LINE/PILE) + + if let Some(obj_list) = object_list { + self.convert_objects_to_ast(&obj_list) + } else { + Ok(Vec::new()) + } + } + + fn parse_object_list( + &mut self, + num_objs: usize, + ) -> Result>, MtefError> { + let mut head: Option> = None; + let mut curr: Option<*mut MtefObjectList> = None; + let mut tally = 0; + let start_pos = self.pos; // For error reporting + + // Prevent infinite loops by limiting iterations + let mut iterations = 0; + const MAX_ITERATIONS: usize = 10000; + + loop { + // Lenient EOF handling: if we reach the end of data, treat it as implicit END tag + // This matches rtf2latex2e behavior and handles slightly truncated MTEF data + if self.pos >= self.data.len() { + // Return what we've parsed so 
far + break; + } + + // Prevent infinite loops + iterations += 1; + if iterations > MAX_ITERATIONS { + return Err(MtefError::ParseError(format!( + "Too many objects parsed (>{}), possible infinite loop at position {}", + MAX_ITERATIONS, start_pos + ))); + } + + // Get current tag based on MTEF version + let curr_tag = if self.mtef_version == 5 { + self.data[self.pos] + } else { + self.data[self.pos] & 0x0F + }; + + // END tag handling - return immediately + if curr_tag == crate::mtef::constants::END { + self.pos += 1; + break; + } + + let record_type = match curr_tag { + crate::mtef::constants::LINE => MtefRecordType::Line, + crate::mtef::constants::CHAR => MtefRecordType::Char, + crate::mtef::constants::TMPL => MtefRecordType::Tmpl, + crate::mtef::constants::PILE => MtefRecordType::Pile, + crate::mtef::constants::MATRIX => MtefRecordType::Matrix, + crate::mtef::constants::EMBELL => MtefRecordType::Embell, + crate::mtef::constants::RULER => MtefRecordType::Ruler, + crate::mtef::constants::FONT => MtefRecordType::Font, + crate::mtef::constants::SIZE => MtefRecordType::Size, + crate::mtef::constants::FULL => MtefRecordType::Full, + crate::mtef::constants::SUB => MtefRecordType::Sub, + crate::mtef::constants::SUB2 => MtefRecordType::Sub2, + crate::mtef::constants::SYM => MtefRecordType::Sym, + crate::mtef::constants::SUBSYM => MtefRecordType::SubSym, + crate::mtef::constants::COLOR => MtefRecordType::Color, + crate::mtef::constants::COLOR_DEF => MtefRecordType::ColorDef, + crate::mtef::constants::FONT_DEF => MtefRecordType::FontDef, + crate::mtef::constants::EQN_PREFS => MtefRecordType::EqnPrefs, + crate::mtef::constants::ENCODING_DEF => MtefRecordType::EncodingDef, + _ => MtefRecordType::Future, + }; + + // Parse the object based on its type + // Handle EOF gracefully by catching errors and breaking the loop + let obj_ptr: Option> = match record_type { + MtefRecordType::Char => match self.parse_char() { + Ok(obj) => Some(Box::new(obj)), + 
Err(MtefError::UnexpectedEof) => break, // EOF hit, return what we have + Err(e) => return Err(e), + }, + MtefRecordType::Tmpl => match self.parse_template() { + Ok(obj) => Some(Box::new(obj)), + Err(MtefError::UnexpectedEof) => break, + Err(e) => return Err(e), + }, + MtefRecordType::Line => match self.parse_line() { + Ok(obj) => Some(Box::new(obj)), + Err(MtefError::UnexpectedEof) => break, + Err(e) => return Err(e), + }, + MtefRecordType::Pile => match self.parse_pile() { + Ok(obj) => Some(Box::new(obj)), + Err(MtefError::UnexpectedEof) => break, + Err(e) => return Err(e), + }, + MtefRecordType::Matrix => match self.parse_matrix() { + Ok(obj) => Some(Box::new(obj)), + Err(MtefError::UnexpectedEof) => break, + Err(e) => return Err(e), + }, + MtefRecordType::Embell => match self.parse_embell() { + Ok(obj) => Some(Box::new(obj)), + Err(MtefError::UnexpectedEof) => break, + Err(e) => return Err(e), + }, + MtefRecordType::Ruler => match self.parse_ruler() { + Ok(obj) => Some(Box::new(obj)), + Err(MtefError::UnexpectedEof) => break, + Err(e) => return Err(e), + }, + MtefRecordType::Font => match self.parse_font() { + Ok(obj) => Some(Box::new(obj)), + Err(MtefError::UnexpectedEof) => break, + Err(e) => return Err(e), + }, + MtefRecordType::Size + | MtefRecordType::Full + | MtefRecordType::Sub + | MtefRecordType::Sub2 + | MtefRecordType::Sym + | MtefRecordType::SubSym => match self.parse_size() { + Ok(obj) => Some(Box::new(obj)), + Err(MtefError::UnexpectedEof) => break, + Err(e) => return Err(e), + }, + MtefRecordType::ColorDef => { + // Skip color definition - just skip the tag + if self.pos < self.data.len() { + self.pos += 1; + } + None + }, + MtefRecordType::FontDef => { + if self.skip_font_def().is_err() { + break; // EOF hit + } + None + }, + MtefRecordType::EqnPrefs => { + if self.skip_eqn_prefs().is_err() { + break; // EOF hit + } + None + }, + MtefRecordType::EncodingDef => { + if self.skip_encoding_def().is_err() { + break; // EOF hit + } + None + }, + 
MtefRecordType::Future => { + if self.skip_future_record().is_err() { + break; // EOF hit + } + None + }, + _ => { + // Unknown record type - skip it + if self.skip_unknown_record().is_err() { + break; // EOF hit + } + None + }, + }; + + // Only create a node if we have an object + if let Some(obj) = obj_ptr { + // Create object list node + let new_node = Box::new(MtefObjectList { + tag: record_type, + obj_ptr: obj, + next: None, + }); + + // Link into the list + match curr { + Some(curr_ptr) => unsafe { + (*curr_ptr).next = Some(new_node); + curr = (*curr_ptr).next.as_mut().map(|n| n.as_mut() as *mut _); + }, + None => { + head = Some(new_node); + curr = head.as_mut().map(|n| n.as_mut() as *mut _); + }, + } + + tally += 1; + + if num_objs > 0 && tally == num_objs { + break; + } + } + } + + Ok(head) + } + + fn parse_char(&mut self) -> Result { + let attrs = self.get_attribute()?; + + let mut nudge_x = 0i16; + let mut nudge_y = 0i16; + if attrs & CHAR_NUDGE != 0 { + let nudge_result = self.get_nudge()?; + nudge_x = nudge_result.0; + nudge_y = nudge_result.1; + } + + let typeface = self.read_u8()?; + + let mut character = 0u16; + let mut bits16 = 0u16; + + if self.mtef_version < 5 { + character = self.read_u8()? as u16; + if self.platform == 1 { + // PLATFORM_WIN + character |= (self.read_u8()? as u16) << 8; + } + } else { + // Nearly always have a 16 bit MT character + if attrs & CHAR_ENC_NO_MTCODE == 0 { + character = self.read_u16()?; + } + + if attrs & CHAR_ENC_CHAR_8 != 0 { + character = self.read_u8()? 
as u16; + } + + if attrs & CHAR_ENC_CHAR_16 != 0 { + bits16 = self.read_u16()?; + } + } + + let embellishment_list = if self.mtef_version == 5 { + if attrs & CHAR_EMBELL != 0 { + Some(Box::new(self.parse_embell()?)) + } else { + None + } + } else if attrs & crate::mtef::constants::XF_EMBELL != 0 { + Some(Box::new(self.parse_embell()?)) + } else { + None + }; + + Ok(MtefChar { + nudge_x, + nudge_y, + atts: attrs, + typeface, + character, + bits16, + embellishment_list, + }) + } + + fn parse_template(&mut self) -> Result { + let attrs = self.get_attribute()?; + + let mut nudge_x = 0i16; + let mut nudge_y = 0i16; + if attrs & XF_LMOVE != 0 { + let nudge_result = self.get_nudge()?; + nudge_x = nudge_result.0; + nudge_y = nudge_result.1; + } + + let selector = self.read_u8()?; + let mut variation = self.read_u8()? as u16; + + if self.mtef_version == 5 && (variation & 0x80) != 0 { + variation &= 0x7F; + variation |= (self.read_u8()? as u16) << 7; + } + + let options = self.read_u8()?; + + let subobject_list = if attrs & XF_NULL != 0 { + None + } else { + self.parse_object_list(0)? + }; + + Ok(MtefTemplate { + nudge_x, + nudge_y, + selector, + variation, + options, + subobject_list, + }) + } + + fn parse_line(&mut self) -> Result { + let attrs = self.get_attribute()?; + + let mut nudge_x = 0i16; + let mut nudge_y = 0i16; + if attrs & XF_LMOVE != 0 { + let nudge_result = self.get_nudge()?; + nudge_x = nudge_result.0; + nudge_y = nudge_result.1; + } + + let line_spacing = if attrs & XF_LSPACE != 0 { + self.read_u8()? 
+ } else { + 0 + }; + + let ruler = if attrs & XF_RULER != 0 { + Some(Box::new(self.parse_ruler()?)) + } else { + None + }; + + let object_list = self.parse_object_list(0)?; + + Ok(MtefLine { + nudge_x, + nudge_y, + line_spacing, + ruler, + object_list, + }) + } + + fn parse_pile(&mut self) -> Result { + let attrs = self.get_attribute()?; + + let mut nudge_x = 0i16; + let mut nudge_y = 0i16; + if attrs & XF_LMOVE != 0 { + let nudge_result = self.get_nudge()?; + nudge_x = nudge_result.0; + nudge_y = nudge_result.1; + } + + let halign = self.read_u8()?; + let valign = self.read_u8()?; + + let ruler = if attrs & XF_RULER != 0 { + Some(Box::new(self.parse_ruler()?)) + } else { + None + }; + + let line_list = self.parse_object_list(0)?; + + Ok(MtefPile { + nudge_x, + nudge_y, + halign, + valign, + ruler, + line_list, + }) + } + + fn parse_matrix(&mut self) -> Result { + let attrs = self.get_attribute()?; + + let mut nudge_x = 0i16; + let mut nudge_y = 0i16; + if attrs & XF_LMOVE != 0 { + let nudge_result = self.get_nudge()?; + nudge_x = nudge_result.0; + nudge_y = nudge_result.1; + } + + let valign = self.read_u8()?; + let h_just = self.read_u8()?; + let v_just = self.read_u8()?; + let rows = self.read_u8()?; + let cols = self.read_u8()?; + + // Read row and column partitions + let mut row_parts = [0u8; 16]; + let mut col_parts = [0u8; 16]; + + // Row partition consists of (rows+1) two-bit values + let row_bytes = (2 * (rows as usize + 1)).div_ceil(8); + for i in 0..row_bytes { + if i < row_parts.len() { + row_parts[i] = self.read_u8()?; + } + } + + // Col partition consists of (cols+1) two-bit values + let col_bytes = (2 * (cols as usize + 1)).div_ceil(8); + for i in 0..col_bytes { + if i < col_parts.len() { + col_parts[i] = self.read_u8()?; + } + } + + let element_list = self.parse_object_list(0)?; + + Ok(MtefMatrix { + nudge_x, + nudge_y, + valign, + h_just, + v_just, + rows, + cols, + row_parts, + col_parts, + element_list, + }) + } + + fn parse_embell(&mut self) -> 
Result { + let attrs = self.get_attribute()?; + + let mut nudge_x = 0i16; + let mut nudge_y = 0i16; + if attrs & XF_LMOVE != 0 { + let nudge_result = self.get_nudge()?; + nudge_x = nudge_result.0; + nudge_y = nudge_result.1; + } + + let embell = self.read_u8()?; + + Ok(MtefEmbell { + nudge_x, + nudge_y, + embell, + next: None, // Chaining is handled at a higher level + }) + } + + /// Parse a RULER record: a stop-count byte followed by one (type byte, 16-bit offset) + /// pair per tab stop, returned as a singly linked list in input order. + /// + /// # Errors + /// Returns `MtefError::UnexpectedEof` if the data ends before the record does. + fn parse_ruler(&mut self) -> Result { + // If we arrived here from LINE, skip the RULER tag if present. + // Peek through `get` so that a ruler flag on truncated data reports + // UnexpectedEof instead of panicking on an out-of-bounds index. + let raw = *self.data.get(self.pos).ok_or(MtefError::UnexpectedEof)?; + let tag = if self.mtef_version == 5 { raw } else { raw & 0x0F }; + if tag == crate::mtef::constants::RULER { + self.pos += 1; // Skip the ruler tag + } + + let n_stops = self.read_u8()? as i16; + let mut head: Option> = None; + let mut curr: Option<*mut MtefTabstop> = None; + + for _ in 0..n_stops { + let r#type = self.read_u8()? as i16; + let offset = self.read_i16()?; + + let new_tabstop = Box::new(MtefTabstop { + r#type, + offset, + next: None, + }); + + match curr { + // SAFETY: curr_ptr was taken from the Box of the most recently appended node; + // that allocation is owned by the list rooted at `head` and is neither moved + // nor dropped inside this loop. + Some(curr_ptr) => unsafe { + (*curr_ptr).next = Some(new_tabstop); + curr = Some((*curr_ptr).next.as_mut().unwrap().as_mut() as *mut _); + }, + None => { + head = Some(new_tabstop); + curr = head.as_mut().map(|n| n.as_mut() as *mut _); + }, + } + } + + Ok(MtefRuler { + n_stops, + tabstop_list: head, + }) + } + + fn parse_font(&mut self) -> Result { + let tface = self.read_u8()? as i32; + let style = self.read_u8()? as i32; + + // Read null-terminated font name + let start_pos = self.pos; + while self.pos < self.data.len() && self.data[self.pos] != 0 { + self.pos += 1; + } + if self.pos >= self.data.len() { + return Err(MtefError::UnexpectedEof); + } + + let font_name = std::str::from_utf8(&self.data[start_pos..self.pos]) + .map_err(|_| MtefError::ParseError("Invalid font name encoding".to_string()))? 
+ .to_string(); + + self.pos += 1; // Skip null terminator + + Ok(MtefFont { + tface, + style, + zname: font_name, + }) + } + + fn parse_size(&mut self) -> Result { + // Also works in MTEF5 because all supported tags are less than 16 + let tag = self.read_u8()? & 0x0F; + + // FULL or SUB or SUB2 or SYM or SUBSYM + if (FULL..=SUBSYM).contains(&tag) { + return Ok(MtefSize { + r#type: tag as i32, + lsize: (tag - FULL) as i32, + dsize: 0, + }); + } + + let option = self.read_u8()?; + + // Large dsize + if option == 100 { + let lsize = self.read_u8()? as i32; + let mut dsize = self.read_u8()? as i32; + dsize += (self.read_u8()? as i32) << 8; + return Ok(MtefSize { + r#type: option as i32, + lsize, + dsize, + }); + } + + // Explicit point size + if option == 101 { + let mut lsize = self.read_u8()? as i32; + lsize += (self.read_u8()? as i32) << 8; + return Ok(MtefSize { + r#type: option as i32, + lsize, + dsize: 0, + }); + } + + // -128 < dsize < 128 + let dsize = (self.read_u8()? as i32) - 128; + Ok(MtefSize { + r#type: 0, + lsize: option as i32, + dsize, + }) + } + + fn skip_font_def(&mut self) -> Result<(), MtefError> { + self.pos += 1; // Skip tag + let _id = self.read_u8()?; + while self.pos < self.data.len() && self.data[self.pos] != 0 { + self.pos += 1; + } + self.pos += 1; // Skip null terminator + Ok(()) + } + + /// Skip an EQN_PREFS record without interpreting its contents. + /// + /// NOTE(review): the array lengths are treated as nibble counts; confirm against the + /// MTEF specification that these count bytes are not entry counts. + fn skip_eqn_prefs(&mut self) -> Result<(), MtefError> { + self.pos += 1; // Skip tag + let _options = self.read_u8()?; // Options byte + + // skip_nibbles advances `pos` itself, so its return value must not be + // added to `pos` again (doing so skipped each array twice) + let size_count = self.read_u8()? as usize; + self.skip_nibbles(size_count)?; // Skip size array + + let space_count = self.read_u8()? as usize; + self.skip_nibbles(space_count)?; // Skip space array + + let style_count = self.read_u8()? 
as usize; + for _ in 0..style_count { + let c = self.read_u8()?; + if c != 0 { + self.pos += 1; // Skip style data + } + } + + Ok(()) + } + + fn skip_encoding_def(&mut self) -> Result<(), MtefError> { + self.pos += 1; // Skip tag + while self.pos < self.data.len() && self.data[self.pos] != 0 { + self.pos += 1; + } + self.pos += 1; // Skip null terminator + Ok(()) + } + + fn skip_future_record(&mut self) -> Result<(), MtefError> { + self.pos += 1; // Skip tag + let size = self.read_u16()? as usize; + self.pos += size; + Ok(()) + } + + fn skip_unknown_record(&mut self) -> Result<(), MtefError> { + self.pos += 1; // Skip tag + let size = self.read_u16()? as usize; + self.pos += size; + Ok(()) + } + + /// Consume `count` nibbles (rounded up to whole bytes) from the stream. + /// Advances `pos` itself; the returned value is only the number of bytes consumed. + fn skip_nibbles(&mut self, count: usize) -> Result { + let bytes = count.div_ceil(2); // 2 nibbles per byte + for _ in 0..bytes { + self.read_u8()?; + } + Ok(bytes) + } + + // Helper methods for reading binary data with bounds checking + /// Read one byte and advance; returns `MtefError::UnexpectedEof` at end of data. + #[inline] + fn read_u8(&mut self) -> Result { + // Checked access: `get` yields None at end of data, so no `unsafe` is needed + let val = *self.data.get(self.pos).ok_or(MtefError::UnexpectedEof)?; + self.pos += 1; + Ok(val) + } + + #[inline] + fn read_i16(&mut self) -> Result { + if self.pos + 2 > self.data.len() { + return Err(MtefError::UnexpectedEof); + } + let val = I16::::read_from_bytes(&self.data[self.pos..self.pos + 2]) + .map_err(|_| MtefError::InvalidFormat("Failed to read i16".to_string()))? + .get(); + self.pos += 2; + Ok(val) + } + + #[inline] + fn read_u16(&mut self) -> Result { + if self.pos + 2 > self.data.len() { + return Err(MtefError::UnexpectedEof); + } + let val = U16::::read_from_bytes(&self.data[self.pos..self.pos + 2]) + .map_err(|_| MtefError::InvalidFormat("Failed to read u16".to_string()))? 
+ .get(); + self.pos += 2; + Ok(val) + } +} diff --git a/src/formula/mtef/constants.rs b/crates/litchi-formula/src/mtef/constants.rs similarity index 100% rename from src/formula/mtef/constants.rs rename to crates/litchi-formula/src/mtef/constants.rs diff --git a/crates/litchi-formula/src/mtef/mod.rs b/crates/litchi-formula/src/mtef/mod.rs new file mode 100644 index 0000000..2082c41 --- /dev/null +++ b/crates/litchi-formula/src/mtef/mod.rs @@ -0,0 +1,311 @@ +mod binary; +mod constants; +mod templates; + +use crate::ast::MathNode; + +/// MTEF parser using proper binary parsing +pub struct MtefParser<'arena> { + // Arena for lifetime-managed allocations - kept for future use + #[allow(dead_code)] + arena: &'arena bumpalo::Bump, + binary_parser: Option>, +} + +impl<'arena> MtefParser<'arena> { + /// Create a new MTEF parser + pub fn new(arena: &'arena bumpalo::Bump, data: &'arena [u8]) -> Self { + let binary_parser = binary::MtefBinaryParser::new(arena, data).ok(); + Self { + arena, + binary_parser, + } + } + + /// Parse MTEF data into formula nodes + /// + /// # Example + /// ```ignore + /// let formula = Formula::new(); + /// let parser = MtefParser::new(formula.arena(), mtef_data); + /// let nodes = parser.parse()?; + /// ``` + pub fn parse(&mut self) -> Result>, MtefError> { + if let Some(ref mut parser) = self.binary_parser { + parser.parse() + } else { + // Fallback to simple heuristic for invalid MTEF data + // In a real implementation, this would need access to the actual binary data + Ok(Vec::new()) + } + } + + /// Check if the MTEF data is valid and can be parsed + pub fn is_valid(&self) -> bool { + self.binary_parser.is_some() + } + + /// Get MTEF version information if available + pub fn version_info(&self) -> Option<(u8, u8, u8, u8, u8)> { + self.binary_parser.as_ref().map(|p| { + ( + p.mtef_version, + p.platform, + p.product, + p.version, + p.version_sub, + ) + }) + } +} + +/// Errors that can occur during MTEF parsing +#[derive(Debug)] 
/// Failure modes reported while decoding MTEF data.
#[derive(Debug)]
#[non_exhaustive]
pub enum MtefError {
    /// The data does not follow the expected layout; the message gives details.
    InvalidFormat(String),
    /// The data ended before a complete value could be read.
    UnexpectedEof,
    /// A record tag that the parser does not recognise.
    UnknownTag(u8),
    /// Any other parsing failure; the message gives details.
    ParseError(String),
}

impl std::fmt::Display for MtefError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidFormat(msg) => write!(f, "Invalid format: {msg}"),
            Self::UnexpectedEof => f.write_str("Unexpected end of file"),
            Self::UnknownTag(tag) => write!(f, "Unknown tag: {tag:#x}"),
            Self::ParseError(msg) => write!(f, "Parse error: {msg}"),
        }
    }
}

impl std::error::Error for MtefError {}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::Formula;
    use smallvec::smallvec;
    use std::borrow::Cow;

    #[test]
    fn test_mtef_parser_creation() {
        let formula = Formula::new();
        let parser = MtefParser::new(formula.arena(), &[0u8; 100]);

        // An all-zero buffer must be rejected
        assert!(!parser.is_valid());
    }

    #[test]
    fn test_mtef_parser_with_valid_header() {
        // Create a minimal valid MTEF header with proper structure
        let data = vec![
            // OLE header (28 bytes)
            0x1C, 0x00, // cb_hdr = 28
            0x00, 0x00, 0x02, 0x00, // version = 0x00020000 (little endian)
            0xD3, 0xC2, // format = 0xC2D3
            0x0B, 0x00, 0x00, 0x00, // size = 11 (MTEF header + minimal content)
            0x00, 0x00, 0x00, 0x00, // reserved[0]
            0x00, 0x00, 0x00, 0x00, // reserved[1]
            0x00, 0x00, 0x00, 0x00, // reserved[2]
            0x00, 0x00, 0x00, 0x00, // reserved[3]
            // MTEF header with signature
            0x28, 0x04, 0x6D, 0x74, // signature bytes: '(' 0x04 'm' 't'
            0x05, // version = 5
            0x01, // platform = 1 (Windows)
            0x01, // product = 1 (MathType)
            0x01, // version = 1
            0x00, // version_sub = 0
            0x00, // application_key (empty null-terminated string)
            0x00, // inline = 0
            // Minimal MTEF content (SIZE + END tags)
            0x09, // SIZE tag
            0x00, // END tag
        ];

        let formula = Formula::new();
        let parser = MtefParser::new(formula.arena(), &data);

        // Should be valid with proper headers
        assert!(parser.is_valid());

        // `expect` instead of `if let`: a missing version tuple must fail the test rather
        // than silently skip the assertions below.
        let info = parser
            .version_info()
            .expect("a valid parser must expose version info");
        assert_eq!(info, (5, 1, 1, 1, 0));
    }

    #[test]
    fn test_mtef_parser_invalid_data() {
        let formula = Formula::new();

        // Test with data too short for OLE header
        let parser1 = MtefParser::new(formula.arena(), &[0u8; 10]);
        assert!(!parser1.is_valid());

        // Test with invalid OLE header
        let mut data = vec![0u8; 28];
        data[0] = 0x10; // Invalid cb_hdr
        let parser2 = MtefParser::new(formula.arena(), &data);
        assert!(!parser2.is_valid());
    }

    #[test]
    fn test_character_lookup() {
        // Test Greek letters
        use crate::mtef::binary::charset::lookup_character;

        // Test lowercase alpha (typeface 132, character 97)
        let result = lookup_character(132, 97, 1);
        assert_eq!(result, Some("\\alpha "));

        // Test uppercase Delta (typeface 133, character 68)
        let result = lookup_character(133, 68, 1);
        assert_eq!(result, Some("\\Delta "));

        // Test equals sign (typeface 134, character 61)
        let result = lookup_character(134, 61, 1);
        assert_eq!(result, Some("="));

        // Test non-existent character
        let result = lookup_character(999, 999, 1);
        assert_eq!(result, None);
    }

    #[test]
    fn test_embellishment_templates() {
        use crate::mtef::binary::charset::get_embellishment_template;

        // Test dot embellishment
        let template = get_embellishment_template(2); // embDOT
        assert_eq!(template, "\\dot{%1} ,\\.%1 ");

        // Test hat embellishment
        let template = get_embellishment_template(9); // embHAT
        assert_eq!(template, "\\hat{%1} ,\\^%1 ");

        // Test vector embellishment
        let template = get_embellishment_template(11); // embVEC
        assert_eq!(template, "\\vec{%1} ,%1 ");

        // Test invalid embellishment
        let template = get_embellishment_template(255);
        assert_eq!(template, "");
    }

    #[test]
    fn test_template_parsing() {
        use crate::mtef::templates::TemplateParser;

        // Test template lookup
        let template = TemplateParser::find_template(0, 3); // Fence: angle-both
        assert!(template.is_some());
        let template_def = template.unwrap();
        assert_eq!(template_def.selector, 0);
        assert_eq!(template_def.variation, 3);
        assert!(template_def.template.contains("\\left\\langle"));

        // Test template lookup for fraction
        let template = TemplateParser::find_template(11, 0); // Fraction
        assert!(template.is_some());
        let template_def = template.unwrap();
        assert!(template_def.template.contains("\\frac"));

        // Test non-existent template
        let template = TemplateParser::find_template(255, 0);
        assert!(template.is_none());
    }

    #[test]
    fn test_fence_template_conversion() {
        use crate::mtef::templates::TemplateParser;

        // Test fence type detection
        let fence = TemplateParser::fence_from_selector(1); // Parentheses
        assert_eq!(fence, Some(crate::ast::Fence::Paren));

        let fence = TemplateParser::fence_from_selector(2); // Braces
        assert_eq!(fence, Some(crate::ast::Fence::Brace));

        let fence = TemplateParser::fence_from_selector(3); // Brackets
        assert_eq!(fence, Some(crate::ast::Fence::Bracket));

        let fence = TemplateParser::fence_from_selector(4); // Pipes
        assert_eq!(fence, Some(crate::ast::Fence::Pipe));

        let fence = TemplateParser::fence_from_selector(255); // Invalid
        assert!(fence.is_none());
    }

    #[test]
    fn test_large_operator_conversion() {
        use crate::mtef::templates::TemplateParser;

        // Test large operator type detection
        let op = TemplateParser::large_op_from_selector(15); // Integrals
        assert_eq!(op, Some(crate::ast::LargeOperator::Integral));

        let op = TemplateParser::large_op_from_selector(16); // Sum
        assert_eq!(op, Some(crate::ast::LargeOperator::Sum));

        let op = TemplateParser::large_op_from_selector(17); // Product
        assert_eq!(op, Some(crate::ast::LargeOperator::Product));

        let op = TemplateParser::large_op_from_selector(21); // Integral (single with limits)
        assert_eq!(op, Some(crate::ast::LargeOperator::Integral));

        let op = TemplateParser::large_op_from_selector(255); // Invalid
        assert!(op.is_none());
    }

    #[test]
    fn test_template_ast_parsing() {
        use crate::mtef::templates::TemplateParser;

        // Test fraction template parsing
        let args: smallvec::SmallVec<[smallvec::SmallVec<[crate::ast::MathNode; 8]>; 4]> = smallvec![
            smallvec![crate::ast::MathNode::Number(Cow::Borrowed("1"))],
            smallvec![crate::ast::MathNode::Number(Cow::Borrowed("2"))]
        ];

        let result = TemplateParser::parse_fraction(args[0].to_vec(), args[1].to_vec());

        match result {
            crate::ast::MathNode::Frac {
                numerator,
                denominator,
                ..
            } => {
                assert_eq!(numerator.len(), 1);
                assert_eq!(denominator.len(), 1);
            },
            _ => panic!("Expected fraction node"),
        }
    }

    #[test]
    fn test_charset_attributes() {
        use crate::mtef::binary::charset::get_charset_attributes;

        let attrs = get_charset_attributes(0); // ZERO
        assert_eq!(attrs.math_attr, 1); // Math
        assert!(attrs.do_lookup);
        assert!(attrs.use_codepoint);

        let attrs = get_charset_attributes(1); // TEXT
        assert_eq!(attrs.math_attr, 2); // Force text
        assert!(attrs.do_lookup);
        assert!(attrs.use_codepoint);

        // Test out of bounds
        let attrs = get_charset_attributes(100);
        assert_eq!(attrs.math_attr, 3); // Default to force math
        assert!(attrs.do_lookup);
        assert!(attrs.use_codepoint);
    }
}
/// Template definition structure
///
/// Defines a specific MTEF template with its selector, variation, and LaTeX template string.
/// The description field is used for documentation and debugging purposes.
#[derive(Debug)]
pub struct TemplateDef {
    /// Template selector (identifies template type)
    pub selector: u8,
    /// Template variation (specific form within type)
    pub variation: u16,
    /// Human-readable description (used for documentation and debugging)
    #[allow(dead_code)]
    pub description: &'static str,
    /// LaTeX template string with argument placeholders
    pub template: &'static str,
}

/// MTEF v5 Template definitions based on rtf2latex2e Profile_TEMPLATES_5
///
/// Placeholders have the form `#n[mode]`; `[STARTSUB]`, `[ENDSUB]`, `[STARTSUP]` and
/// `[ENDSUP]` are script markers.
///
/// NOTE(review): the keys (14, 0), (14, 1) and (25, 0) occur more than once. A first-match
/// lookup such as `TemplateParser::find_template` can never return the later entries
/// (rarrow/barrow, hbracket) - confirm the intended selector/variation values.
const MTEF_TEMPLATES: &[TemplateDef] = &[
    TemplateDef {
        selector: 0,
        variation: 1,
        description: "fence: angle-left only",
        template: "\\left\\langle #1[M]\\right. ",
    },
    TemplateDef {
        selector: 0,
        variation: 2,
        description: "fence: angle-right only",
        template: "\\left. #1[M]\\right\\rangle ",
    },
    TemplateDef {
        selector: 0,
        variation: 3,
        description: "fence: angle-both",
        template: "\\left\\langle #1[M]\\right\\rangle ",
    },
    TemplateDef {
        selector: 1,
        variation: 1,
        description: "fence: paren-left only",
        template: "\\left( #1[M]\\right. ",
    },
    TemplateDef {
        selector: 1,
        variation: 2,
        description: "fence: paren-right only",
        template: "\\left. #1[M]\\right) ",
    },
    TemplateDef {
        selector: 1,
        variation: 3,
        description: "fence: paren-both",
        template: "\\left( #1[M]\\right) ",
    },
    TemplateDef {
        selector: 2,
        variation: 1,
        description: "fence: brace-left only",
        template: "\\left\\{ #1[M]\\right. ",
    },
    TemplateDef {
        selector: 2,
        variation: 2,
        description: "fence: brace-right only",
        template: "\\left. #1[M]\\right\\} ",
    },
    TemplateDef {
        selector: 2,
        variation: 3,
        description: "fence: brace-both",
        template: "\\left\\{ #1[M]\\right\\} ",
    },
    TemplateDef {
        selector: 3,
        variation: 1,
        description: "fence: brack-left only",
        // Was "\\lef]t[ ...", which is not a valid LaTeX delimiter command.
        template: "\\left[ #1[M]\\right. ",
    },
    TemplateDef {
        selector: 3,
        variation: 2,
        description: "fence: brack-right only",
        template: "\\left. #1[M]\\right] ",
    },
    TemplateDef {
        selector: 3,
        variation: 3,
        description: "fence: brack-both",
        template: "\\left[ #1[M]\\right] ",
    },
    TemplateDef {
        selector: 4,
        variation: 1,
        description: "fence: bar-left only",
        template: "\\left| #1[M]\\right. ",
    },
    TemplateDef {
        selector: 4,
        variation: 2,
        description: "fence: bar-right only",
        template: "\\left. #1[M]\\right| ",
    },
    TemplateDef {
        selector: 4,
        variation: 3,
        description: "fence: bar-both",
        template: "\\left| #1[M]\\right| ",
    },
    TemplateDef {
        selector: 5,
        variation: 1,
        description: "fence: dbar-left only",
        template: "\\left\\| #1[M]\\right. ",
    },
    TemplateDef {
        selector: 5,
        variation: 2,
        description: "fence: dbar-right only",
        template: "\\left. #1[M]\\right\\| ",
    },
    TemplateDef {
        selector: 5,
        variation: 3,
        description: "fence: dbar-both",
        template: "\\left\\| #1[M]\\right\\| ",
    },
    TemplateDef {
        selector: 6,
        variation: 1,
        description: "fence: floor",
        template: "\\left\\lfloor #1[M]\\right. ",
    },
    TemplateDef {
        selector: 6,
        variation: 2,
        description: "fence: floor",
        template: "\\left. #1[M]\\right\\rfloor ",
    },
    TemplateDef {
        selector: 6,
        variation: 3,
        description: "fence: floor",
        template: "\\left\\lfloor #1[M]\\right\\rfloor ",
    },
    TemplateDef {
        selector: 7,
        variation: 1,
        description: "fence: ceiling",
        template: "\\left\\lceil #1[M]\\right. ",
    },
    TemplateDef {
        selector: 7,
        variation: 2,
        description: "fence: ceiling",
        template: "\\left. #1[M]\\right\\rceil ",
    },
    TemplateDef {
        selector: 7,
        variation: 3,
        description: "fence: ceiling",
        template: "\\left\\lceil #1[M]\\right\\rceil ",
    },
    TemplateDef {
        selector: 8,
        variation: 0,
        description: "fence: LBLB",
        template: "\\left[ #1[M]\\right[ ",
    },
    TemplateDef {
        selector: 9,
        variation: 0,
        description: "fence: LPLP",
        template: "\\left( #1[M]\\right( ",
    },
    TemplateDef {
        selector: 9,
        variation: 1,
        description: "fence: RPLP",
        template: "\\left) #1[M]\\right( ",
    },
    TemplateDef {
        selector: 9,
        variation: 2,
        description: "fence: LBLP",
        template: "\\left[ #1[M]\\right( ",
    },
    TemplateDef {
        selector: 9,
        variation: 3,
        description: "fence: RBLP",
        template: "\\left] #1[M]\\right( ",
    },
    TemplateDef {
        selector: 9,
        variation: 16,
        description: "fence: LPRP",
        template: "\\left( #1[M]\\right) ",
    },
    TemplateDef {
        selector: 9,
        variation: 17,
        description: "fence: RPRP",
        template: "\\left) #1[M]\\right) ",
    },
    TemplateDef {
        selector: 9,
        variation: 18,
        description: "fence: LBRP",
        template: "\\left[ #1[M]\\right) ",
    },
    TemplateDef {
        selector: 9,
        variation: 19,
        description: "fence: RBRP",
        template: "\\left] #1[M]\\right) ",
    },
    TemplateDef {
        selector: 9,
        variation: 32,
        description: "fence: LPLB",
        template: "\\left( #1[M]\\right[ ",
    },
    TemplateDef {
        selector: 9,
        variation: 33,
        description: "fence: RPLB",
        template: "\\left) #1[M]\\right[ ",
    },
    TemplateDef {
        selector: 9,
        variation: 34,
        description: "fence: LBLB",
        template: "\\left[ #1[M]\\right[ ",
    },
    TemplateDef {
        selector: 9,
        variation: 35,
        description: "fence: RBLB",
        template: "\\left] #1[M]\\right[ ",
    },
    TemplateDef {
        selector: 9,
        variation: 48,
        description: "fence: LPRB",
        template: "\\left( #1[M]\\right] ",
    },
    TemplateDef {
        selector: 9,
        variation: 49,
        description: "fence: RPRB",
        template: "\\left) #1[M]\\right] ",
    },
    TemplateDef {
        selector: 9,
        variation: 50,
        description: "fence: LBRB",
        template: "\\left[ #1[M]\\right] ",
    },
    TemplateDef {
        selector: 9,
        variation: 51,
        description: "fence: RBRB",
        template: "\\left] #1[M]\\right] ",
    },
    TemplateDef {
        selector: 10,
        variation: 0,
        description: "root: sqroot",
        template: "\\sqrt{#1[M]} ",
    },
    TemplateDef {
        selector: 10,
        variation: 1,
        description: "root: nthroot",
        template: "\\sqrt[#2[M]]{#1[M]} ",
    },
    TemplateDef {
        selector: 11,
        variation: 0,
        description: "fract: tmfract",
        template: "\\frac{#1[M]}{#2[M]} ",
    },
    TemplateDef {
        selector: 11,
        variation: 1,
        description: "fract: smfract",
        template: "\\frac{#1[M]}{#2[M]} ",
    },
    TemplateDef {
        selector: 11,
        variation: 2,
        description: "fract: slfract",
        template: "{#1[M]}/{#2[M]} ",
    },
    TemplateDef {
        selector: 11,
        variation: 3,
        description: "fract: slfract",
        template: "{#1[M]}/{#2[M]} ",
    },
    TemplateDef {
        selector: 11,
        variation: 4,
        description: "fract: slfract",
        template: "{#1[M]}/{#2[M]} ",
    },
    TemplateDef {
        selector: 11,
        variation: 5,
        description: "fract: smfract",
        template: "\\frac{#1[M]}{#2[M]} ",
    },
    TemplateDef {
        selector: 11,
        variation: 6,
        description: "fract: slfract",
        template: "{#1[M]}/{#2[M]} ",
    },
    TemplateDef {
        selector: 11,
        variation: 7,
        description: "fract: slfract",
        template: "{#1[M]}/{#2[M]} ",
    },
    TemplateDef {
        selector: 12,
        variation: 0,
        description: "ubar: subar",
        template: "\\underline{#1[M]} ",
    },
    TemplateDef {
        selector: 12,
        variation: 1,
        description: "ubar: dubar",
        template: "\\underline{\\underline{#1[M]}} ",
    },
    TemplateDef {
        selector: 13,
        variation: 0,
        description: "obar: sobar",
        template: "\\overline{#1[M]} ",
    },
    TemplateDef {
        selector: 13,
        variation: 1,
        description: "obar: dobar",
        template: "\\overline{\\overline{#1[M]}} ",
    },
    TemplateDef {
        selector: 14,
        variation: 0,
        description: "larrow: box on top",
        template: "\\stackrel{#1[M]}{\\longleftarrow} ",
    },
    TemplateDef {
        selector: 14,
        variation: 1,
        description: "larrow: box below ",
        template: "\\stackunder{#1[M]}{\\longleftarrow} ",
    },
    TemplateDef {
        selector: 14,
        variation: 0,
        description: "rarrow: box on top",
        template: "\\stackrel{#1[M]}{\\longrightarrow} ",
    },
    TemplateDef {
        selector: 14,
        variation: 1,
        description: "rarrow: box below ",
        template: "\\stackunder{#1[M]}{\\longrightarrow} ",
    },
    TemplateDef {
        selector: 14,
        variation: 0,
        description: "barrow: box on top",
        template: "\\stackrel{#1[M]}{\\longleftrightarrow} ",
    },
    TemplateDef {
        selector: 14,
        variation: 1,
        description: "barrow: box below ",
        template: "\\stackunder{#1[M]}{\\longleftrightarrow} ",
    },
    TemplateDef {
        selector: 15,
        variation: 0,
        description: "integrals: single - no limits",
        template: "\\int #1[M] ",
    },
    TemplateDef {
        selector: 15,
        variation: 1,
        description: "integrals: single - both",
        template: "\\int\\nolimits#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 15,
        variation: 2,
        description: "integrals: double - both",
        template: "\\iint\\nolimits#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 15,
        variation: 3,
        description: "integrals: triple - both",
        template: "\\iiint\\nolimits#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 15,
        variation: 4,
        description: "integrals: contour - no limits",
        template: "\\oint #1[M] ",
    },
    TemplateDef {
        selector: 15,
        variation: 8,
        description: "integrals: contour - no limits",
        template: "\\oint #1[M] ",
    },
    TemplateDef {
        selector: 15,
        variation: 12,
        description: "integrals: contour - no limits",
        template: "\\oint #1[M] ",
    },
    TemplateDef {
        selector: 16,
        variation: 0,
        description: "sum: limits top/bottom - both",
        template: "\\sum\\limits#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 17,
        variation: 0,
        description: "product: limits top/bottom - both",
        template: "\\prod\\limits#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 18,
        variation: 0,
        description: "coproduct: limits top/bottom - both",
        template: "\\dcoprod\\limits#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 19,
        variation: 0,
        description: "union: limits top/bottom - both",
        template: "\\dbigcup\\limits#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 20,
        variation: 0,
        description: "intersection: limits top/bottom - both",
        template: "\\dbigcap\\limits#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 21,
        variation: 0,
        description: "integrals: single - both",
        template: "\\int#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 22,
        variation: 0,
        description: "sum: single - both",
        template: "\\sum#2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP]#1[M] ",
    },
    TemplateDef {
        selector: 23,
        variation: 0,
        description: "limit: both",
        template: "#1 #2[L][STARTSUB][ENDSUB]#3[L][STARTSUP][ENDSUP] ",
    },
    TemplateDef {
        selector: 24,
        variation: 0,
        description: "horizontal brace: lower",
        template: "\\stackunder{#2[M]}{\\underbrace{#1[M]}} ",
    },
    TemplateDef {
        selector: 24,
        variation: 1,
        description: "horizontal brace: upper",
        template: "\\stackrel{#2[M]}{\\overbrace{#1[M]}} ",
    },
    TemplateDef {
        selector: 25,
        variation: 0,
        description: "horizontal brace: lower",
        template: "\\stackunder{#2[M]}{\\underbrace{#1[M]}} ",
    },
    TemplateDef {
        selector: 25,
        variation: 1,
        description: "horizontal brace: upper",
        template: "\\stackrel{#2[M]}{\\overbrace{#1[M]}} ",
    },
    TemplateDef {
        selector: 25,
        variation: 0,
        description: "hbracket",
        template: " ",
    },
    TemplateDef {
        selector: 27,
        variation: 0,
        description: "script: sub",
        template: "#1[L][STARTSUB][ENDSUB] ",
    },
    TemplateDef {
        selector: 27,
        variation: 1,
        description: "script: sub",
        template: "#1[L][STARTSUB][ENDSUB] ",
    },
    TemplateDef {
        selector: 28,
        variation: 0,
        description: "script: super",
        template: "#2[L][STARTSUP][ENDSUP] ",
    },
    TemplateDef {
        selector: 28,
        variation: 1,
        description: "script: super",
        template: "#2[L][STARTSUP][ENDSUP] ",
    },
    TemplateDef {
        selector: 29,
        variation: 0,
        description: "script: subsup",
        template: "#1[L][STARTSUB][ENDSUB]#2[L][STARTSUP][ENDSUP] ",
    },
];
+ pub fn parse_template_arguments<'a>(template: &str, args: &TemplateArgs<'a>) -> MathNode<'a> { + let mut result = String::new(); + let mut chars = template.chars().peekable(); + + while let Some(ch) = chars.next() { + if ch == '#' { + // Parse argument placeholder like #1[M] or #2[L] + if let Some(digit) = chars.next().and_then(|c| c.to_digit(10)) { + let arg_index = digit as usize - 1; // Convert to 0-based index + + // Skip the mode specifier in brackets, e.g., [M] or [L] + if chars.next() == Some('[') { + for c in chars.by_ref() { + if c == ']' { + break; + } + } + } + + // Substitute the argument + if arg_index < args.len() { + // Convert argument nodes to text for simple substitution + let mut arg_text = String::new(); + for node in &args[arg_index] { + match node { + MathNode::Text(text) => arg_text.push_str(text), + MathNode::Number(num) => arg_text.push_str(num), + MathNode::Symbol(sym) => { + if let Some(unicode) = sym.unicode { + arg_text.push(unicode); + } else { + arg_text.push_str(&sym.name); + } + }, + _ => arg_text.push('?'), // Placeholder for complex nodes + } + } + result.push_str(&arg_text); + } + } else { + result.push('#'); + } + } else if ch == '[' { + // Handle special markers like [STARTSUB], [ENDSUB], etc. + let mut marker = String::new(); + for c in chars.by_ref() { + if c == ']' { + break; + } + marker.push(c); + } + + match marker.as_str() { + "STARTSUB" => result.push_str("_{"), + "ENDSUB" => result.push('}'), + "STARTSUP" => result.push_str("^{"), + "ENDSUP" => result.push('}'), + _ => { + // Unknown marker, keep as is + result.push('['); + result.push_str(&marker); + result.push(']'); + }, + } + } else { + result.push(ch); + } + } + + // Try to recognize common LaTeX patterns and convert to AST nodes + Self::parse_latex_to_ast(&result, args) + } + + /// Parse LaTeX string back to AST nodes for common patterns + /// + /// Recognizes common LaTeX constructs (fractions, roots, operators, etc.) 
+ /// and converts them back to proper AST nodes instead of plain text. + fn parse_latex_to_ast<'a>(latex: &str, args: &TemplateArgs<'a>) -> MathNode<'a> { + let latex = latex.trim(); + + // Fraction: \frac{numerator}{denominator} + if latex.starts_with("\\frac{") + && latex.contains("}{") + && latex[latex.find("}{").unwrap() + 2..].find('}').is_some() + { + // Try to find the actual nodes from args + let mut numerator = Vec::new(); + let mut denominator = Vec::new(); + + // Simple heuristic: first arg is numerator, second is denominator + if args.len() >= 2 { + numerator = args[0].iter().cloned().collect(); + denominator = args[1].iter().cloned().collect(); + } + + return MathNode::Frac { + numerator, + denominator, + line_thickness: None, + frac_type: None, + }; + } + + // Root: \sqrt[index]{base} or \sqrt{base} + if latex.starts_with("\\sqrt") { + if latex.starts_with("\\sqrt[") { + if let Some(rel_pos) = latex.strip_prefix("\\sqrt[").and_then(|s| s.find("]{")) { + let abs_pos = 6 + rel_pos; + if latex[abs_pos + 2..].find('}').is_some() { + let mut base = Vec::new(); + let mut index = Vec::new(); + + if !args.is_empty() { + base = args[0].iter().cloned().collect(); + } + if args.len() >= 2 { + index = args[1].iter().cloned().collect(); + } + + return MathNode::Root { + base, + index: Some(index), + }; + } + } + } else if latex.starts_with("\\sqrt{") && latex[6..].find('}').is_some() { + let mut base = Vec::new(); + if !args.is_empty() { + base = args[0].iter().cloned().collect(); + } + + return MathNode::Root { base, index: None }; + } + } + + // Large operators with limits + if latex.contains("\\sum") || latex.contains("\\prod") || latex.contains("\\int") { + let operator = if latex.contains("\\sum") { + LargeOperator::Sum + } else if latex.contains("\\prod") { + LargeOperator::Product + } else { + LargeOperator::Integral + }; + + let mut lower_limit = None; + let mut upper_limit = None; + let mut integrand = None; + + // Extract limits from _{...}^{...} 
patterns + if let Some(sub_start) = latex.find("_{") + && latex[sub_start + 2..].find('}').is_some() + && args.len() >= 2 + { + lower_limit = Some(args[1].iter().cloned().collect()); + } + + if let Some(sup_start) = latex.find("^{") + && latex[sup_start + 2..].find('}').is_some() + && args.len() >= 3 + { + upper_limit = Some(args[2].iter().cloned().collect()); + } + + if !args.is_empty() { + integrand = Some(args[0].iter().cloned().collect()); + } + + return MathNode::LargeOp { + operator, + lower_limit, + upper_limit, + integrand, + hide_lower: false, + hide_upper: false, + }; + } + + // Subscripts and superscripts + if latex.contains("_{") && latex.contains("^{") { + // Both sub and superscript + let mut base = Vec::new(); + let mut subscript = Vec::new(); + let mut superscript = Vec::new(); + + if !args.is_empty() { + base = args[0].iter().cloned().collect(); + } + if args.len() >= 2 { + subscript = args[1].iter().cloned().collect(); + } + if args.len() >= 3 { + superscript = args[2].iter().cloned().collect(); + } + + return MathNode::SubSup { + base, + subscript, + superscript, + }; + } else if latex.contains("_{") { + // Subscript only + let mut base = Vec::new(); + let mut subscript = Vec::new(); + + if !args.is_empty() { + base = args[0].iter().cloned().collect(); + } + if args.len() >= 2 { + subscript = args[1].iter().cloned().collect(); + } + + return MathNode::Sub { base, subscript }; + } else if latex.contains("^{") { + // Superscript only + let mut base = Vec::new(); + let mut exponent = Vec::new(); + + if !args.is_empty() { + base = args[0].iter().cloned().collect(); + } + if args.len() >= 2 { + exponent = args[1].iter().cloned().collect(); + } + + return MathNode::Power { base, exponent }; + } + + // Fences: \left...\right... 
+ if latex.contains("\\left") && latex.contains("\\right") { + // Extract content between \left and \right + if let Some(left_pos) = latex.find("\\left") + && let Some(right_pos) = latex.find("\\right") + { + let content_start = latex[left_pos..] + .find('{') + .map(|p| left_pos + p + 1) + .unwrap_or(left_pos + 6); + let content_end = right_pos; + + if content_start < content_end { + // Determine fence type + let open_fence = if latex.contains("\\left(") { + Fence::Paren + } else if latex.contains("\\left[") { + Fence::Bracket + } else if latex.contains("\\left{") { + Fence::Brace + } else if latex.contains("\\left|") { + Fence::Pipe + } else { + Fence::Paren // default + }; + + let close_fence = if latex.contains("\\right)") { + Fence::Paren + } else if latex.contains("\\right]") { + Fence::Bracket + } else if latex.contains("\\right}") { + Fence::Brace + } else if latex.contains("\\right|") { + Fence::Pipe + } else { + Fence::Paren // default + }; + + let mut content = Vec::new(); + if !args.is_empty() { + content = args[0].iter().cloned().collect(); + } + + return MathNode::Fenced { + open: open_fence, + content, + close: close_fence, + separator: None, + }; + } + } + } + + // Default: return as text + MathNode::Text(latex.to_string().into()) + } + /// Parse a fraction template + pub fn parse_fraction<'a>( + numerator: Vec>, + denominator: Vec>, + ) -> MathNode<'a> { + MathNode::Frac { + numerator, + denominator, + line_thickness: None, + frac_type: None, + } + } + + /// Parse a slash template (inline fraction) + /// + /// Public API for potential external use or future MTEF features + #[allow(dead_code)] // Part of public template parsing API + pub fn parse_slash<'a>( + numerator: Vec>, + denominator: Vec>, + ) -> MathNode<'a> { + MathNode::Row(vec![ + MathNode::Row(numerator), + MathNode::Operator(Operator::Divide), + MathNode::Row(denominator), + ]) + } + + /// Parse a root template + pub fn parse_root<'a>( + base: Vec>, + index: Option>>, + ) -> MathNode<'a> 
{ + MathNode::Root { base, index } + } + + /// Parse a subscript template + pub fn parse_subscript<'a>( + base: Vec>, + subscript: Vec>, + ) -> MathNode<'a> { + MathNode::Sub { base, subscript } + } + + /// Parse a superscript template + pub fn parse_superscript<'a>( + base: Vec>, + superscript: Vec>, + ) -> MathNode<'a> { + MathNode::Power { + base, + exponent: superscript, + } + } + + /// Parse a subscript-superscript template + pub fn parse_subsup<'a>( + base: Vec>, + subscript: Vec>, + superscript: Vec>, + ) -> MathNode<'a> { + MathNode::SubSup { + base, + subscript, + superscript, + } + } + + /// Parse an underscript template + /// + /// Public API for potential external use or future MTEF features + #[allow(dead_code)] // Part of public template parsing API + pub fn parse_below<'a>(base: Vec>, script: Vec>) -> MathNode<'a> { + MathNode::Under { + base, + under: script, + position: None, + } + } + + /// Parse an overscript template + /// + /// Public API for potential external use or future MTEF features + #[allow(dead_code)] // Part of public template parsing API + pub fn parse_above<'a>(base: Vec>, script: Vec>) -> MathNode<'a> { + MathNode::Over { + base, + over: script, + position: None, + } + } + + /// Parse an underscript-overscript template + /// + /// Public API for potential external use or future MTEF features + #[allow(dead_code)] // Part of public template parsing API + pub fn parse_below_above<'a>( + base: Vec>, + below: Vec>, + above: Vec>, + ) -> MathNode<'a> { + MathNode::UnderOver { + base, + under: below, + over: above, + position: None, + } + } + + /// Parse a large operator template + pub fn parse_large_op<'a>( + operator: LargeOperator, + lower_limit: Vec>, + upper_limit: Vec>, + integrand: Vec>, + ) -> MathNode<'a> { + MathNode::LargeOp { + operator, + lower_limit: if lower_limit.is_empty() { + None + } else { + Some(lower_limit) + }, + upper_limit: if upper_limit.is_empty() { + None + } else { + Some(upper_limit) + }, + integrand: if 
integrand.is_empty() { + None + } else { + Some(integrand) + }, + hide_lower: false, + hide_upper: false, + } + } + + /// Parse a fence template + pub fn parse_fence<'a>(fence: Fence, content: Vec>) -> MathNode<'a> { + MathNode::Fenced { + open: fence, + content, + close: fence, + separator: None, + } + } + + /// Get large operator from template selector + /// + /// Maps MTEF template selectors to corresponding large operator types. + /// Some selectors may map to the same operator type (e.g., multiple integral variants). + /// + /// Public API for template system, may be used by custom template handlers + #[allow(dead_code)] // Part of public template mapping API + pub fn large_op_from_selector(selector: u8) -> Option { + match selector { + 15 => Some(LargeOperator::Integral), // TMPL_INTOP: integrals (single, double, triple, contour) + 16 => Some(LargeOperator::Sum), // TMPL_SUM: summation + 17 => Some(LargeOperator::Product), // TMPL_PROD: product + 18 => Some(LargeOperator::Coproduct), // TMPL_COPROD: coproduct + 19 => Some(LargeOperator::Union), // TMPL_UNION: union + 20 => Some(LargeOperator::Intersection), // TMPL_INTER: intersection + 21 => Some(LargeOperator::Integral), // TMPL_IINTOP: single integral with limits + 22 => Some(LargeOperator::Sum), // TMPL_IIINTOP: single sum with limits + 23 => Some(LargeOperator::Integral), // TMPL_OINTOP: contour integral / limit template + _ => None, + } + } + + /// Get fence from template selector + /// + /// Maps MTEF template selectors to corresponding fence types. 
+ /// + /// Public API for template system, may be used by custom template handlers + #[allow(dead_code)] // Part of public template mapping API + pub fn fence_from_selector(selector: u8) -> Option { + match selector { + 1 => Some(Fence::Paren), // TMPL_PAREN: parentheses + 3 => Some(Fence::Bracket), // TMPL_BRACKET: square brackets + 2 => Some(Fence::Brace), // TMPL_BRACE: curly braces + 4 => Some(Fence::Pipe), // TMPL_BAR: vertical bars + 5 => Some(Fence::DoublePipe), // TMPL_DBAR: double vertical bars + _ => None, + } + } +} diff --git a/src/formula/omml/attributes.rs b/crates/litchi-formula/src/omml/attributes.rs similarity index 99% rename from src/formula/omml/attributes.rs rename to crates/litchi-formula/src/omml/attributes.rs index a6e9d60..ec2f031 100644 --- a/src/formula/omml/attributes.rs +++ b/crates/litchi-formula/src/omml/attributes.rs @@ -1,8 +1,8 @@ -use crate::formula::ast::{ +use crate::ast::{ BreakType, FractionType, LineStyle, Position, ShapeType, StrikeStyle, VerticalAlignment, *, }; -use crate::formula::omml::elements::ElementProperties; -use crate::formula::omml::lookup::*; +use crate::omml::elements::ElementProperties; +use crate::omml::lookup::*; /// SIMD-accelerated numeric parsing functions /// Fast integer parsing using atoi_simd diff --git a/src/formula/omml/elements.rs b/crates/litchi-formula/src/omml/elements.rs similarity index 99% rename from src/formula/omml/elements.rs rename to crates/litchi-formula/src/omml/elements.rs index 08dc005..e7d6207 100644 --- a/src/formula/omml/elements.rs +++ b/crates/litchi-formula/src/omml/elements.rs @@ -1,4 +1,4 @@ -use crate::formula::ast::{AccentType, Fence, LargeOperator, MathNode, MatrixFence}; +use crate::ast::{AccentType, Fence, LargeOperator, MathNode, MatrixFence}; /// Element types in OMML /// diff --git a/crates/litchi-formula/src/omml/error.rs b/crates/litchi-formula/src/omml/error.rs new file mode 100644 index 0000000..5440004 --- /dev/null +++ b/crates/litchi-formula/src/omml/error.rs 
@@ -0,0 +1,120 @@ +/// Errors that can occur during OMML parsing +#[derive(Debug)] +#[non_exhaustive] +pub enum OmmlError { + XmlError(String), + ParseError(String), + InvalidStructure(String), + ValidationError(String), + UnsupportedFeature(String), + EncodingError(String), + DepthLimitExceeded(usize), + MalformedElement(String), + MissingRequiredElement(String), + InvalidAttribute(String), + ArenaAllocationError(String), +} + +impl std::fmt::Display for OmmlError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + OmmlError::XmlError(msg) => write!(f, "XML parsing error: {}", msg), + OmmlError::ParseError(msg) => write!(f, "OMML parse error: {}", msg), + OmmlError::InvalidStructure(msg) => write!(f, "Invalid OMML structure: {}", msg), + OmmlError::ValidationError(msg) => write!(f, "OMML validation error: {}", msg), + OmmlError::UnsupportedFeature(msg) => write!(f, "Unsupported OMML feature: {}", msg), + OmmlError::EncodingError(msg) => write!(f, "Text encoding error: {}", msg), + OmmlError::DepthLimitExceeded(limit) => { + write!(f, "XML depth limit exceeded: {}", limit) + }, + OmmlError::MalformedElement(msg) => write!(f, "Malformed element: {}", msg), + OmmlError::MissingRequiredElement(msg) => { + write!(f, "Missing required element: {}", msg) + }, + OmmlError::InvalidAttribute(msg) => write!(f, "Invalid attribute: {}", msg), + OmmlError::ArenaAllocationError(msg) => write!(f, "Arena allocation error: {}", msg), + } + } +} + +impl std::error::Error for OmmlError {} + +impl From for OmmlError { + fn from(err: std::str::Utf8Error) -> Self { + OmmlError::EncodingError(format!("UTF-8 decoding error: {}", err)) + } +} + +impl From for OmmlError { + fn from(err: bumpalo::AllocErr) -> Self { + OmmlError::ArenaAllocationError(format!("Arena allocation failed: {}", err)) + } +} + +impl From for OmmlError { + fn from(err: quick_xml::Error) -> Self { + OmmlError::XmlError(format!("Quick XML error: {}", err)) + } +} + +#[cfg(test)] 
+mod tests { + use super::*; + + #[test] + fn test_omml_error_display() { + let err = OmmlError::XmlError("xml test".to_string()); + assert!(err.to_string().contains("XML parsing error")); + assert!(err.to_string().contains("xml test")); + + let err = OmmlError::ParseError("parse test".to_string()); + assert!(err.to_string().contains("OMML parse error")); + assert!(err.to_string().contains("parse test")); + + let err = OmmlError::InvalidStructure("structure test".to_string()); + assert!(err.to_string().contains("Invalid OMML structure")); + + let err = OmmlError::ValidationError("validation test".to_string()); + assert!(err.to_string().contains("OMML validation error")); + + let err = OmmlError::UnsupportedFeature("feature test".to_string()); + assert!(err.to_string().contains("Unsupported OMML feature")); + + let err = OmmlError::EncodingError("encoding test".to_string()); + assert!(err.to_string().contains("Text encoding error")); + + let err = OmmlError::DepthLimitExceeded(100); + assert!(err.to_string().contains("XML depth limit exceeded")); + assert!(err.to_string().contains("100")); + + let err = OmmlError::MalformedElement("malformed test".to_string()); + assert!(err.to_string().contains("Malformed element")); + + let err = OmmlError::MissingRequiredElement("missing test".to_string()); + assert!(err.to_string().contains("Missing required element")); + + let err = OmmlError::InvalidAttribute("invalid attr".to_string()); + assert!(err.to_string().contains("Invalid attribute")); + + let err = OmmlError::ArenaAllocationError("arena test".to_string()); + assert!(err.to_string().contains("Arena allocation error")); + } + + #[test] + fn test_omml_error_from_utf8_error() { + // Create an invalid UTF-8 sequence + let invalid_utf8 = vec![0x80, 0x81, 0x82]; + let utf8_err = std::str::from_utf8(&invalid_utf8).unwrap_err(); + let err: OmmlError = utf8_err.into(); + assert!(matches!(err, OmmlError::EncodingError(_))); + assert!(err.to_string().contains("UTF-8")); + } + + 
#[test] + fn test_omml_error_debug() { + let err = OmmlError::ParseError("test".to_string()); + let debug_str = format!("{:?}", err); + assert!(debug_str.contains("ParseError")); + assert!(debug_str.contains("test")); + } +} diff --git a/src/formula/omml/handlers/accent.rs b/crates/litchi-formula/src/omml/handlers/accent.rs similarity index 89% rename from src/formula/omml/handlers/accent.rs rename to crates/litchi-formula/src/omml/handlers/accent.rs index 892438c..776151d 100644 --- a/src/formula/omml/handlers/accent.rs +++ b/crates/litchi-formula/src/omml/handlers/accent.rs @@ -1,11 +1,9 @@ // Accent element handler -use crate::formula::ast::*; -use crate::formula::omml::attributes::{ - get_attribute_value, parse_accent_type, parse_position_type, -}; -use crate::formula::omml::elements::ElementContext; -use crate::formula::omml::properties::parse_accent_properties; +use crate::ast::*; +use crate::omml::attributes::{get_attribute_value, parse_accent_type, parse_position_type}; +use crate::omml::elements::ElementContext; +use crate::omml::properties::parse_accent_properties; use quick_xml::events::BytesStart; /// Handler for accent elements diff --git a/src/formula/omml/handlers/bar.rs b/crates/litchi-formula/src/omml/handlers/bar.rs similarity index 86% rename from src/formula/omml/handlers/bar.rs rename to crates/litchi-formula/src/omml/handlers/bar.rs index 317600c..84b57d6 100644 --- a/src/formula/omml/handlers/bar.rs +++ b/crates/litchi-formula/src/omml/handlers/bar.rs @@ -1,8 +1,8 @@ // Bar element handler -use crate::formula::ast::*; -use crate::formula::omml::attributes::parse_position_type; -use crate::formula::omml::elements::ElementContext; +use crate::ast::*; +use crate::omml::attributes::parse_position_type; +use crate::omml::elements::ElementContext; /// Handler for bar elements pub struct BarHandler; diff --git a/src/formula/omml/handlers/border_box.rs b/crates/litchi-formula/src/omml/handlers/border_box.rs similarity index 91% rename from 
src/formula/omml/handlers/border_box.rs rename to crates/litchi-formula/src/omml/handlers/border_box.rs index 99f897b..e9e3510 100644 --- a/src/formula/omml/handlers/border_box.rs +++ b/crates/litchi-formula/src/omml/handlers/border_box.rs @@ -1,7 +1,7 @@ // Border box element handler -use crate::formula::ast::*; -use crate::formula::omml::elements::ElementContext; +use crate::ast::*; +use crate::omml::elements::ElementContext; /// Handler for border box elements pub struct BorderBoxHandler; diff --git a/src/formula/omml/handlers/box_handler.rs b/crates/litchi-formula/src/omml/handlers/box_handler.rs similarity index 90% rename from src/formula/omml/handlers/box_handler.rs rename to crates/litchi-formula/src/omml/handlers/box_handler.rs index 208cf57..ec79539 100644 --- a/src/formula/omml/handlers/box_handler.rs +++ b/crates/litchi-formula/src/omml/handlers/box_handler.rs @@ -1,7 +1,7 @@ // Box element handler -use crate::formula::ast::*; -use crate::formula::omml::elements::ElementContext; +use crate::ast::*; +use crate::omml::elements::ElementContext; /// Handler for box elements pub struct BoxHandler; diff --git a/src/formula/omml/handlers/char_handler.rs b/crates/litchi-formula/src/omml/handlers/char_handler.rs similarity index 92% rename from src/formula/omml/handlers/char_handler.rs rename to crates/litchi-formula/src/omml/handlers/char_handler.rs index 96adacd..5377a40 100644 --- a/src/formula/omml/handlers/char_handler.rs +++ b/crates/litchi-formula/src/omml/handlers/char_handler.rs @@ -1,7 +1,7 @@ // Character element handler -use crate::formula::omml::attributes::get_attribute_value_borrowed; -use crate::formula::omml::elements::ElementContext; +use crate::omml::attributes::get_attribute_value_borrowed; +use crate::omml::elements::ElementContext; /// Handler for character elements (used within properties) pub struct CharHandler; diff --git a/src/formula/omml/handlers/chr.rs b/crates/litchi-formula/src/omml/handlers/chr.rs similarity index 94% rename from 
src/formula/omml/handlers/chr.rs rename to crates/litchi-formula/src/omml/handlers/chr.rs index 26232c1..b67ae3a 100644 --- a/src/formula/omml/handlers/chr.rs +++ b/crates/litchi-formula/src/omml/handlers/chr.rs @@ -1,6 +1,6 @@ // Handler for character property element (chr) -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for character property element (chr) #[allow(dead_code)] // Handler implementation, used via the char_handler module diff --git a/crates/litchi-formula/src/omml/handlers/components.rs b/crates/litchi-formula/src/omml/handlers/components.rs new file mode 100644 index 0000000..b931a48 --- /dev/null +++ b/crates/litchi-formula/src/omml/handlers/components.rs @@ -0,0 +1,210 @@ +// Component element handlers + +use crate::omml::elements::{ElementContext, ElementType}; +use crate::omml::utils::extend_vec_efficient; + +/// Handler for numerator elements +pub struct NumeratorHandler; + +impl NumeratorHandler { + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: component handlers move owned Vec between contexts + ) { + if let Some(parent) = parent_context { + if parent.element_type == ElementType::Fraction { + parent.numerator = Some(context.children.clone()); + } else { + // Pass children up if not in a fraction context + extend_vec_efficient(&mut parent.children, context.children.clone()); + } + } + } +} + +/// Handler for denominator elements +pub struct DenominatorHandler; + +impl DenominatorHandler { + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: component handlers move owned Vec between contexts + ) { + if let Some(parent) = parent_context { + if parent.element_type == ElementType::Fraction { + parent.denominator = Some(context.children.clone()); + } else { 
+ // Pass children up if not in a fraction context + extend_vec_efficient(&mut parent.children, context.children.clone()); + } + } + } +} + +/// Handler for degree elements (for radicals) +pub struct DegreeHandler; + +impl DegreeHandler { + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: component handlers move owned Vec between contexts + ) { + if let Some(parent) = parent_context { + if parent.element_type == ElementType::Radical { + parent.degree = Some(context.children.clone()); + } else { + // Pass children up if not in a radical context + extend_vec_efficient(&mut parent.children, context.children.clone()); + } + } + } +} + +/// Handler for base elements +pub struct BaseHandler; + +impl BaseHandler { + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: component handlers move owned Vec between contexts + ) { + if let Some(parent) = parent_context { + match parent.element_type { + ElementType::Superscript | ElementType::Subscript | ElementType::SubSup => { + parent.base = Some(context.children.clone()); + }, + ElementType::Radical => { + parent.base = Some(context.children.clone()); + }, + ElementType::Accent | ElementType::Bar | ElementType::GroupChar => { + parent.base = Some(context.children.clone()); + }, + ElementType::Nary => { + // For n-ary operators, the e element is the integrand + parent.integrand = Some(context.children.clone()); + }, + ElementType::EqArr => { + // For equation arrays, each e element is a row + parent.eq_array_rows.push(context.children.clone()); + }, + _ => { + // Pass children up for other contexts + extend_vec_efficient(&mut parent.children, context.children.clone()); + }, + } + } + } +} + +/// Handler for lower limit elements +pub struct LowerLimitHandler; + +impl LowerLimitHandler { + pub fn 
handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: component handlers move owned Vec between contexts + ) { + if let Some(parent) = parent_context { + if parent.element_type == ElementType::Nary { + parent.lower_limit = Some(context.children.clone()); + } else { + // Pass children up if not in a nary context + extend_vec_efficient(&mut parent.children, context.children.clone()); + } + } + } +} + +/// Handler for upper limit elements +pub struct UpperLimitHandler; + +impl UpperLimitHandler { + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: component handlers move owned Vec between contexts + ) { + if let Some(parent) = parent_context { + if parent.element_type == ElementType::Nary { + parent.upper_limit = Some(context.children.clone()); + } else { + // Pass children up if not in a nary context + extend_vec_efficient(&mut parent.children, context.children.clone()); + } + } + } +} + +/// Handler for integrand elements +pub struct IntegrandHandler; + +impl IntegrandHandler { + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: component handlers move owned Vec between contexts + ) { + if let Some(parent) = parent_context { + if parent.element_type == ElementType::Nary { + parent.integrand = Some(context.children.clone()); + } else { + // Pass children up if not in a nary context + crate::omml::utils::extend_vec_efficient( + &mut parent.children, + context.children.clone(), + ); + } + } + } +} + +/// Handler for upper limit elements (limUpp) +pub struct LimUppHandler; + +impl LimUppHandler { + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena 
bumpalo::Bump, // Unused: component handlers move owned Vec between contexts + ) { + if let Some(parent) = parent_context { + if parent.element_type == ElementType::Nary { + parent.upper_limit = Some(context.children.clone()); + } else { + // If not in nary context, treat as overset + crate::omml::utils::extend_vec_efficient( + &mut parent.children, + context.children.clone(), + ); + } + } + } +} + +/// Handler for lower limit elements (limLow) +pub struct LimLowHandler; + +impl LimLowHandler { + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: component handlers move owned Vec between contexts + ) { + if let Some(parent) = parent_context { + if parent.element_type == ElementType::Nary { + parent.lower_limit = Some(context.children.clone()); + } else { + // If not in nary context, treat as underset + crate::omml::utils::extend_vec_efficient( + &mut parent.children, + context.children.clone(), + ); + } + } + } +} diff --git a/src/formula/omml/handlers/ctrl_props.rs b/crates/litchi-formula/src/omml/handlers/ctrl_props.rs similarity index 92% rename from src/formula/omml/handlers/ctrl_props.rs rename to crates/litchi-formula/src/omml/handlers/ctrl_props.rs index d912e51..e439d22 100644 --- a/src/formula/omml/handlers/ctrl_props.rs +++ b/crates/litchi-formula/src/omml/handlers/ctrl_props.rs @@ -1,6 +1,6 @@ // Control properties element handler -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for control properties elements (ctrlPr) pub struct CtrlPropsHandler; diff --git a/src/formula/omml/handlers/delim.rs b/crates/litchi-formula/src/omml/handlers/delim.rs similarity index 92% rename from src/formula/omml/handlers/delim.rs rename to crates/litchi-formula/src/omml/handlers/delim.rs index 644d0a9..5b8ea2d 100644 --- a/src/formula/omml/handlers/delim.rs +++ 
b/crates/litchi-formula/src/omml/handlers/delim.rs @@ -1,9 +1,9 @@ // Delimiter element handler -use crate::formula::ast::*; -use crate::formula::omml::attributes::{get_attribute_value, parse_fence_type}; -use crate::formula::omml::elements::ElementContext; -use crate::formula::omml::properties::parse_delimiter_properties; +use crate::ast::*; +use crate::omml::attributes::{get_attribute_value, parse_fence_type}; +use crate::omml::elements::ElementContext; +use crate::omml::properties::parse_delimiter_properties; use quick_xml::events::BytesStart; /// Handler for delimiter (fenced) elements diff --git a/src/formula/omml/handlers/eq_arr.rs b/crates/litchi-formula/src/omml/handlers/eq_arr.rs similarity index 95% rename from src/formula/omml/handlers/eq_arr.rs rename to crates/litchi-formula/src/omml/handlers/eq_arr.rs index 39cdf4d..55f9307 100644 --- a/src/formula/omml/handlers/eq_arr.rs +++ b/crates/litchi-formula/src/omml/handlers/eq_arr.rs @@ -1,11 +1,11 @@ // Equation array element handler -use crate::formula::ast::*; -use crate::formula::omml::attributes::{ +use crate::ast::*; +use crate::omml::attributes::{ get_attribute_value, get_attribute_value_float, get_attribute_value_int, }; -use crate::formula::omml::elements::ElementContext; -use crate::formula::omml::properties::parse_eq_arr_properties; +use crate::omml::elements::ElementContext; +use crate::omml::properties::parse_eq_arr_properties; use quick_xml::events::BytesStart; /// Handler for equation array elements diff --git a/src/formula/omml/handlers/eq_arr_pr.rs b/crates/litchi-formula/src/omml/handlers/eq_arr_pr.rs similarity index 87% rename from src/formula/omml/handlers/eq_arr_pr.rs rename to crates/litchi-formula/src/omml/handlers/eq_arr_pr.rs index b176ed4..2c21f72 100644 --- a/src/formula/omml/handlers/eq_arr_pr.rs +++ b/crates/litchi-formula/src/omml/handlers/eq_arr_pr.rs @@ -1,6 +1,6 @@ // Equation array properties element handler -use crate::formula::omml::elements::ElementContext; +use 
crate::omml::elements::ElementContext; /// Handler for equation array properties elements #[allow(dead_code)] // Handler implementation, reserved for property parsing @@ -17,7 +17,7 @@ impl EqArrPrHandler { // Store the parsed properties in the parent context parent.properties = context.properties.clone(); // Pass children up - crate::formula::omml::utils::extend_vec_efficient( + crate::omml::utils::extend_vec_efficient( &mut parent.children, context.children.clone(), ); diff --git a/src/formula/omml/handlers/fraction.rs b/crates/litchi-formula/src/omml/handlers/fraction.rs similarity index 92% rename from src/formula/omml/handlers/fraction.rs rename to crates/litchi-formula/src/omml/handlers/fraction.rs index 24bc335..da9fc00 100644 --- a/src/formula/omml/handlers/fraction.rs +++ b/crates/litchi-formula/src/omml/handlers/fraction.rs @@ -1,8 +1,8 @@ // Fraction element handler -use crate::formula::ast::*; -use crate::formula::omml::elements::ElementContext; -use crate::formula::omml::properties::parse_fraction_properties; +use crate::ast::*; +use crate::omml::elements::ElementContext; +use crate::omml::properties::parse_fraction_properties; use quick_xml::events::BytesStart; /// Handler for fraction elements diff --git a/src/formula/omml/handlers/function.rs b/crates/litchi-formula/src/omml/handlers/function.rs similarity index 95% rename from src/formula/omml/handlers/function.rs rename to crates/litchi-formula/src/omml/handlers/function.rs index 47686c3..1896341 100644 --- a/src/formula/omml/handlers/function.rs +++ b/crates/litchi-formula/src/omml/handlers/function.rs @@ -1,7 +1,7 @@ // Function element handlers -use crate::formula::ast::*; -use crate::formula::omml::elements::ElementContext; +use crate::ast::*; +use crate::omml::elements::ElementContext; use std::borrow::Cow; /// Handler for function elements diff --git a/src/formula/omml/handlers/group_char.rs b/crates/litchi-formula/src/omml/handlers/group_char.rs similarity index 90% rename from 
src/formula/omml/handlers/group_char.rs rename to crates/litchi-formula/src/omml/handlers/group_char.rs index 5f16568..6202528 100644 --- a/src/formula/omml/handlers/group_char.rs +++ b/crates/litchi-formula/src/omml/handlers/group_char.rs @@ -1,11 +1,9 @@ // Group character element handler -use crate::formula::ast::*; -use crate::formula::omml::attributes::{ - get_attribute_value, parse_position_type, parse_vertical_alignment, -}; -use crate::formula::omml::elements::ElementContext; -use crate::formula::omml::properties::parse_group_char_properties; +use crate::ast::*; +use crate::omml::attributes::{get_attribute_value, parse_position_type, parse_vertical_alignment}; +use crate::omml::elements::ElementContext; +use crate::omml::properties::parse_group_char_properties; use quick_xml::events::BytesStart; use std::borrow::Cow; diff --git a/src/formula/omml/handlers/group_chr_pr.rs b/crates/litchi-formula/src/omml/handlers/group_chr_pr.rs similarity index 94% rename from src/formula/omml/handlers/group_chr_pr.rs rename to crates/litchi-formula/src/omml/handlers/group_chr_pr.rs index 43851c6..06d58d8 100644 --- a/src/formula/omml/handlers/group_chr_pr.rs +++ b/crates/litchi-formula/src/omml/handlers/group_chr_pr.rs @@ -1,6 +1,6 @@ // Handler for group character properties (groupChrPr) -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for group character properties (groupChrPr) pub struct GroupChrPrHandler; diff --git a/src/formula/omml/handlers/limit.rs b/crates/litchi-formula/src/omml/handlers/limit.rs similarity index 83% rename from src/formula/omml/handlers/limit.rs rename to crates/litchi-formula/src/omml/handlers/limit.rs index 1983600..726d147 100644 --- a/src/formula/omml/handlers/limit.rs +++ b/crates/litchi-formula/src/omml/handlers/limit.rs @@ -1,6 +1,6 @@ // Limit element handler -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for limit 
elements pub struct LimitHandler; @@ -13,7 +13,7 @@ impl LimitHandler { ) { if let Some(parent) = parent_context { // Limits are handled by the specific handlers above - crate::formula::omml::utils::extend_vec_efficient( + crate::omml::utils::extend_vec_efficient( &mut parent.children, context.children.clone(), ); diff --git a/src/formula/omml/handlers/lit.rs b/crates/litchi-formula/src/omml/handlers/lit.rs similarity index 93% rename from src/formula/omml/handlers/lit.rs rename to crates/litchi-formula/src/omml/handlers/lit.rs index 9fb82dc..ac93226 100644 --- a/src/formula/omml/handlers/lit.rs +++ b/crates/litchi-formula/src/omml/handlers/lit.rs @@ -1,6 +1,6 @@ // Handler for literal text property element (lit) -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for literal text property (m:lit) pub struct LitHandler; diff --git a/crates/litchi-formula/src/omml/handlers/matrix.rs b/crates/litchi-formula/src/omml/handlers/matrix.rs new file mode 100644 index 0000000..66ac98b --- /dev/null +++ b/crates/litchi-formula/src/omml/handlers/matrix.rs @@ -0,0 +1,102 @@ +// Matrix element handlers + +use crate::ast::*; +use crate::omml::attributes::{get_attribute_value, parse_matrix_fence}; +use crate::omml::elements::{ElementContext, ElementType}; +use crate::omml::properties::parse_matrix_properties; +use quick_xml::events::BytesStart; + +/// Handler for matrix elements +pub struct MatrixHandler; + +impl MatrixHandler { + pub fn handle_start<'arena>( + elem: &BytesStart, + context: &mut ElementContext<'arena>, + _arena: &'arena bumpalo::Bump, // Unused: matrix elements are owned Vec, no string allocation + ) { + let attrs: Vec<_> = elem.attributes().filter_map(|a| a.ok()).collect(); + + // Parse matrix column spacing (mcs) attribute using SIMD-accelerated parsing + let fence_val = get_attribute_value(&attrs, "mcs"); + context.matrix_fence = parse_matrix_fence(fence_val.as_deref()); + + // Parse matrix properties + 
context.properties = parse_matrix_properties(&attrs); + } + + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: matrix elements are owned Vec, no string allocation + ) { + let fence_type = context.matrix_fence.unwrap_or(MatrixFence::None); + let rows = std::mem::take(&mut context.matrix_rows); + + // Create matrix properties from context + let properties = + if context.properties.matrix_alignment.is_some() + || context.properties.matrix_row_spacing.is_some() + || context.properties.matrix_column_spacing.is_some() + { + Some(MatrixProperties { + base_alignment: context.properties.matrix_alignment.as_ref().and_then(|s| { + match s.as_str() { + "top" => Some(Alignment::Top), + "center" | "cen" => Some(Alignment::Center), + "bottom" | "bot" => Some(Alignment::Bottom), + "baseline" | "base" => Some(Alignment::Baseline), + _ => None, + } + }), + column_gap: context + .properties + .matrix_column_spacing + .as_ref() + .and_then(|s| s.parse().ok()), + row_spacing: context + .properties + .matrix_row_spacing + .as_ref() + .and_then(|s| s.parse().ok()), + column_spacing: None, // Would need more complex parsing + }) + } else { + None + }; + + let node = MathNode::Matrix { + rows, + fence_type, + properties, + }; + + if let Some(parent) = parent_context { + parent.children.push(node); + } + } +} + +/// Handler for matrix row elements +pub struct MatrixRowHandler; + +impl MatrixRowHandler { + pub fn handle_end<'arena>( + context: &mut ElementContext<'arena>, + parent_context: Option<&mut ElementContext<'arena>>, + _arena: &'arena bumpalo::Bump, // Unused: matrix elements are owned Vec, no string allocation + ) { + if let Some(parent) = parent_context + && parent.element_type == ElementType::Matrix + { + // Matrix row - collect cells from children + // Each child represents a cell (mtd element) + let mut row = Vec::new(); + for child in &context.children { + // 
Each child is a cell containing mathematical content + row.push(vec![child.clone()]); + } + parent.matrix_rows.push(row); + } + } +} diff --git a/src/formula/omml/handlers/matrix_cell.rs b/crates/litchi-formula/src/omml/handlers/matrix_cell.rs similarity index 87% rename from src/formula/omml/handlers/matrix_cell.rs rename to crates/litchi-formula/src/omml/handlers/matrix_cell.rs index 6ebbe6a..ae18dce 100644 --- a/src/formula/omml/handlers/matrix_cell.rs +++ b/crates/litchi-formula/src/omml/handlers/matrix_cell.rs @@ -1,6 +1,6 @@ // Matrix cell element handler -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for matrix cell elements #[allow(dead_code)] // Handler implementation, used by matrix row handler @@ -16,7 +16,7 @@ impl MatrixCellHandler { if let Some(parent) = parent_context { // Matrix cells are processed by the matrix row handler // Here we just pass children up to the matrix row - crate::formula::omml::utils::extend_vec_efficient( + crate::omml::utils::extend_vec_efficient( &mut parent.children, context.children.clone(), ); diff --git a/src/formula/omml/handlers/mod.rs b/crates/litchi-formula/src/omml/handlers/mod.rs similarity index 100% rename from src/formula/omml/handlers/mod.rs rename to crates/litchi-formula/src/omml/handlers/mod.rs diff --git a/src/formula/omml/handlers/nary.rs b/crates/litchi-formula/src/omml/handlers/nary.rs similarity index 90% rename from src/formula/omml/handlers/nary.rs rename to crates/litchi-formula/src/omml/handlers/nary.rs index 01ad545..12f5341 100644 --- a/src/formula/omml/handlers/nary.rs +++ b/crates/litchi-formula/src/omml/handlers/nary.rs @@ -1,9 +1,9 @@ // N-ary operator element handler -use crate::formula::ast::*; -use crate::formula::omml::attributes::{get_attribute_value, parse_large_operator}; -use crate::formula::omml::elements::ElementContext; -use crate::formula::omml::properties::parse_nary_properties; +use crate::ast::*; +use 
crate::omml::attributes::{get_attribute_value, parse_large_operator}; +use crate::omml::elements::ElementContext; +use crate::omml::properties::parse_nary_properties; use quick_xml::events::BytesStart; /// Handler for n-ary operator elements diff --git a/src/formula/omml/handlers/nor.rs b/crates/litchi-formula/src/omml/handlers/nor.rs similarity index 93% rename from src/formula/omml/handlers/nor.rs rename to crates/litchi-formula/src/omml/handlers/nor.rs index f075e00..f17899c 100644 --- a/src/formula/omml/handlers/nor.rs +++ b/crates/litchi-formula/src/omml/handlers/nor.rs @@ -1,6 +1,6 @@ // Handler for normal text property element (nor) -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for normal text property (m:nor) pub struct NorHandler; diff --git a/src/formula/omml/handlers/phantom.rs b/crates/litchi-formula/src/omml/handlers/phantom.rs similarity index 89% rename from src/formula/omml/handlers/phantom.rs rename to crates/litchi-formula/src/omml/handlers/phantom.rs index 069bd53..cb9e37e 100644 --- a/src/formula/omml/handlers/phantom.rs +++ b/crates/litchi-formula/src/omml/handlers/phantom.rs @@ -1,7 +1,7 @@ // Phantom element handler -use crate::formula::ast::*; -use crate::formula::omml::elements::ElementContext; +use crate::ast::*; +use crate::omml::elements::ElementContext; /// Handler for phantom elements pub struct PhantomHandler; diff --git a/src/formula/omml/handlers/pos.rs b/crates/litchi-formula/src/omml/handlers/pos.rs similarity index 93% rename from src/formula/omml/handlers/pos.rs rename to crates/litchi-formula/src/omml/handlers/pos.rs index 794d146..7221064 100644 --- a/src/formula/omml/handlers/pos.rs +++ b/crates/litchi-formula/src/omml/handlers/pos.rs @@ -1,6 +1,6 @@ // Handler for position property element (pos) -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for position property element (pos) pub struct PosHandler; 
diff --git a/src/formula/omml/handlers/post_script.rs b/crates/litchi-formula/src/omml/handlers/post_script.rs similarity index 84% rename from src/formula/omml/handlers/post_script.rs rename to crates/litchi-formula/src/omml/handlers/post_script.rs index c8ad561..0d8ecb5 100644 --- a/src/formula/omml/handlers/post_script.rs +++ b/crates/litchi-formula/src/omml/handlers/post_script.rs @@ -1,6 +1,6 @@ // Post-script element handler -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for post-script elements pub struct PostScriptHandler; @@ -13,7 +13,7 @@ impl PostScriptHandler { ) { if let Some(parent) = parent_context { // Post-scripts are handled by the superscript/subscript elements - crate::formula::omml::utils::extend_vec_efficient( + crate::omml::utils::extend_vec_efficient( &mut parent.post_scripts, context.children.clone(), ); diff --git a/src/formula/omml/handlers/pre_script.rs b/crates/litchi-formula/src/omml/handlers/pre_script.rs similarity index 83% rename from src/formula/omml/handlers/pre_script.rs rename to crates/litchi-formula/src/omml/handlers/pre_script.rs index 399c7df..292506e 100644 --- a/src/formula/omml/handlers/pre_script.rs +++ b/crates/litchi-formula/src/omml/handlers/pre_script.rs @@ -1,6 +1,6 @@ // Pre-script element handler -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for pre-script elements pub struct PreScriptHandler; @@ -13,7 +13,7 @@ impl PreScriptHandler { ) { if let Some(parent) = parent_context { // Pre-scripts are handled by the superscript/subscript elements - crate::formula::omml::utils::extend_vec_efficient( + crate::omml::utils::extend_vec_efficient( &mut parent.pre_scripts, context.children.clone(), ); diff --git a/src/formula/omml/handlers/radical.rs b/crates/litchi-formula/src/omml/handlers/radical.rs similarity index 88% rename from src/formula/omml/handlers/radical.rs rename to 
crates/litchi-formula/src/omml/handlers/radical.rs index 449b4c0..63dce5b 100644 --- a/src/formula/omml/handlers/radical.rs +++ b/crates/litchi-formula/src/omml/handlers/radical.rs @@ -1,7 +1,7 @@ // Radical element handler -use crate::formula::ast::*; -use crate::formula::omml::elements::ElementContext; +use crate::ast::*; +use crate::omml::elements::ElementContext; /// Handler for radical (root) elements pub struct RadicalHandler; diff --git a/src/formula/omml/handlers/run_props.rs b/crates/litchi-formula/src/omml/handlers/run_props.rs similarity index 96% rename from src/formula/omml/handlers/run_props.rs rename to crates/litchi-formula/src/omml/handlers/run_props.rs index f2a9e60..9dfcb41 100644 --- a/src/formula/omml/handlers/run_props.rs +++ b/crates/litchi-formula/src/omml/handlers/run_props.rs @@ -1,6 +1,6 @@ // Run properties element handler -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for run properties elements (rPr) pub struct RunPropsHandler; diff --git a/src/formula/omml/handlers/scr.rs b/crates/litchi-formula/src/omml/handlers/scr.rs similarity index 93% rename from src/formula/omml/handlers/scr.rs rename to crates/litchi-formula/src/omml/handlers/scr.rs index a2356fd..370f205 100644 --- a/src/formula/omml/handlers/scr.rs +++ b/crates/litchi-formula/src/omml/handlers/scr.rs @@ -1,6 +1,6 @@ // Handler for script/style property element (scr) -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for script/style property (m:scr) pub struct ScrHandler; diff --git a/src/formula/omml/handlers/script.rs b/crates/litchi-formula/src/omml/handlers/script.rs similarity index 84% rename from src/formula/omml/handlers/script.rs rename to crates/litchi-formula/src/omml/handlers/script.rs index 872486c..5aaf6b3 100644 --- a/src/formula/omml/handlers/script.rs +++ b/crates/litchi-formula/src/omml/handlers/script.rs @@ -1,7 +1,7 @@ // Script element 
handlers -use crate::formula::ast::*; -use crate::formula::omml::elements::ElementContext; +use crate::ast::*; +use crate::omml::elements::ElementContext; /// Handler for superscript elements pub struct SuperscriptHandler; @@ -79,16 +79,16 @@ impl SuperscriptElementHandler { ) { if let Some(parent) = parent_context { match parent.element_type { - crate::formula::omml::elements::ElementType::Superscript - | crate::formula::omml::elements::ElementType::SubSup => { + crate::omml::elements::ElementType::Superscript + | crate::omml::elements::ElementType::SubSup => { parent.superscript = Some(context.children.clone()); }, - crate::formula::omml::elements::ElementType::Nary => { + crate::omml::elements::ElementType::Nary => { parent.upper_limit = Some(context.children.clone()); }, _ => { // Pass children up for other contexts - crate::formula::omml::utils::extend_vec_efficient( + crate::omml::utils::extend_vec_efficient( &mut parent.children, context.children.clone(), ); @@ -109,16 +109,16 @@ impl SubscriptElementHandler { ) { if let Some(parent) = parent_context { match parent.element_type { - crate::formula::omml::elements::ElementType::Subscript - | crate::formula::omml::elements::ElementType::SubSup => { + crate::omml::elements::ElementType::Subscript + | crate::omml::elements::ElementType::SubSup => { parent.subscript = Some(context.children.clone()); }, - crate::formula::omml::elements::ElementType::Nary => { + crate::omml::elements::ElementType::Nary => { parent.lower_limit = Some(context.children.clone()); }, _ => { // Pass children up for other contexts - crate::formula::omml::utils::extend_vec_efficient( + crate::omml::utils::extend_vec_efficient( &mut parent.children, context.children.clone(), ); diff --git a/src/formula/omml/handlers/spacing.rs b/crates/litchi-formula/src/omml/handlers/spacing.rs similarity index 86% rename from src/formula/omml/handlers/spacing.rs rename to crates/litchi-formula/src/omml/handlers/spacing.rs index 9aba485..983f68f 100644 --- 
a/src/formula/omml/handlers/spacing.rs +++ b/crates/litchi-formula/src/omml/handlers/spacing.rs @@ -1,9 +1,9 @@ // Spacing element handler -use crate::formula::ast::*; -use crate::formula::omml::attributes::{get_attribute_value, parse_space_type}; -use crate::formula::omml::elements::ElementContext; -use crate::formula::omml::properties::parse_spacing_properties; +use crate::ast::*; +use crate::omml::attributes::{get_attribute_value, parse_space_type}; +use crate::omml::elements::ElementContext; +use crate::omml::properties::parse_spacing_properties; use quick_xml::events::BytesStart; /// Handler for spacing elements diff --git a/src/formula/omml/handlers/sty.rs b/crates/litchi-formula/src/omml/handlers/sty.rs similarity index 94% rename from src/formula/omml/handlers/sty.rs rename to crates/litchi-formula/src/omml/handlers/sty.rs index fc715f0..ce7f196 100644 --- a/src/formula/omml/handlers/sty.rs +++ b/crates/litchi-formula/src/omml/handlers/sty.rs @@ -1,6 +1,6 @@ // Handler for style property element (sty) -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for style property (m:sty) pub struct StyHandler; diff --git a/src/formula/omml/handlers/vert_jc.rs b/crates/litchi-formula/src/omml/handlers/vert_jc.rs similarity index 93% rename from src/formula/omml/handlers/vert_jc.rs rename to crates/litchi-formula/src/omml/handlers/vert_jc.rs index 06f8b9a..c6201bf 100644 --- a/src/formula/omml/handlers/vert_jc.rs +++ b/crates/litchi-formula/src/omml/handlers/vert_jc.rs @@ -1,6 +1,6 @@ // Handler for vertical justification property element (vertJc) -use crate::formula::omml::elements::ElementContext; +use crate::omml::elements::ElementContext; /// Handler for vertical justification property element (vertJc) pub struct VertJcHandler; diff --git a/src/formula/omml/lookup.rs b/crates/litchi-formula/src/omml/lookup.rs similarity index 99% rename from src/formula/omml/lookup.rs rename to 
crates/litchi-formula/src/omml/lookup.rs index 32f4ffd..c4a4d67 100644 --- a/src/formula/omml/lookup.rs +++ b/crates/litchi-formula/src/omml/lookup.rs @@ -1,5 +1,5 @@ use super::elements::ElementType; -use crate::formula::ast::{ +use crate::ast::{ AccentType, Alignment, FunctionName, LargeOperator, Operator, PredefinedSymbol, StyleType, }; use phf::{phf_map, phf_set}; diff --git a/crates/litchi-formula/src/omml/mod.rs b/crates/litchi-formula/src/omml/mod.rs new file mode 100644 index 0000000..a4725cf --- /dev/null +++ b/crates/litchi-formula/src/omml/mod.rs @@ -0,0 +1,1134 @@ +/// OMML attribute parsing +/// +/// This module handles parsing of OMML element attributes and properties. +mod attributes; +/// OMML element types and context +mod elements; +/// OMML Error Types +/// +/// This module defines all error types that can occur during OMML parsing. +mod error; +/// OMML element handlers +/// +/// This module contains handlers for specific OMML elements that require +/// complex parsing logic. Each handler is organized into separate modules +/// for better maintainability. +mod handlers; +/// Performance-optimized lookup tables for OMML parsing +/// +/// This module provides compile-time generated perfect hash function (PHF) +/// lookup tables for fast element and attribute name resolution. +mod lookup; +/// OMML Parser Implementation +/// +/// This module contains the main OMML parsing logic with performance optimizations. +mod parser; +/// OMML property parsing +/// +/// This module handles parsing of OMML property elements and attributes. +/// Properties control styling, spacing, alignment, and other formatting aspects. +mod properties; +/// OMML utility functions and performance optimizations +/// +/// This module provides utility functions for OMML parsing, including +/// performance optimizations, string processing, and helper functions. 
+mod utils; + +use crate::ast::MathNode; + +pub use error::OmmlError; +/// Re-export public API +pub use parser::OmmlParser; + +#[cfg(test)] +mod tests { + use super::*; + use crate::ast::{AccentType, Fence, Formula, LargeOperator}; + + #[test] + fn test_parse_simple_text() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#"x"#; + let nodes = parser.parse(xml).unwrap(); + + assert_eq!(nodes.len(), 1); + match &nodes[0] { + MathNode::Text(text) => assert_eq!(text.as_ref(), "x"), + _ => panic!("Expected text node"), + } + } + + #[test] + fn test_parse_multiple_text_runs() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + a + b + c + "#; + let nodes = parser.parse(xml).unwrap(); + + assert_eq!(nodes.len(), 3); + for (i, node) in nodes.iter().enumerate() { + match node { + MathNode::Text(text) => { + let expected = match i { + 0 => "a", + 1 => "b", + 2 => "c", + _ => unreachable!(), + }; + assert_eq!(text.as_ref(), expected); + }, + _ => panic!("Expected text node at position {}", i), + } + } + } + + #[test] + fn test_parse_fraction() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + 1 + 2 + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Frac { + numerator, + denominator, + .. + } => { + assert!(!numerator.is_empty()); + assert!(!denominator.is_empty()); + }, + _ => panic!("Expected fraction node"), + } + } + + #[test] + fn test_parse_fraction_with_properties() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + noBar + a + b + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Frac { + numerator, + denominator, + .. 
+ } => { + assert!(!numerator.is_empty()); + assert!(!denominator.is_empty()); + }, + _ => panic!("Expected fraction node"), + } + } + + #[test] + fn test_parse_delimiter() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + ( + ) + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Fenced { open, close, .. } => { + assert_eq!(*open, Fence::Paren); + assert_eq!(*close, Fence::Paren); + }, + _ => panic!("Expected fenced node"), + } + } + + #[test] + fn test_parse_delimiter_brackets() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + [ + ] + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Fenced { open, close, .. } => { + assert_eq!(*open, Fence::Bracket); + assert_eq!(*close, Fence::Bracket); + }, + _ => panic!("Expected fenced node"), + } + } + + #[test] + fn test_parse_function() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + sin + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Function { name, .. 
} => { + assert_eq!(name.as_ref(), "sin"); + }, + _ => panic!("Expected function node"), + } + } + + #[test] + fn test_parse_function_complex() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + log + + + x + 2 + + + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Function { name, argument } => { + assert_eq!(name.as_ref(), "log"); + assert!(!argument.is_empty()); + }, + _ => panic!("Expected function node"), + } + } + + #[test] + fn test_parse_accent() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + ^ + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Accent { accent, .. } => { + assert_eq!(*accent, AccentType::Hat); + }, + _ => panic!("Expected accent node"), + } + } + + #[test] + fn test_parse_accent_bar() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Accent { accent, .. } => { + assert_eq!(*accent, AccentType::Bar); + }, + _ => panic!("Expected accent node"), + } + } + + #[test] + fn test_parse_bar() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Bar { .. 
} => { + // Bar is represented as Bar node + }, + _ => panic!("Expected bar node"), + } + } + + #[test] + fn test_parse_nary_with_limits() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + + + i=1 + n + ai + + "#; + + let nodes = match parser.parse(xml) { + Ok(nodes) => nodes, + Err(e) => { + println!("Parse error: {:?}", e); + panic!("Parse failed: {:?}", e); + }, + }; + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::LargeOp { + operator, + lower_limit, + upper_limit, + .. + } => { + assert_eq!(*operator, LargeOperator::Sum); + assert!(lower_limit.is_some()); + assert!(upper_limit.is_some()); + }, + _ => panic!("Expected large operator node"), + } + } + + #[test] + fn test_parse_nary_integral() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + + + 0 + 1 + x2 + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::LargeOp { + operator, + lower_limit, + upper_limit, + .. 
+ } => { + assert_eq!(*operator, LargeOperator::Integral); + assert!(lower_limit.is_some()); + assert!(upper_limit.is_some()); + }, + _ => panic!("Expected large operator node"), + } + } + + #[test] + fn test_parse_superscript() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + x + 2 + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Power { base, exponent } => { + assert!(!base.is_empty()); + assert!(!exponent.is_empty()); + }, + _ => panic!("Expected power node"), + } + } + + #[test] + fn test_parse_subscript() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + x + i + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Sub { base, subscript } => { + assert!(!base.is_empty()); + assert!(!subscript.is_empty()); + }, + _ => panic!("Expected sub node"), + } + } + + #[test] + fn test_parse_subsup() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + x + i + 2 + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::SubSup { + base, + subscript, + superscript, + } => { + assert!(!base.is_empty()); + assert!(!subscript.is_empty()); + assert!(!superscript.is_empty()); + }, + _ => panic!("Expected subsup node"), + } + } + + #[test] + fn test_parse_radical() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + 2 + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Root { base, index } => { + assert!(!base.is_empty()); + assert!(index.is_some()); + }, + _ => panic!("Expected root node"), + } + } + + #[test] + fn test_parse_radical_simple() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + 
+ let xml = r#" + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Root { base, index } => { + assert!(!base.is_empty()); + assert!(index.is_none()); + }, + _ => panic!("Expected root node"), + } + } + + #[test] + fn test_parse_matrix() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + + + + 2 + center + + + + + + a + b + + + c + d + + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Matrix { rows, .. } => { + assert!(!rows.is_empty()); + }, + _ => panic!("Expected matrix node"), + } + } + + #[test] + fn test_parse_box() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Fenced { open, close, .. } => { + assert_eq!(*open, Fence::None); + assert_eq!(*close, Fence::None); + }, + _ => panic!("Expected fenced node"), + } + } + + #[test] + fn test_parse_phantom() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Phantom(content) => { + assert!(!content.is_empty()); + }, + _ => panic!("Expected phantom node"), + } + } + + #[test] + fn test_parse_border_box() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Fenced { open, close, .. 
} => { + assert_eq!(*open, Fence::None); + assert_eq!(*close, Fence::None); + }, + _ => panic!("Expected fenced node"), + } + } + + #[test] + fn test_parse_equation_array() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + center + + a=b + c=d + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::EqArray { rows, .. } => { + assert!(!rows.is_empty()); + }, + _ => panic!("Expected equation array node"), + } + } + + #[test] + fn test_parse_group_char() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + { + top + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::GroupChar { .. } => { + // Group char is represented as GroupChar node + }, + _ => panic!("Expected group char node"), + } + } + + #[test] + fn test_parse_spacing() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + a + + thickmathspace + + b + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + // Should contain text nodes and spacing + } + + #[test] + fn test_parse_complex_expression() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + sin + + + x + + + y + 2 + + + + + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Function { name, argument } => { + assert_eq!(name.as_ref(), "sin"); + assert!(!argument.is_empty()); + }, + _ => panic!("Expected function node"), + } + } + + #[test] + fn test_parse_empty_math() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#""#; + let result = parser.parse(xml); + assert!(result.is_err()); // Empty math should fail validation + } + + #[test] + fn test_parse_invalid_xml() { + let formula = 
Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#""#; + let result = parser.parse(xml); + assert!(result.is_err()); // Unknown elements should result in empty math which fails validation + } + + #[test] + fn test_parse_malformed_xml() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#"unclosed"#; + let result = parser.parse(xml); + assert!(result.is_err()); // Should return error for malformed XML + } + + #[test] + fn test_parse_unicode_characters() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + α + β + + "#; + let nodes = parser.parse(xml).unwrap(); + + assert_eq!(nodes.len(), 3); + match (&nodes[0], &nodes[1], &nodes[2]) { + (MathNode::Text(a), MathNode::Text(b), MathNode::Text(c)) => { + assert_eq!(a.as_ref(), "α"); + assert_eq!(b.as_ref(), "β"); + assert_eq!(c.as_ref(), "∑"); + }, + _ => panic!("Expected text nodes"), + } + } + + #[test] + fn test_parse_complex_nested_expression() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + + sin + + + x + y + + + + + 2 + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + // Verify the structure contains a power node + match &nodes[0] { + MathNode::Power { base, exponent } => { + assert!(!base.is_empty()); + assert!(!exponent.is_empty()); + }, + _ => panic!("Expected power node"), + } + } + + #[test] + fn test_parse_matrix_with_properties() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + + + + 2 + center + + + + + + a + b + + + c + d + + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Matrix { rows, .. 
} => { + assert_eq!(rows.len(), 2); + assert_eq!(rows[0].len(), 2); + assert_eq!(rows[1].len(), 2); + }, + _ => panic!("Expected matrix node"), + } + } + + #[test] + fn test_parse_nary_with_complex_limits() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + + + + + i + 0 + + + n + + + x + i + + + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::LargeOp { + operator, + lower_limit, + upper_limit, + integrand, + .. + } => { + assert_eq!(*operator, LargeOperator::Sum); + assert!(lower_limit.is_some()); + assert!(upper_limit.is_some()); + assert!(integrand.is_some()); + }, + _ => panic!("Expected large operator node"), + } + } + + #[test] + fn test_parse_accent_with_position() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + + + v + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Accent { accent, base, .. 
} => { + assert_eq!(*accent, AccentType::Vec); + assert!(!base.is_empty()); + }, + _ => panic!("Expected accent node"), + } + } + + #[test] + fn test_parse_group_character_with_position() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + { + top + center + + + + a + b + + + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::GroupChar { + base, + character, + position, + vertical_alignment, + } => { + assert!(!base.is_empty()); + assert_eq!(character.as_deref(), Some("{")); + assert_eq!(*position, Some(crate::ast::Position::Top)); + assert_eq!( + *vertical_alignment, + Some(crate::ast::VerticalAlignment::Center) + ); + }, + _ => panic!("Expected group character node"), + } + } + + #[test] + fn test_parse_phantom_element() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Phantom(content) => { + assert!(!content.is_empty()); + }, + _ => panic!("Expected phantom node"), + } + } + + #[test] + fn test_parse_radical_with_degree() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + 3 + x + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Root { base, index } => { + assert!(!base.is_empty()); + assert!(index.is_some()); + }, + _ => panic!("Expected root node with index"), + } + } + + #[test] + fn test_parse_spacing_element() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + a + + thickmathspace + + b + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(nodes.len() >= 2); // Should have at least text and spacing + } + + #[test] + fn test_validation_empty_math() { + let formula = Formula::new(); + let parser = 
OmmlParser::new(formula.arena()); + + let xml = ""; + let result = parser.parse(xml); + assert!(result.is_err()); + } + + #[test] + fn test_validation_malformed_xml() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = ""; + let result = parser.parse(xml); + assert!(result.is_err()); + } + + #[test] + fn test_validation_invalid_nesting() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + // Numerator outside of fraction + let xml = r#" + 1 + "#; + let result = parser.parse(xml); + assert!(result.is_err()); + } + + #[test] + fn test_validation_missing_required_elements() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + // Fraction with empty numerator + let xml = r#" + + + 2 + + "#; + let result = parser.parse(xml); + assert!(result.is_err()); + } + + #[test] + fn test_predefined_symbols() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + α + β + + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert_eq!(nodes.len(), 4); + } + + #[test] + fn test_deep_nesting_limit() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + // Create deeply nested XML that exceeds the limit + let mut xml = "".to_string(); + for _ in 0..1010 { + xml.push_str(""); + } + xml.push_str("x"); + for _ in 0..1010 { + xml.push_str("1"); + } + xml.push_str(""); + + let result = parser.parse(&xml); + assert!(result.is_err()); + } + + #[test] + fn test_run_properties() { + let formula = Formula::new(); + let parser = OmmlParser::new(formula.arena()); + + let xml = r#" + + + bi + p + Times New Roman + 1 + + text + + "#; + + let nodes = parser.parse(xml).unwrap(); + assert!(!nodes.is_empty()); + match &nodes[0] { + MathNode::Run { + content, + literal, + style, + font, + .. 
+ } => { + assert!(!content.is_empty()); + assert!(literal.unwrap_or(false)); + assert_eq!(*style, Some(crate::StyleType::BoldItalic)); + assert_eq!(font.as_deref(), Some("Times New Roman")); + }, + _ => panic!("Expected run node"), + } + } +} diff --git a/crates/litchi-formula/src/omml/parser.rs b/crates/litchi-formula/src/omml/parser.rs new file mode 100644 index 0000000..5a2c3ab --- /dev/null +++ b/crates/litchi-formula/src/omml/parser.rs @@ -0,0 +1,687 @@ +use crate::ast::{MathNode, StrikeStyle}; +use crate::omml::attributes::*; +use crate::omml::elements::*; +use crate::omml::error::OmmlError; +use crate::omml::handlers::*; +use crate::omml::lookup::*; +use crate::omml::properties::*; +use crate::omml::utils::{validate_element_nesting, validate_omml_structure, *}; +use quick_xml::Reader; +use quick_xml::events::{BytesStart, Event}; +use std::borrow::Cow; + +/// OMML parser that converts OMML XML to our formula AST +pub struct OmmlParser<'arena> { + arena: &'arena bumpalo::Bump, +} + +impl<'arena> OmmlParser<'arena> { + /// Create a new OMML parser with the given arena + pub fn new(arena: &'arena bumpalo::Bump) -> Self { + Self { arena } + } + + /// Parse OMML from a string + /// + /// # Example + /// ```ignore + /// let formula = Formula::new(); + /// let parser = OmmlParser::new(formula.arena()); + /// let nodes = parser.parse("x")?; + /// ``` + pub fn parse(&self, xml: &str) -> Result>, OmmlError> { + // Validate input + if xml.trim().is_empty() { + return Err(OmmlError::InvalidStructure("Empty XML input".to_string())); + } + + let mut reader = Reader::from_str(xml); + reader.config_mut().trim_text(true); + + // Use high-performance element stack with capacity hint and context pooling + let mut stack = ElementStack::with_capacity(64); + let mut context_pool = ContextPool::new(32); + let mut result = Vec::new(); + let mut depth = 0; + const MAX_DEPTH: usize = 1000; // Prevent stack overflow attacks + + loop { + match reader.read_event() { + 
Ok(Event::Start(ref e)) => { + depth += 1; + if depth > MAX_DEPTH { + return Err(OmmlError::InvalidStructure(format!( + "Maximum XML depth {} exceeded", + MAX_DEPTH + ))); + } + self.handle_start_element(e, &mut stack, &mut context_pool)?; + }, + Ok(Event::End(ref e)) => { + let name = e.local_name(); + self.handle_end_element( + name.as_ref(), + &mut stack, + &mut result, + &mut context_pool, + )?; + depth = depth.saturating_sub(1); + }, + Ok(Event::Text(ref e)) => { + self.handle_text_element(e, &mut stack)?; + }, + Ok(Event::CData(ref e)) => { + self.handle_cdata_element(e, &mut stack)?; + }, + Ok(Event::Empty(ref e)) => { + // Handle self-closing tags + self.handle_empty_element(e, &mut stack, &mut result, &mut context_pool)?; + }, + Ok(Event::Eof) => break, + Err(e) => { + let position = reader.buffer_position(); + return Err(OmmlError::XmlError(format!( + "XML parsing error at position {}: {}", + position, e + ))); + }, + _ => {}, // Skip other events (comments, processing instructions, etc.) 
+ } + } + + // Validate that we have a properly closed document + if depth != 0 { + return Err(OmmlError::InvalidStructure(format!( + "Unclosed elements detected, final depth: {}", + depth + ))); + } + + // Validate result structure + if result.is_empty() && !xml.contains(", + context_pool: &mut ContextPool<'arena>, + ) -> Result<(), OmmlError> { + let name = elem.local_name(); + let name_str = + std::str::from_utf8(name.as_ref()).map_err(|e| OmmlError::ParseError(e.to_string()))?; + let element_type = get_element_type(name_str); + + // Handle context-dependent element types + let element_type = match (name_str, stack.last().map(|ctx| ctx.element_type)) { + ("e" | "m:e", Some(ElementType::Nary)) => ElementType::Integrand, + ("e" | "m:e", Some(ElementType::Radical)) => ElementType::Base, + ( + "e" | "m:e", + Some(ElementType::Superscript) + | Some(ElementType::Subscript) + | Some(ElementType::SubSup), + ) => ElementType::Base, + ("e" | "m:e", Some(ElementType::Fraction)) => ElementType::Denominator, // Actually, fraction has num/den, but e might be used differently + ("e" | "m:e", Some(ElementType::MatrixRow)) => ElementType::MatrixCell, // Matrix cells within rows + _ => element_type, + }; + + // Validate element nesting + let parent_type = stack.last().map(|ctx| ctx.element_type); + validate_element_nesting(&element_type, parent_type.as_ref())?; + + // Create new context for this element using the context pool + let mut context = context_pool.get(element_type); + + // Parse attributes using SIMD-accelerated parsing with caching + let attrs: Vec<_> = elem.attributes().filter_map(|a| a.ok()).collect(); + + // Performance optimization: Don't store attributes in context as they're never used + // This eliminates a clone() and unsafe transmute on every element + + // Element-specific attribute parsing using handlers + // Performance optimization: Only parse properties that are actually needed by each element type + // Avoids 40+ String allocations per element in the 
default case + match element_type { + ElementType::Delimiter => { + DelimiterHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Nary => { + NaryHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Accent => { + AccentHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Matrix => { + MatrixHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Fraction => { + FractionHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::GroupChar => { + GroupCharHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::EqArr => { + EqArrHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Spacing => { + SpacingHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::MatrixCell => { + // Matrix cells don't need special start handling + }, + ElementType::EqArrPr => { + // Equation array properties - no special start handling needed + }, + ElementType::Limit => { + // Limit elements - no special start handling needed + }, + ElementType::PreScript => { + // Pre-script elements - no special start handling needed + }, + ElementType::PostScript => { + // Post-script elements - no special start handling needed + }, + ElementType::Properties => { + // Parse properties based on the element name + context.properties = match name_str { + "dPr" | "m:dPr" => parse_delimiter_properties(&attrs), + "fPr" | "m:fPr" => parse_fraction_properties(&attrs), + "naryPr" | "m:naryPr" => parse_nary_properties(&attrs), + "accPr" | "m:accPr" => parse_accent_properties(&attrs), + "radPr" | "m:radPr" => parse_radical_properties(&attrs), + "sSupPr" | "m:sSupPr" => parse_general_properties(&attrs), + "sSubPr" | "m:sSubPr" => parse_general_properties(&attrs), + "funcPr" | "m:funcPr" => parse_general_properties(&attrs), + "limPr" | "m:limPr" => parse_limit_properties(&attrs), + "barPr" | "m:barPr" => parse_bar_properties(&attrs), + "boxPr" | "m:boxPr" => 
parse_box_properties(&attrs), + "borderBoxPr" | "m:borderBoxPr" => parse_border_box_properties(&attrs), + "phantomPr" | "m:phantomPr" => parse_phantom_properties(&attrs), + "spacingPr" | "m:spacingPr" => parse_spacing_properties(&attrs), + _ => parse_general_properties(&attrs), + }; + }, + ElementType::AccentProperties => { + context.properties = parse_accent_properties(&attrs); + }, + _ => { + // Default case: Don't eagerly parse properties (major performance win!) + // Properties will be parsed on-demand by handlers if needed. + // This eliminates 40+ String allocations per element that were never used. + // Most elements (Text, Run, Base, etc.) don't need any properties at all. + }, + } + + stack.push(context); + Ok(()) + } + + fn handle_end_element( + &self, + name: &[u8], + stack: &mut ElementStack<'arena>, + result: &mut Vec>, + _context_pool: &mut ContextPool<'arena>, + ) -> Result<(), OmmlError> { + if stack.is_empty() { + return Ok(()); + } + + let name_str = + std::str::from_utf8(name).map_err(|e| OmmlError::ParseError(e.to_string()))?; + let element_type = get_element_type(name_str); + let mut context = stack.pop().unwrap(); + + // Get parent context for passing results up + let parent_context = stack.last_mut(); + + // Use element-specific handlers + match element_type { + ElementType::Math => { + // Root element - add all children to result + result.extend(context.children); + }, + ElementType::Run => { + // Check if run has any properties - if so, create a Run node + let has_properties = context.properties.run_literal.is_some() + || context.properties.math_variant.is_some() + || context.properties.run_normal_text.is_some() + || context.properties.color.is_some() + || context.properties.underline.is_some() + || context.properties.overline.is_some() + || context.properties.strike_through.is_some() + || context.properties.double_strike_through.is_some(); + + if has_properties { + // Create a Run node with properties + // Performance: Use std::mem::take 
to avoid cloning children + let run_node = MathNode::Run { + content: std::mem::take(&mut context.children), + literal: context.properties.run_literal, + style: context + .properties + .math_variant + .as_ref() + .and_then(|s| parse_style_value(s)), + font: context + .properties + .run_normal_text + .as_ref() + .map(|s| std::borrow::Cow::Borrowed(self.arena.alloc_str(s))), + color: context + .properties + .color + .as_ref() + .map(|s| std::borrow::Cow::Borrowed(self.arena.alloc_str(s))), + underline: context + .properties + .underline + .as_ref() + .and_then(|s| parse_line_style(Some(s))), + overline: context + .properties + .overline + .as_ref() + .and_then(|s| parse_line_style(Some(s))), + strike_through: context + .properties + .strike_through + .and_then(|b| if b { Some(StrikeStyle::Single) } else { None }), + double_strike_through: context.properties.double_strike_through, + }; + + if let Some(parent) = parent_context { + parent.children.push(run_node); + } + } else { + // No properties - pass children up directly + if let Some(parent) = parent_context { + extend_vec_efficient(&mut parent.children, context.children); + } + } + }, + ElementType::Text => { + // Create text node and pass up + if !context.text.is_empty() { + let text = intern_string(self.arena, context.text.as_str()); + let node = MathNode::Text(Cow::Borrowed(text)); + if let Some(parent) = parent_context { + parent.children.push(node); + } + // Text node recorded + } + }, + ElementType::Delimiter => { + DelimiterHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Nary => { + NaryHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Function => { + FunctionHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::FunctionName => { + FunctionNameHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Accent => { + AccentHandler::handle_end(&mut context, parent_context, self.arena); + }, + 
ElementType::Bar => { + BarHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Box => { + BoxHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Phantom => { + PhantomHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Matrix => { + MatrixHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::MatrixRow => { + MatrixRowHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Fraction => { + FractionHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Radical => { + RadicalHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Superscript => { + SuperscriptHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Subscript => { + SubscriptHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::SubSup => { + SubSupHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::GroupChar => { + GroupCharHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::BorderBox => { + BorderBoxHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::EqArr => { + EqArrHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Character => { + CharHandler::handle_end(name, &mut context, parent_context, self.arena); + }, + ElementType::Spacing => { + SpacingHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Numerator => { + NumeratorHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Denominator => { + DenominatorHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Degree => { + DegreeHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Base => { + BaseHandler::handle_end(&mut context, parent_context, self.arena); 
+ }, + ElementType::SuperscriptElement => { + SuperscriptElementHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::SubscriptElement => { + SubscriptElementHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::LowerLimit => { + LowerLimitHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::UpperLimit => { + UpperLimitHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::LimLow => { + LimLowHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::LimUpp => { + LimUppHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Integrand => { + IntegrandHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Limit => { + LimitHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::PreScript => { + PreScriptHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::PostScript => { + PostScriptHandler::handle_end(&mut context, parent_context, self.arena); + }, + // Handle run properties (rPr) and control properties (ctrlPr) specifically + ElementType::Properties if name_str == "rPr" || name_str == "m:rPr" => { + RunPropsHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Properties if name_str == "ctrlPr" || name_str == "m:ctrlPr" => { + CtrlPropsHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Properties if name_str == "groupChrPr" || name_str == "m:groupChrPr" => { + GroupChrPrHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Position => { + PosHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::VerticalAlignment => { + VertJcHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Lit => { + LitHandler::handle_end(&mut context, parent_context, self.arena); + }, + 
ElementType::Scr => { + ScrHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Sty => { + StyHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Nor => { + NorHandler::handle_end(&mut context, parent_context, self.arena); + }, + // Handle property elements - store properties in parent and pass children up + ElementType::Properties => { + if let Some(parent) = parent_context { + // Store the parsed properties in the parent context + parent.properties = context.properties.clone(); + extend_vec_efficient(&mut parent.children, context.children); + } + }, + ElementType::AccentProperties => { + if let Some(parent) = parent_context { + // Store the parsed properties in the parent context + parent.properties = context.properties.clone(); + extend_vec_efficient(&mut parent.children, context.children); + } + }, + // Handle structural elements that just pass children up + ElementType::MatrixCell => { + if let Some(parent) = parent_context { + extend_vec_efficient(&mut parent.children, context.children); + } + }, + _ => { + // For unknown or unhandled elements, pass children up + if let Some(parent) = parent_context { + extend_vec_efficient(&mut parent.children, context.children); + } + }, + } + + Ok(()) + } + + fn handle_text_element( + &self, + event: &[u8], + stack: &mut ElementStack<'arena>, + ) -> Result<(), OmmlError> { + if let Some(context) = stack.last_mut() { + // For OMML, text content is typically plain and doesn't need unescaping + let text_str = + std::str::from_utf8(event).map_err(|e| OmmlError::ParseError(e.to_string()))?; + + // Process text efficiently + let processed_text = process_text_zero_copy(text_str); + context.text.push_str(processed_text.as_ref()); + } + + Ok(()) + } + + fn handle_cdata_element( + &self, + event: &[u8], + stack: &mut ElementStack<'arena>, + ) -> Result<(), OmmlError> { + if let Some(context) = stack.last_mut() { + // CDATA content is already unescaped by quick-xml + let 
text_str = + std::str::from_utf8(event).map_err(|e| OmmlError::ParseError(e.to_string()))?; + + // Process text efficiently + let processed_text = process_text_zero_copy(text_str); + context.text.push_str(processed_text.as_ref()); + } + + Ok(()) + } + + fn handle_empty_element( + &self, + elem: &BytesStart, + stack: &mut ElementStack<'arena>, + _result: &mut Vec>, + context_pool: &mut ContextPool<'arena>, + ) -> Result<(), OmmlError> { + let name = elem.local_name(); + let name_str = + std::str::from_utf8(name.as_ref()).map_err(|e| OmmlError::ParseError(e.to_string()))?; + let element_type = get_element_type(name_str); + + // For self-closing elements, we need to handle both start and end logic + let mut context = context_pool.get(element_type); + + // Parse attributes + let attrs: Vec<_> = elem.attributes().filter_map(|a| a.ok()).collect(); + + // Performance optimization: Don't store attributes in context as they're never used + // This eliminates a clone() and unsafe transmute on every element + + context.properties = parse_attributes_batch(&attrs); + + // Handle element-specific start logic + match element_type { + ElementType::Delimiter => { + DelimiterHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Nary => { + NaryHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Accent => { + AccentHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Matrix => { + MatrixHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Fraction => { + FractionHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::GroupChar => { + GroupCharHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::EqArr => { + EqArrHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::Spacing => { + SpacingHandler::handle_start(elem, &mut context, self.arena); + }, + ElementType::MatrixCell => { + // Matrix cells don't need special start handling + }, + 
ElementType::EqArrPr => { + // Equation array properties - no special start handling needed + }, + ElementType::Limit => { + // Limit elements - no special start handling needed + }, + ElementType::PreScript => { + // Pre-script elements - no special start handling needed + }, + ElementType::PostScript => { + // Post-script elements - no special start handling needed + }, + _ => { + // For other elements, properties are already parsed + }, + } + + // Handle element-specific end logic (since it's self-closing) + let parent_context = stack.last_mut(); + match element_type { + ElementType::Math => { + // Root element - should not be self-closing in valid OMML + return Err(OmmlError::InvalidStructure( + "Math element cannot be self-closing".to_string(), + )); + }, + ElementType::Run => { + // Pass empty content up to parent + if let Some(_parent) = parent_context { + // Empty run contributes nothing + } + }, + ElementType::Text => { + // Empty text node + if let Some(parent) = parent_context { + let text = intern_string(self.arena, ""); + let node = MathNode::Text(std::borrow::Cow::Borrowed(text)); + parent.children.push(node); + } + }, + ElementType::Delimiter => { + DelimiterHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Nary => { + NaryHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Function => { + FunctionHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Accent => { + AccentHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Bar => { + BarHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Box => { + BoxHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Phantom => { + PhantomHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Matrix => { + MatrixHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::MatrixRow => 
{ + MatrixRowHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Fraction => { + FractionHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Radical => { + RadicalHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Superscript => { + SuperscriptHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Subscript => { + SubscriptHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::SubSup => { + SubSupHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::GroupChar => { + GroupCharHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::BorderBox => { + BorderBoxHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::EqArr => { + EqArrHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Spacing => { + SpacingHandler::handle_end(&mut context, parent_context, self.arena); + }, + ElementType::Character => { + CharHandler::handle_end(name.as_ref(), &mut context, parent_context, self.arena); + }, + _ => { + // For unknown or unhandled self-closing elements, do nothing + }, + } + + // Return context to pool for reuse + context_pool.put(context); + + Ok(()) + } +} diff --git a/crates/litchi-formula/src/omml/properties.rs b/crates/litchi-formula/src/omml/properties.rs new file mode 100644 index 0000000..1cd085e --- /dev/null +++ b/crates/litchi-formula/src/omml/properties.rs @@ -0,0 +1,884 @@ +use crate::omml::attributes::*; +use crate::omml::elements::ElementProperties; + +/// Parse run properties (m:rPr) +#[allow(dead_code)] // Used indirectly through property parsing +pub fn parse_run_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let 
Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "scr" | "m:scr" => { + // Script/style type (normal, bold, italic, etc.) + properties.math_variant = Some(value.to_string()); + properties.style = Some(value.to_string()); + }, + "sty" | "m:sty" => { + // Math style (display/text) + properties.display_style = + Some(matches!(value, "d" | "display" | "1" | "true")); + properties.run_math_style = Some(value.to_string()); + }, + "nor" | "m:nor" => { + // Normal text font + properties.font = Some(value.to_string()); + properties.run_normal_text = Some(value.to_string()); + }, + "lit" | "m:lit" => { + // Literal text flag + properties.run_literal = Some(matches!(value, "1" | "true")); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse fraction properties (m:fPr) +pub fn parse_fraction_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "type" | "m:type" => { + // Fraction type (bar, noBar, skewed) + properties.fraction_type = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse delimiter properties (m:dPr) +pub fn parse_delimiter_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "begChr" | "m:begChr" => { + // Beginning character + properties.delimiter_open_char = Some(value.to_string()); + }, + "endChr" | "m:endChr" => { + // Ending character + properties.delimiter_close_char = Some(value.to_string()); + }, + "sepChr" | "m:sepChr" => { + // Separator character + properties.delimiter_separator_char = Some(value.to_string()); + 
}, + "grow" | "m:grow" => { + // Grow to fit content + properties.delimiter_grow = Some(matches!(value, "1" | "true")); + }, + "shp" | "m:shp" => { + // Shape (centered, match) + properties.delimiter_shape = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse n-ary operator properties (m:naryPr) +pub fn parse_nary_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "chr" | "m:chr" => { + // Operator character + properties.chr = Some(value.to_string()); + }, + "grow" | "m:grow" => { + // Grow to fit content + properties.nary_operator_grow = Some(matches!(value, "1" | "true")); + }, + "subHide" | "m:subHide" => { + // Hide subscript + properties.nary_hide_sub = Some(matches!(value, "1" | "true")); + }, + "supHide" | "m:supHide" => { + // Hide superscript + properties.nary_hide_sup = Some(matches!(value, "1" | "true")); + }, + "limLoc" | "m:limLoc" => { + // Limit location (undOvr, subSup) + properties.style = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse accent properties (m:accPr) +pub fn parse_accent_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "chr" | "m:chr" => { + // Accent character + properties.chr = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse matrix properties (m:mPr) +pub fn parse_matrix_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + 
if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "baseJc" | "m:baseJc" => { + // Baseline justification + properties.matrix_alignment = Some(value.to_string()); + }, + "plcHide" | "m:plcHide" => { + // Hide placeholder + properties.hide = Some(matches!(value, "1" | "true")); + }, + "rSp" | "m:rSp" => { + // Row spacing + properties.matrix_row_spacing = Some(value.to_string()); + }, + "cSp" | "m:cSp" => { + // Column spacing + properties.matrix_column_spacing = Some(value.to_string()); + }, + "cGp" | "m:cGp" => { + // Column gap + properties.spacing = Some(value.to_string()); + }, + "mcs" | "m:mcs" => { + // Matrix column spacing (complex structure) + properties.spacing = Some(value.to_string()); + }, + "mcsJc" | "m:mcsJc" => { + // Matrix column spacing justification + properties.alignment = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse group character properties (m:groupChrPr) +pub fn parse_group_char_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "chr" | "m:chr" => { + // Group character + properties.chr = Some(value.to_string()); + }, + "pos" | "m:pos" => { + // Position (top/bot) + properties.accent_position = Some(value.to_string()); + }, + "vertJc" | "m:vertJc" => { + // Vertical justification + properties.vertical_alignment = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse equation array properties (m:eqArrPr) +pub fn parse_eq_arr_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + 
&& let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "baseJc" | "m:baseJc" => { + // Baseline justification + properties.eq_arr_base_alignment = Some(value.to_string()); + }, + "maxDist" | "m:maxDist" => { + // Maximum distance + properties.eq_arr_max_distance = Some(value.to_string()); + }, + "objDist" | "m:objDist" => { + // Object distance + properties.eq_arr_object_distance = Some(value.to_string()); + }, + "rSp" | "m:rSp" => { + // Row spacing + properties.eq_arr_row_spacing = Some(value.to_string()); + }, + "rSpRule" | "m:rSpRule" => { + // Row spacing rule + properties.eq_arr_row_spacing_rule = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse limit properties (m:lim) +pub fn parse_limit_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "type" | "m:type" => { + // Limit type (undOvr, subSup) + properties.style = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse bar properties (m:barPr) +pub fn parse_bar_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "pos" | "m:pos" => { + // Position (top/bot) + properties.accent_position = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse box properties (m:boxPr) +pub fn parse_box_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && 
let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "opEmu" | "m:opEmu" => { + // Operator emulation + properties.box_operator_emulation = Some(matches!(value, "1" | "true")); + }, + "noBreak" | "m:noBreak" => { + // No break + properties.box_no_break = Some(matches!(value, "1" | "true")); + }, + "diff" | "m:diff" => { + // Differential + properties.box_differential = Some(matches!(value, "1" | "true")); + }, + "brk" | "m:brk" => { + // Break + properties.box_break = Some(matches!(value, "1" | "true")); + }, + "aln" | "m:aln" => { + // Alignment + properties.box_alignment = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse border box properties (m:borderBoxPr) +pub fn parse_border_box_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "hideTop" | "m:hideTop" => { + properties.border_hide_top = Some(matches!(value, "1" | "true")); + }, + "hideBot" | "m:hideBot" => { + properties.border_hide_bottom = Some(matches!(value, "1" | "true")); + }, + "hideLeft" | "m:hideLeft" => { + properties.border_hide_left = Some(matches!(value, "1" | "true")); + }, + "hideRight" | "m:hideRight" => { + properties.border_hide_right = Some(matches!(value, "1" | "true")); + }, + "strikeH" | "m:strikeH" => { + properties.border_strike_horizontal = Some(matches!(value, "1" | "true")); + }, + "strikeV" | "m:strikeV" => { + properties.border_strike_vertical = Some(matches!(value, "1" | "true")); + }, + "strikeBLTR" | "m:strikeBLTR" => { + // Strike bottom-left to top-right + properties.border_strike_bltr = Some(matches!(value, "1" | "true")); + }, + "strikeTLBR" | "m:strikeTLBR" => { + // Strike top-left to bottom-right + properties.border_strike_tlbr = Some(matches!(value, "1" | "true")); + }, + _ 
=> {}, + } + } + } + + properties +} + +/// Parse phantom properties (m:phantPr) +pub fn parse_phantom_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "show" | "m:show" => { + // Show phantom content + properties.phantom_show = Some(matches!(value, "1" | "true")); + }, + "zeroWid" | "m:zeroWid" => { + // Zero width + properties.phantom_zero_width = Some(matches!(value, "1" | "true")); + }, + "zeroAsc" | "m:zeroAsc" => { + // Zero ascent + properties.phantom_zero_ascent = Some(matches!(value, "1" | "true")); + }, + "zeroDesc" | "m:zeroDesc" => { + // Zero descent + properties.phantom_zero_descent = Some(matches!(value, "1" | "true")); + }, + "transp" | "m:transp" => { + // Transparent + properties.phantom_transparent = Some(matches!(value, "1" | "true")); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse radical properties (m:radPr) +pub fn parse_radical_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "degHide" | "m:degHide" => { + // Hide degree + properties.radical_hide_degree = Some(matches!(value, "1" | "true")); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse spacing properties (m:sPre, m:sPost, etc.) 
+pub fn parse_spacing_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "val" | "m:val" => { + properties.spacing = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse subscript properties (m:sSubPr) +#[allow(dead_code)] // Part of the property parsing API, used in element-specific property handling +pub fn parse_subscript_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "degHide" | "m:degHide" => { + // Hide degree/subscript + properties.hide = Some(matches!(value, "1" | "true")); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse superscript properties (m:sSupPr) +#[allow(dead_code)] // Part of the property parsing API, used in element-specific property handling +pub fn parse_superscript_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "degHide" | "m:degHide" => { + // Hide degree/superscript + properties.hide = Some(matches!(value, "1" | "true")); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse sub-sup properties (m:sSubSupPr) +#[allow(dead_code)] // Part of the property parsing API, used in element-specific property handling +pub fn parse_subsup_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = 
ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "subHide" | "m:subHide" => { + // Hide subscript + properties.nary_hide_sub = Some(matches!(value, "1" | "true")); + }, + "supHide" | "m:supHide" => { + // Hide superscript + properties.nary_hide_sup = Some(matches!(value, "1" | "true")); + }, + "aln" | "m:aln" => { + // Alignment + properties.alignment = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse prescript properties (m:sPrePr) +#[allow(dead_code)] // Part of the property parsing API, used in element-specific property handling +pub fn parse_prescript_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "subHide" | "m:subHide" => { + // Hide prescript subscript + properties.nary_hide_sub = Some(matches!(value, "1" | "true")); + }, + "supHide" | "m:supHide" => { + // Hide prescript superscript + properties.nary_hide_sup = Some(matches!(value, "1" | "true")); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse function properties (m:funcPr) +#[allow(dead_code)] // Part of the property parsing API, used in element-specific property handling +pub fn parse_function_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "type" | "m:type" => { + // Function type + properties.style = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse upper limit properties (m:limUppPr) 
+#[allow(dead_code)] // Part of the property parsing API, used in element-specific property handling +pub fn parse_upper_limit_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "limLoc" | "m:limLoc" => { + // Limit location (undOvr, subSup) + properties.style = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse lower limit properties (m:limLowPr) +#[allow(dead_code)] // Part of the property parsing API, used in element-specific property handling +pub fn parse_lower_limit_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "limLoc" | "m:limLoc" => { + // Limit location (undOvr, subSup) + properties.style = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse control properties (m:ctrlPr) +#[allow(dead_code)] // Part of the property parsing API, used in element-specific property handling +pub fn parse_control_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + "ascii" | "m:ascii" => { + // ASCII font + properties.font = Some(value.to_string()); + }, + "hAnsi" | "m:hAnsi" => { + // High ANSI font + properties.font = Some(value.to_string()); + }, + "cs" | "m:cs" => { + // Complex script font + properties.font = Some(value.to_string()); + }, + "eastAsia" | "m:eastAsia" => 
{ + // East Asia font + properties.font = Some(value.to_string()); + }, + _ => {}, + } + } + } + + properties +} + +/// Parse general element properties from attributes +pub fn parse_general_properties( + attrs: &[quick_xml::events::attributes::Attribute], +) -> ElementProperties { + let mut properties = ElementProperties::default(); + + for attr in attrs { + if let Ok(key) = std::str::from_utf8(attr.key.as_ref()) + && let Ok(value) = std::str::from_utf8(&attr.value) + { + match key { + // Style and formatting + "scr" | "m:scr" => properties.math_variant = Some(value.to_string()), + "sty" | "m:sty" => { + properties.display_style = Some(matches!(value, "d" | "display" | "1" | "true")) + }, + + // Size and scaling + "sz" | "m:sz" => properties.size = Some(value.to_string()), + "minSz" | "m:minSz" => properties.min_size = Some(value.to_string()), + "maxSz" | "m:maxSz" => properties.max_size = Some(value.to_string()), + "scrLvl" | "m:scrLvl" => { + if let Some(lvl) = parse_int_simd(value) { + properties.script_level = Some(lvl); + } + }, + + // Color and font + "color" | "m:color" => properties.color = Some(value.to_string()), + "font" | "m:font" => properties.font = Some(value.to_string()), + "nor" | "m:nor" => properties.font = Some(value.to_string()), + + // Layout and positioning + "aln" | "m:aln" => properties.alignment = Some(value.to_string()), + "alnScr" | "m:alnScr" => properties.alignment = Some(value.to_string()), + "vertJc" | "m:vertJc" => properties.vertical_alignment = Some(value.to_string()), + "baseJc" | "m:baseJc" => properties.alignment = Some(value.to_string()), + + // Characters and symbols + "chr" | "m:chr" => properties.chr = Some(value.to_string()), + + // Spacing + "val" | "m:val" => properties.spacing = Some(value.to_string()), + + // Fraction properties + "type" | "m:type" => properties.fraction_type = Some(value.to_string()), + "lnThick" | "m:lnThick" => { + properties.fraction_line_thickness = Some(value.to_string()) + }, + + // Matrix 
properties + "rSp" | "m:rSp" => properties.matrix_row_spacing = Some(value.to_string()), + "cSp" | "m:cSp" => properties.matrix_column_spacing = Some(value.to_string()), + + // Accent properties + "pos" | "m:pos" => properties.accent_position = Some(value.to_string()), + + // Box properties + "diff" | "m:diff" => { + properties.box_differential = Some(matches!(value, "1" | "true")) + }, + "opEmu" | "m:opEmu" => { + properties.box_operator_emulation = Some(matches!(value, "1" | "true")) + }, + "brk" | "m:brk" => properties.box_break = Some(matches!(value, "1" | "true")), + "noBreak" | "m:noBreak" => { + properties.box_no_break = Some(matches!(value, "1" | "true")) + }, + + // Phantom properties + "show" | "m:show" => properties.phantom_show = Some(matches!(value, "1" | "true")), + "zeroWid" | "m:zeroWid" => { + properties.phantom_zero_width = Some(matches!(value, "1" | "true")) + }, + "zeroAsc" | "m:zeroAsc" => { + properties.phantom_zero_ascent = Some(matches!(value, "1" | "true")) + }, + "zeroDesc" | "m:zeroDesc" => { + properties.phantom_zero_descent = Some(matches!(value, "1" | "true")) + }, + "transp" | "m:transp" => { + properties.phantom_transparent = Some(matches!(value, "1" | "true")) + }, + + // Border box properties + "hideTop" | "m:hideTop" => { + properties.border_hide_top = Some(matches!(value, "1" | "true")) + }, + "hideBot" | "m:hideBot" => { + properties.border_hide_bottom = Some(matches!(value, "1" | "true")) + }, + "hideLeft" | "m:hideLeft" => { + properties.border_hide_left = Some(matches!(value, "1" | "true")) + }, + "hideRight" | "m:hideRight" => { + properties.border_hide_right = Some(matches!(value, "1" | "true")) + }, + "strikeH" | "m:strikeH" => { + properties.border_strike_horizontal = Some(matches!(value, "1" | "true")) + }, + "strikeV" | "m:strikeV" => { + properties.border_strike_vertical = Some(matches!(value, "1" | "true")) + }, + "strikeBLTR" | "m:strikeBLTR" => { + properties.border_strike_bltr = Some(matches!(value, "1" | "true")) 
+ }, + "strikeTLBR" | "m:strikeTLBR" => { + properties.border_strike_tlbr = Some(matches!(value, "1" | "true")) + }, + + // Equation array properties + "maxDist" | "m:maxDist" => properties.eq_arr_max_distance = Some(value.to_string()), + "objDist" | "m:objDist" => { + properties.eq_arr_object_distance = Some(value.to_string()) + }, + "rSpRule" | "m:rSpRule" => { + properties.eq_arr_row_spacing_rule = Some(value.to_string()) + }, + + // N-ary operator properties + "subHide" | "m:subHide" => { + properties.nary_hide_sub = Some(matches!(value, "1" | "true")) + }, + "supHide" | "m:supHide" => { + properties.nary_hide_sup = Some(matches!(value, "1" | "true")) + }, + "grow" | "m:grow" => { + properties.nary_operator_grow = Some(matches!(value, "1" | "true")) + }, + + // Delimiter properties + "sepChr" | "m:sepChr" => { + properties.delimiter_separator_char = Some(value.to_string()) + }, + "begChr" | "m:begChr" => properties.delimiter_open_char = Some(value.to_string()), + "endChr" | "m:endChr" => properties.delimiter_close_char = Some(value.to_string()), + "shp" | "m:shp" => properties.delimiter_shape = Some(value.to_string()), + + // Radical properties + "degHide" | "m:degHide" => { + properties.radical_hide_degree = Some(matches!(value, "1" | "true")) + }, + + // Run properties + "lit" | "m:lit" => properties.run_literal = Some(matches!(value, "1" | "true")), + + // Visibility and rendering + "hide" | "m:hide" => properties.hide = Some(matches!(value, "1" | "true")), + "strike" | "m:strike" => { + properties.strike_through = Some(matches!(value, "1" | "true")) + }, + "dstrike" | "m:dstrike" => { + properties.double_strike_through = Some(matches!(value, "1" | "true")) + }, + + // Line styles + "u" | "m:u" => properties.underline = Some(value.to_string()), + "o" | "m:o" => properties.overline = Some(value.to_string()), + + // Special positioning attributes + "den" | "m:den" => properties.alignment = Some("denominator".to_string()), + "num" | "m:num" => 
properties.alignment = Some("numerator".to_string()), + + _ => {}, // Ignore unknown attributes + } + } + } + + properties +} diff --git a/crates/litchi-formula/src/omml/utils.rs b/crates/litchi-formula/src/omml/utils.rs new file mode 100644 index 0000000..44fb63b --- /dev/null +++ b/crates/litchi-formula/src/omml/utils.rs @@ -0,0 +1,572 @@ +use crate::ast::MathNode; +use crate::omml::elements::{ElementContext, ElementType}; +use std::borrow::Cow as StdCow; + +/// High-performance string interning using bumpalo arena +/// +/// This function interns strings in the arena to avoid allocations +/// and improve memory locality. +pub fn intern_string<'arena>(arena: &'arena bumpalo::Bump, s: &str) -> &'arena str { + arena.alloc_str(s) +} + +/// Fast text content extraction and processing +/// +/// Extracts text content from MathNodes, handling different node types efficiently. +#[allow(dead_code)] // Utility function reserved for text extraction features +pub fn extract_text_content(nodes: &[MathNode]) -> String { + let mut result = String::new(); + for node in nodes { + match node { + MathNode::Text(text) => result.push_str(text.as_ref()), + MathNode::Fenced { content, .. } => { + result.push_str(&extract_text_content(content)); + }, + MathNode::Function { name, argument } => { + result.push_str(name.as_ref()); + result.push('('); + result.push_str(&extract_text_content(argument)); + result.push(')'); + }, + // Add more cases as needed + _ => {}, // Skip non-text nodes + } + } + result +} + +/// Fast element type lookup using perfect hashing +/// +/// Pre-computed hash table for element name to type mapping. +/// This provides O(1) lookup instead of string matching. 
+#[allow(dead_code)] // Alternative element type lookup, reserved for optimization +pub fn get_element_type_fast(name: &[u8]) -> ElementType { + match name { + b"m:oMath" | b"oMath" => ElementType::Math, + b"m:r" | b"r" => ElementType::Run, + b"m:t" | b"t" => ElementType::Text, + b"m:f" | b"f" => ElementType::Fraction, + b"m:num" | b"num" => ElementType::Numerator, + b"m:den" | b"den" => ElementType::Denominator, + b"m:rad" | b"rad" => ElementType::Radical, + b"m:deg" | b"deg" => ElementType::Degree, + b"m:e" | b"e" => ElementType::Base, + b"m:sSup" | b"sSup" => ElementType::Superscript, + b"m:sSub" | b"sSub" => ElementType::Subscript, + b"m:sSubSup" | b"sSubSup" => ElementType::SubSup, + b"m:sup" | b"sup" => ElementType::SuperscriptElement, + b"m:sub" | b"sub" => ElementType::SubscriptElement, + b"m:d" | b"d" => ElementType::Delimiter, + b"m:nary" | b"nary" => ElementType::Nary, + b"m:func" | b"func" => ElementType::Function, + b"m:fName" | b"fName" => ElementType::FunctionName, + b"m:m" | b"m" => ElementType::Matrix, + b"m:mr" | b"mr" => ElementType::MatrixRow, + b"m:mPr" | b"mPr" => ElementType::Properties, + b"m:acc" | b"acc" => ElementType::Accent, + b"m:accPr" | b"accPr" => ElementType::AccentProperties, + b"m:bar" | b"bar" => ElementType::Bar, + b"m:box" | b"box" => ElementType::Box, + b"m:phant" | b"phant" => ElementType::Phantom, + b"m:groupChr" | b"groupChr" => ElementType::GroupChar, + b"m:borderBox" | b"borderBox" => ElementType::BorderBox, + b"m:eqArr" | b"eqArr" => ElementType::EqArr, + b"m:eqArrPr" | b"eqArrPr" => ElementType::EqArrPr, + b"m:rPr" | b"rPr" => ElementType::Properties, + b"m:fPr" | b"fPr" => ElementType::Properties, + b"m:radPr" | b"radPr" => ElementType::Properties, + b"m:sSupPr" | b"sSupPr" => ElementType::Properties, + b"m:sSubPr" | b"sSubPr" => ElementType::Properties, + b"m:dPr" | b"dPr" => ElementType::Properties, + b"m:naryPr" | b"naryPr" => ElementType::Properties, + b"m:funcPr" | b"funcPr" => ElementType::Properties, + 
b"m:groupChrPr" | b"groupChrPr" => ElementType::Properties, + b"m:chr" | b"chr" => ElementType::Text, // Character element + b"m:sPre" | b"sPre" => ElementType::Run, // Pre-script + b"m:sPost" | b"sPost" => ElementType::Run, // Post-script + b"m:lim" | b"lim" => ElementType::Nary, // Limit + b"m:limLow" | b"limLow" => ElementType::SubscriptElement, // Lower limit + b"m:limUpp" | b"limUpp" => ElementType::SuperscriptElement, // Upper limit + _ => ElementType::Unknown, + } +} + +/// Fast attribute lookup using SIMD-accelerated search +/// +/// Uses memchr for fast byte searching in attribute data. +#[allow(dead_code)] // Alternative attribute lookup, reserved for optimization +pub fn find_attribute_fast<'a>( + attrs: &'a [quick_xml::events::attributes::Attribute<'a>], + key: &str, +) -> Option<&'a quick_xml::events::attributes::Attribute<'a>> { + for attr in attrs { + if let Ok(attr_key) = std::str::from_utf8(attr.key.as_ref()) + && (attr_key == key || attr_key == format!("m:{}", key)) + { + return Some(attr); + } + } + None +} + +/// Batch processing of element contexts +/// +/// Reuses element contexts to reduce allocations. +pub struct ContextPool<'arena> { + pool: Vec>, + available: Vec, +} + +impl<'arena> ContextPool<'arena> { + pub fn new(capacity: usize) -> Self { + Self { + pool: Vec::with_capacity(capacity), + available: Vec::new(), + } + } + + pub fn get(&mut self, element_type: ElementType) -> ElementContext<'arena> { + if let Some(index) = self.available.pop() { + let mut context = self.pool.swap_remove(index); + context.element_type = element_type; + context.clear(); + context + } else { + ElementContext::new(element_type) + } + } + + pub fn put(&mut self, mut context: ElementContext<'arena>) { + if self.pool.len() < self.pool.capacity() { + context.clear(); + self.pool.push(context); + } + // If pool is full, context is dropped + } +} + +/// Zero-copy text processing +/// +/// Processes text content without unnecessary allocations. 
+pub fn process_text_zero_copy<'a>(text: &'a str) -> StdCow<'a, str> { + // Remove leading/trailing whitespace without allocation if possible + let trimmed = text.trim(); + if trimmed.len() == text.len() { + StdCow::Borrowed(text) + } else { + StdCow::Owned(trimmed.to_string()) + } +} + +/// Fast numeric parsing for OMML attributes +/// +/// Uses fast parsing libraries for performance. +#[allow(dead_code)] // Utility function for numeric attribute parsing +pub fn parse_numeric_attr(attr: Option<&str>) -> Option { + attr.and_then(|s| fast_float2::parse(s).ok()) +} + +/// Memory-efficient vector operations +/// +/// Extends vectors without unnecessary reallocations. +pub fn extend_vec_efficient(vec: &mut Vec, items: impl IntoIterator) { + vec.extend(items); +} + +/// Fast element stacking +/// +/// Custom stack implementation optimized for OMML parsing. +/// Pre-allocates capacity and provides fast access patterns. +pub struct ElementStack<'arena> { + stack: Vec>, +} + +impl<'arena> ElementStack<'arena> { + /// Create a new stack with pre-allocated capacity for performance + #[allow(dead_code)] + pub fn new() -> Self { + Self { + stack: Vec::with_capacity(64), // Typical OMML depth is much less than this + } + } + + /// Create a new stack with specified capacity + pub fn with_capacity(capacity: usize) -> Self { + Self { + stack: Vec::with_capacity(capacity), + } + } + + /// Push a context onto the stack + #[inline(always)] + pub fn push(&mut self, context: ElementContext<'arena>) { + self.stack.push(context); + } + + /// Pop a context from the stack + #[inline(always)] + pub fn pop(&mut self) -> Option> { + self.stack.pop() + } + + /// Get reference to the top context + #[inline(always)] + pub fn last(&self) -> Option<&ElementContext<'arena>> { + self.stack.last() + } + + /// Get mutable reference to the top context + #[inline(always)] + pub fn last_mut(&mut self) -> Option<&mut ElementContext<'arena>> { + self.stack.last_mut() + } + + /// Get reference to the 
context at the specified depth from the top + /// (0 = top, 1 = parent of top, etc.) + #[inline(always)] + #[allow(dead_code)] // Reserved for advanced stack operations + pub fn peek(&self, depth: usize) -> Option<&ElementContext<'arena>> { + let len = self.stack.len(); + if depth < len { + Some(&self.stack[len - 1 - depth]) + } else { + None + } + } + + /// Get mutable reference to the context at the specified depth from the top + #[inline(always)] + #[allow(dead_code)] // Reserved for advanced stack operations + pub fn peek_mut(&mut self, depth: usize) -> Option<&mut ElementContext<'arena>> { + let len = self.stack.len(); + if depth < len { + let idx = len - 1 - depth; + Some(&mut self.stack[idx]) + } else { + None + } + } + + /// Check if stack is empty + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.stack.is_empty() + } + + /// Get current stack depth + #[inline(always)] + #[allow(dead_code)] + pub fn len(&self) -> usize { + self.stack.len() + } + + /// Clear all elements from the stack + #[allow(dead_code)] + pub fn clear(&mut self) { + self.stack.clear(); + } + + /// Get the capacity of the underlying vector + #[inline(always)] + #[allow(dead_code)] + pub fn capacity(&self) -> usize { + self.stack.capacity() + } + + /// Reserve additional capacity + #[allow(dead_code)] // Reserved for stack optimization + pub fn reserve(&mut self, additional: usize) { + self.stack.reserve(additional); + } + + /// Shrink capacity to fit current length + #[allow(dead_code)] // Reserved for memory optimization + pub fn shrink_to_fit(&mut self) { + self.stack.shrink_to_fit(); + } +} + +/// Fast XML namespace handling +/// +/// Strips XML namespaces efficiently. +#[allow(dead_code)] // Utility function for namespace handling +pub fn strip_namespace(name: &[u8]) -> &[u8] { + if let Some(colon_pos) = memchr::memchr(b':', name) { + &name[colon_pos + 1..] + } else { + name + } +} + +/// Error handling utilities +/// +/// Fast error path handling. 
+#[allow(dead_code)] // Utility function for error handling +pub fn handle_parse_error(result: Result) -> Result { + result.map_err(|e| e.to_string()) +} + +/// Validation utilities for OMML parsing +/// +/// Validates OMML element and attribute names. +#[allow(dead_code)] // Utility function for validation +pub fn is_valid_omml_element_name(name: &str) -> bool { + // Basic validation - element names should be alphanumeric with possible namespace prefix + !name.is_empty() + && name + .chars() + .all(|c| c.is_alphanumeric() || c == ':' || c == '_' || c == '-') +} + +/// Validates OMML attribute values for basic sanity checks +#[allow(dead_code)] // Utility function for validation +pub fn validate_omml_attribute_value(value: &str) -> bool { + // Basic validation - no null bytes, reasonable length + !value.is_empty() && value.len() < 10000 && !value.contains('\0') +} + +/// Memory-efficient string deduplication +/// +/// Uses a simple interning mechanism for frequently used strings. +#[allow(dead_code)] // Reserved for string interning optimization +pub struct StringInterner { + strings: std::collections::HashSet, +} + +#[allow(dead_code)] // Reserved for string interning optimization +impl StringInterner { + pub fn new() -> Self { + Self { + strings: std::collections::HashSet::new(), + } + } + + pub fn intern(&mut self, s: &str) -> &str { + if self.strings.contains(s) { + // Return reference to existing string + self.strings.get(s).unwrap().as_str() + } else { + // Insert new string and return reference + self.strings.insert(s.to_string()); + self.strings.get(s).unwrap().as_str() + } + } +} + +/// Fast attribute value extraction with SIMD +/// +/// Optimized version using SIMD for common patterns. 
+#[allow(dead_code)] // Alternative SIMD-accelerated attribute lookup +pub fn extract_attribute_value_simd<'a>( + attrs: &'a [quick_xml::events::attributes::Attribute<'a>], + key: &str, +) -> Option<&'a [u8]> { + for attr in attrs { + if let Ok(attr_key) = std::str::from_utf8(attr.key.as_ref()) + && (attr_key == key || attr_key == format!("m:{}", key)) + { + return Some(&attr.value); + } + } + None +} + +/// XML content normalization +/// +/// Normalizes whitespace and entities in XML text content. +#[allow(dead_code)] // Utility function for XML text normalization +pub fn normalize_xml_text(text: &str) -> String { + // Basic normalization - collapse whitespace, unescape common entities + text.replace("<", "<") + .replace(">", ">") + .replace("&", "&") + .replace(""", "\"") + .replace("'", "'") + .replace(" ", " ") + .replace(" ", "\u{00A0}") // Non-breaking space +} + +/// OMML document validation +/// +/// Validates the structure and content of parsed OMML. +pub fn validate_omml_structure(nodes: &[super::MathNode]) -> Result<(), super::OmmlError> { + // Empty OMML documents are not allowed + if nodes.is_empty() { + return Err(super::OmmlError::InvalidStructure( + "Empty OMML document".to_string(), + )); + } + + // Check for required root math element + let has_math_root = nodes + .iter() + .any(|node| matches!(node, super::MathNode::Row(_))); + if !has_math_root && !nodes.is_empty() { + // Allow documents that don't start with explicit math element + // as long as they contain valid mathematical content + validate_math_nodes(nodes)?; + } + + Ok(()) +} + +/// Validate mathematical nodes for structural correctness +pub fn validate_math_nodes(nodes: &[super::MathNode]) -> Result<(), super::OmmlError> { + for node in nodes { + match node { + super::MathNode::Frac { + numerator, + denominator, + .. 
+ } => { + if numerator.is_empty() { + return Err(super::OmmlError::MissingRequiredElement( + "Fraction numerator is empty".to_string(), + )); + } + if denominator.is_empty() { + return Err(super::OmmlError::MissingRequiredElement( + "Fraction denominator is empty".to_string(), + )); + } + }, + super::MathNode::Root { base, .. } if base.is_empty() => { + return Err(super::OmmlError::MissingRequiredElement( + "Root base is empty".to_string(), + )); + }, + super::MathNode::Power { base, exponent } => { + if base.is_empty() { + return Err(super::OmmlError::MissingRequiredElement( + "Power base is empty".to_string(), + )); + } + if exponent.is_empty() { + return Err(super::OmmlError::MissingRequiredElement( + "Power exponent is empty".to_string(), + )); + } + }, + super::MathNode::Sub { base, subscript } => { + if base.is_empty() { + return Err(super::OmmlError::MissingRequiredElement( + "Subscript base is empty".to_string(), + )); + } + if subscript.is_empty() { + return Err(super::OmmlError::MissingRequiredElement( + "Subscript is empty".to_string(), + )); + } + }, + super::MathNode::Function { name, argument } => { + if name.is_empty() { + return Err(super::OmmlError::MissingRequiredElement( + "Function name is empty".to_string(), + )); + } + if argument.is_empty() { + return Err(super::OmmlError::MissingRequiredElement( + "Function argument is empty".to_string(), + )); + } + }, + super::MathNode::Fenced { content, .. } if content.is_empty() => { + return Err(super::OmmlError::ValidationError( + "Fenced content is empty".to_string(), + )); + }, + super::MathNode::Matrix { rows, .. 
} => { + if rows.is_empty() { + return Err(super::OmmlError::ValidationError( + "Matrix has no rows".to_string(), + )); + } + for (i, row) in rows.iter().enumerate() { + if row.is_empty() { + return Err(super::OmmlError::ValidationError(format!( + "Matrix row {} is empty", + i + ))); + } + } + }, + _ => {}, // Other nodes don't have specific validation requirements + } + } + Ok(()) +} + +/// Validate OMML element nesting +/// +/// Checks that elements are properly nested according to OMML specification. +pub fn validate_element_nesting( + element_type: &ElementType, + parent_type: Option<&ElementType>, +) -> Result<(), super::OmmlError> { + match element_type { + ElementType::Math + // Math element should be root or not have a parent + if parent_type.is_some() => { + return Err(super::OmmlError::InvalidStructure( + "Math element should be root".to_string(), + )); + }, + ElementType::Numerator | ElementType::Denominator + if !matches!(parent_type, Some(ElementType::Fraction)) => { + return Err(super::OmmlError::InvalidStructure( + "Numerator/denominator must be inside fraction".to_string(), + )); + }, + ElementType::Degree + if !matches!(parent_type, Some(ElementType::Radical)) => { + return Err(super::OmmlError::InvalidStructure( + "Degree must be inside radical".to_string(), + )); + }, + ElementType::Base => { + match parent_type { + Some( + ElementType::Superscript + | ElementType::Subscript + | ElementType::SubSup + | ElementType::Radical + | ElementType::Accent + | ElementType::Bar + | ElementType::GroupChar, + ) => {}, + _ => { + // Allow base elements in other contexts too - they might be generic containers + }, + } + }, + ElementType::SuperscriptElement => match parent_type { + Some( + ElementType::Superscript + | ElementType::SubSup + | ElementType::Nary + | ElementType::Integrand, + ) => {}, + _ => { + return Err(super::OmmlError::InvalidStructure( + "Superscript element in invalid context".to_string(), + )); + }, + }, + ElementType::SubscriptElement => 
match parent_type { + Some( + ElementType::Subscript + | ElementType::SubSup + | ElementType::Nary + | ElementType::Integrand, + ) => {}, + _ => { + return Err(super::OmmlError::InvalidStructure( + "Subscript element in invalid context".to_string(), + )); + }, + }, + _ => {}, // Other elements have more flexible nesting rules + } + Ok(()) +} diff --git a/crates/litchi-imgconv/Cargo.toml b/crates/litchi-imgconv/Cargo.toml new file mode 100644 index 0000000..9a0a0b9 --- /dev/null +++ b/crates/litchi-imgconv/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "litchi-imgconv" +description = "EMF/WMF/PICT image format conversion for the Litchi office-formats library." +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +base64 = { workspace = true } +bytes = { workspace = true } +flate2 = { workspace = true } +image = { workspace = true } +litchi-core = { workspace = true } +ryu = { workspace = true } +xml-minifier = { workspace = true } +zerocopy = { workspace = true } diff --git a/crates/litchi-imgconv/README.md b/crates/litchi-imgconv/README.md new file mode 100644 index 0000000..3608753 --- /dev/null +++ b/crates/litchi-imgconv/README.md @@ -0,0 +1,43 @@ +# litchi-imgconv + +Pure decoders and converters for the image formats embedded inside Microsoft Office documents. + +## Overview + +`litchi-imgconv` parses BLIP (Binary Large Image or Picture) records and +the metafile formats they wrap — Enhanced Metafile (EMF), Windows Metafile +(WMF), and Macintosh PICT — and converts them to modern raster formats +(PNG, JPEG, WebP) or SVG. It is a leaf crate in the +[Litchi](https://github.com/DevExzh/litchi) workspace, depending only on +`litchi-core` plus the `image`, `flate2`, `bytes`, `xml-minifier`, and +`zerocopy` crates. The OLE/Escher integration glue lives in the umbrella +`litchi` crate. 
+ +## Usage + +```toml +[dependencies] +litchi-imgconv = "0.0.1" +``` + +```rust +use litchi_imgconv::{Blip, convert_blip_to_png}; + +fn render(blip_bytes: &[u8]) -> litchi_core::error::Result> { + let blip = Blip::parse(blip_bytes)?; + convert_blip_to_png(&blip, Some(800), None) +} +``` + +## Features + +- BLIP record parsing (`Blip`, `BitmapBlip`, `MetafileBlip`, `BlipType`) +- BLIP store table parsing (`BlipStore`, `BlipStoreEntry`) +- EMF, WMF, and PICT metafile decoding to PNG/JPEG/WebP +- Optional SVG output for vector metafiles +- Optional resizing with high-quality Lanczos3 filtering + +## License + +Licensed under the Apache License, Version 2.0. Part of the +[Litchi](https://github.com/DevExzh/litchi) workspace. diff --git a/crates/litchi-imgconv/examples/convert_emf.rs b/crates/litchi-imgconv/examples/convert_emf.rs new file mode 100644 index 0000000..a758256 --- /dev/null +++ b/crates/litchi-imgconv/examples/convert_emf.rs @@ -0,0 +1,77 @@ +//! Convert an EMF or WMF file to PNG. +//! +//! This example demonstrates the direct entry points that take raw metafile +//! bytes (rather than a wrapping BLIP record): +//! [`litchi_imgconv::emf::convert_emf`] and [`litchi_imgconv::wmf::convert_wmf`]. +//! The format is dispatched by file-extension; PICT files would be routed to +//! [`litchi_imgconv::pict::convert_pict`] in the same way. +//! +//! # Run +//! +//! ```bash +//! # Use the bundled samples: +//! cargo run -p litchi-imgconv --example convert_emf -- \ +//! test-data/images/emf/wrench.emf wrench.png +//! +//! cargo run -p litchi-imgconv --example convert_emf -- \ +//! test-data/images/wmf/santa.wmf santa.png +//! +//! # Or with no arguments — defaults to test-data/images/emf/wrench.emf: +//! cargo run -p litchi-imgconv --example convert_emf +//! ``` + +use std::path::{Path, PathBuf}; + +use image::ImageFormat; +use litchi_imgconv::{emf, pict, wmf}; + +fn main() -> Result<(), Box> { + // First arg: input path (defaults to a bundled EMF sample). 
+ // Second arg: output PNG path (defaults to alongside the input with .png). + let mut args = std::env::args().skip(1); + let input: PathBuf = args + .next() + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("test-data/images/emf/wrench.emf")); + let output: PathBuf = args + .next() + .map(PathBuf::from) + .unwrap_or_else(|| input.with_extension("png")); + + if !input.exists() { + return Err(format!( + "input file does not exist: {} (run from repo root, or pass an absolute path)", + input.display() + ) + .into()); + } + + let bytes = std::fs::read(&input)?; + println!("loaded {} ({} bytes)", input.display(), bytes.len()); + + // Pick the converter by extension. The width parameter (1024) demonstrates + // aspect-ratio-preserving resize: the converter computes the matching + // height when only one dimension is supplied. + let png = match extension_lower(&input).as_deref() { + Some("emf") => emf::convert_emf(&bytes, ImageFormat::Png, Some(1024), None)?, + Some("wmf") => wmf::convert_wmf(&bytes, ImageFormat::Png, Some(1024), None)?, + Some("pict" | "pct") => pict::convert_pict(&bytes, ImageFormat::Png, Some(1024), None)?, + other => { + return Err(format!( + "unsupported extension {:?}; expected .emf, .wmf, or .pict", + other + ) + .into()); + }, + }; + + std::fs::write(&output, &png)?; + println!("wrote {} ({} bytes PNG)", output.display(), png.len()); + Ok(()) +} + +fn extension_lower(p: &Path) -> Option { + p.extension() + .and_then(|e| e.to_str()) + .map(|s| s.to_ascii_lowercase()) +} diff --git a/crates/litchi-imgconv/examples/metafile_to_svg.rs b/crates/litchi-imgconv/examples/metafile_to_svg.rs new file mode 100644 index 0000000..3d2aeee --- /dev/null +++ b/crates/litchi-imgconv/examples/metafile_to_svg.rs @@ -0,0 +1,89 @@ +//! Convert an EMF or WMF metafile to SVG. +//! +//! `litchi-imgconv` exposes two public, format-specific SVG entry points: +//! +//! - [`litchi_imgconv::emf::convert_emf_to_svg`] (`emf/mod.rs`) +//! 
- [`litchi_imgconv::wmf::convert_wmf_to_svg`] (`wmf/mod.rs`) +//! +//! Both walk the parsed metafile records and emit a minimal SVG document +//! using the building blocks in the [`litchi_imgconv::svg`] module +//! (`SvgBuilder`, `SvgPath`, `SvgRect`, `SvgEllipse`, `SvgText`, +//! `SvgImage`). Embedded raster blits become base64 `data:image/png` URLs +//! so the resulting SVG is fully self-contained. +//! +//! The PICT decoder does not currently expose a `convert_pict_to_svg` +//! entry point - see `pict/mod.rs`, which only ships PNG/JPEG/WebP +//! converters - so this example only handles EMF and WMF. +//! +//! # Run +//! +//! ```bash +//! # Use the bundled EMF sample: +//! cargo run -p litchi-imgconv --example metafile_to_svg +//! +//! # Or pass any EMF/WMF file: +//! cargo run -p litchi-imgconv --example metafile_to_svg -- \ +//! test-data/images/wmf/santa.wmf santa.svg +//! ``` + +use std::path::{Path, PathBuf}; + +use litchi_imgconv::{emf, wmf}; + +fn main() -> Result<(), Box> { + let mut args = std::env::args().skip(1); + let input: PathBuf = args + .next() + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("test-data/images/emf/wrench.emf")); + let output: PathBuf = args + .next() + .map(PathBuf::from) + .unwrap_or_else(|| input.with_extension("svg")); + + if !input.exists() { + return Err(format!( + "input file does not exist: {} (run from repo root, or pass an absolute path)", + input.display() + ) + .into()); + } + + let bytes = std::fs::read(&input)?; + println!("loaded {} ({} bytes)", input.display(), bytes.len()); + + let svg: String = match extension_lower(&input).as_deref() { + Some("emf") => emf::convert_emf_to_svg(&bytes)?, + Some("wmf") => wmf::convert_wmf_to_svg(&bytes)?, + Some("pict" | "pct") => { + // The pict module currently has no convert_pict_to_svg; raster-only. 
+ return Err("PICT to SVG conversion is not exposed by litchi-imgconv; \ + use convert_pict (raster) instead" + .into()); + }, + other => { + return Err(format!("unsupported extension {:?}; expected .emf or .wmf", other).into()); + }, + }; + + std::fs::write(&output, svg.as_bytes())?; + println!( + "wrote {} ({} bytes SVG, {} chars)", + output.display(), + svg.len(), + svg.chars().count() + ); + + // Print the first line of the SVG so the user can see it really is a + // valid document without having to open it. + if let Some(first_line) = svg.lines().next() { + println!("first line : {}", first_line); + } + Ok(()) +} + +fn extension_lower(p: &Path) -> Option { + p.extension() + .and_then(|e| e.to_str()) + .map(|s| s.to_ascii_lowercase()) +} diff --git a/crates/litchi-imgconv/examples/parse_blip.rs b/crates/litchi-imgconv/examples/parse_blip.rs new file mode 100644 index 0000000..b10e4d4 --- /dev/null +++ b/crates/litchi-imgconv/examples/parse_blip.rs @@ -0,0 +1,164 @@ +//! Parse an OfficeArt BLIP record and convert it to PNG. +//! +//! A BLIP (Binary Large Image or Picture) record is the wrapper Microsoft +//! Office uses to embed images inside `.doc` / `.xls` / `.ppt` streams. This +//! example shows the two-step API: +//! +//! 1. [`litchi_imgconv::blip::Blip::parse`] decodes the OfficeArt record +//! header, the optional UID(s), and the embedded picture payload. +//! 2. [`litchi_imgconv::convert_blip_to_png`] dispatches to the right +//! backend (EMF/WMF/PICT decoder, or pass-through for already-raster +//! formats) and produces a modern PNG. +//! +//! Because this repository does not ship a raw BLIP record fixture, the +//! example synthesises a minimal but spec-valid `BlipPNG` record (record +//! type `0xF01E`, see [MS-ODRAW] §2.2.23) by wrapping the bundled +//! `test-data/images/png/lena.png` with an 8-byte OfficeArt header, a +//! 16-byte zero UID, and the `0xFF` marker byte. The same code path works +//! 
on real BLIP bytes extracted from an Office document. +//! +//! # Run +//! +//! ```bash +//! # Synthesize a BlipPNG from the bundled PNG and convert it back out: +//! cargo run -p litchi-imgconv --example parse_blip +//! +//! # Or pass a real BLIP record file (raw bytes starting at the OfficeArt +//! # record header — i.e. starting with the 4-bit version + 12-bit +//! # instance + 16-bit record type + 32-bit length): +//! cargo run -p litchi-imgconv --example parse_blip -- path/to/blip.bin out.png +//! ``` + +use std::path::PathBuf; + +use litchi_imgconv::blip::Blip; +use litchi_imgconv::{BlipType, convert_blip_to_png}; + +fn main() -> Result<(), Box> { + let mut args = std::env::args().skip(1); + let blip_input = args.next(); + let output: PathBuf = args + .next() + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("blip_out.png")); + + // Either load a real BLIP record from disk, or synthesise one inline + // from a bundled PNG so the example is runnable out of the box. + let blip_bytes: Vec = match blip_input { + Some(path) => { + println!("reading BLIP record from {path}"); + std::fs::read(&path)? + }, + None => { + let png_path = PathBuf::from("test-data/images/png/lena.png"); + if !png_path.exists() { + return Err(format!( + "no BLIP path given and the bundled fallback does not exist: {} \ + (run from repo root)", + png_path.display() + ) + .into()); + } + let png = std::fs::read(&png_path)?; + println!( + "no BLIP path given - synthesising a BlipPNG from {} ({} bytes)", + png_path.display(), + png.len() + ); + synthesize_blip_png(&png) + }, + }; + + // 1. Parse the OfficeArt BLIP record. This is zero-copy: `blip` borrows + // from `blip_bytes` for its picture data. + let blip = Blip::parse(&blip_bytes)?; + + // 2. Inspect the parsed record. 
+ let kind = blip.blip_type(); + println!("\n--- BLIP metadata ---"); + println!("kind : {:?}", kind); + if let Some(t) = kind { + println!("extension : .{}", t.extension()); + println!("is_metafile : {}", t.is_metafile()); + } + match &blip { + Blip::Metafile(m) => { + println!( + "header : version={} instance=0x{:03X} type=0x{:04X} length={}", + m.header.version, m.header.instance, m.header.record_type, m.header.length + ); + println!("uid : {}", hex16(&m.uid)); + if let Some(u) = m.secondary_uid { + println!("secondary uid : {}", hex16(&u)); + } + println!("uncompressed_size: {} bytes", m.uncompressed_size); + println!("compressed_size : {} bytes", m.compressed_size); + println!( + "bounds : ({}, {}) -> ({}, {})", + m.bounds.0, m.bounds.1, m.bounds.2, m.bounds.3 + ); + println!("size (EMU) : {} x {}", m.size_emu.0, m.size_emu.1); + println!( + "compression / filter: 0x{:02X} / 0x{:02X} (compressed = {})", + m.compression, + m.filter, + m.is_compressed() + ); + println!("picture_data len : {} bytes", m.picture_data.len()); + }, + Blip::Bitmap(b) => { + println!( + "header : version={} instance=0x{:03X} type=0x{:04X} length={}", + b.header.version, b.header.instance, b.header.record_type, b.header.length + ); + println!("uid : {}", hex16(&b.uid)); + println!("marker : 0x{:02X}", b.marker); + println!("picture_data len : {} bytes", b.picture_data.len()); + }, + } + + // 3. Convert to PNG. For an already-PNG bitmap BLIP this is essentially a + // decode/re-encode round-trip; for EMF/WMF/PICT the raster is rendered. + let png = convert_blip_to_png(&blip, None, None)?; + std::fs::write(&output, &png)?; + println!( + "\nconverted to PNG : {} ({} bytes)", + output.display(), + png.len() + ); + Ok(()) +} + +/// Build a minimal `BlipPNG` OfficeArt record (`0xF01E`) wrapping `png_data`. 
/// Formats a 16-byte UID as 32 lowercase hexadecimal characters.
///
/// Each byte is rendered as exactly two digits (zero-padded), in slice order.
fn hex16(b: &[u8; 16]) -> String {
    use std::fmt::Write as _;

    let mut s = String::with_capacity(32);
    for byte in b {
        // Format straight into the output buffer instead of allocating a
        // temporary `String` per byte. Writing to a `String` cannot fail, so
        // the `fmt::Result` is deliberately discarded.
        let _ = write!(s, "{:02x}", byte);
    }
    s
}
} + +[[bin]] +name = "convert_image" +path = "fuzz_targets/convert_image.rs" +test = false +doc = false +bench = false + +[profile.release] +debug = 1 +codegen-units = 1 +lto = "thin" + +[workspace] diff --git a/crates/litchi-imgconv/fuzz/fuzz_targets/convert_image.rs b/crates/litchi-imgconv/fuzz/fuzz_targets/convert_image.rs new file mode 100644 index 0000000..6445013 --- /dev/null +++ b/crates/litchi-imgconv/fuzz/fuzz_targets/convert_image.rs @@ -0,0 +1,7 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + let _ = litchi_imgconv::emf::convert_emf_to_png(data, None, None); +}); diff --git a/crates/litchi-imgconv/src/blip.rs b/crates/litchi-imgconv/src/blip.rs new file mode 100644 index 0000000..0754299 --- /dev/null +++ b/crates/litchi-imgconv/src/blip.rs @@ -0,0 +1,577 @@ +// BLIP (Binary Large Image or Picture) record parsing and handling +// +// This module implements parsing of OfficeArtBlip records from Microsoft Office +// file formats, supporting both metafile formats (EMF, WMF, PICT) and bitmap +// formats (JPEG, PNG, DIB, TIFF). 
/// Kind of picture carried by an OfficeArt BLIP record.
///
/// The discriminant of every variant is the OfficeArt record type ID of the
/// matching BLIP record, so `BlipType::Png as u16` yields `0xF01E`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlipType {
    /// Enhanced Metafile (EMF)
    Emf = 0xF01A,
    /// Windows Metafile (WMF)
    Wmf = 0xF01B,
    /// Macintosh PICT
    Pict = 0xF01C,
    /// JPEG
    Jpeg = 0xF01D,
    /// PNG
    Png = 0xF01E,
    /// Device Independent Bitmap (DIB)
    Dib = 0xF01F,
    /// TIFF
    Tiff = 0xF029,
}

impl BlipType {
    /// Every known BLIP type, used for reverse lookup by record ID.
    const ALL: [Self; 7] = [
        Self::Emf,
        Self::Wmf,
        Self::Pict,
        Self::Jpeg,
        Self::Png,
        Self::Dib,
        Self::Tiff,
    ];

    /// Maps an OfficeArt record type ID to its BLIP type.
    ///
    /// Returns `None` for IDs that do not denote a BLIP record.
    pub fn from_record_id(record_id: u16) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|kind| *kind as u16 == record_id)
    }

    /// Returns `true` for the vector metafile formats (EMF, WMF, PICT).
    pub const fn is_metafile(&self) -> bool {
        match self {
            Self::Emf | Self::Wmf | Self::Pict => true,
            Self::Jpeg | Self::Png | Self::Dib | Self::Tiff => false,
        }
    }

    /// Returns `true` for the raster formats (JPEG, PNG, DIB, TIFF).
    pub const fn is_bitmap(&self) -> bool {
        matches!(self, Self::Jpeg | Self::Png | Self::Dib | Self::Tiff)
    }

    /// Returns the conventional file extension (without the dot).
    pub const fn extension(&self) -> &'static str {
        match *self {
            Self::Emf => "emf",
            Self::Wmf => "wmf",
            Self::Pict => "pict",
            Self::Jpeg => "jpg",
            Self::Png => "png",
            Self::Dib => "dib",
            Self::Tiff => "tiff",
        }
    }
}
/// * `data` - Byte slice containing the header (at least 8 bytes) + /// + /// # Returns + /// The parsed header or an error + pub fn parse(data: &[u8]) -> Result { + if data.len() < 8 { + return Err(litchi_core::error::Error::ParseError( + "Insufficient data for record header".into(), + )); + } + + // Read version and instance from first 2 bytes + let ver_inst = read_u16_le(data, 0).unwrap_or(0); + let version = (ver_inst & 0x0F) as u8; + let instance = (ver_inst >> 4) & 0xFFF; + + let record_type = read_u16_le(data, 2).unwrap_or(0); + let length = read_u32_le(data, 4).unwrap_or(0); + + Ok(Self { + version, + instance, + record_type, + length, + }) + } + + /// Get the options field (combination of version and instance) + pub const fn options(&self) -> u16 { + (self.instance << 4) | (self.version as u16) + } +} + +/// Metafile BLIP data structure (EMF, WMF, PICT) +/// +/// These formats include additional metadata and may be compressed +#[derive(Debug, Clone)] +pub struct MetafileBlip<'data> { + /// Record header + pub header: RecordHeader, + /// Primary UID (16 bytes MD4/MD5 hash) + pub uid: [u8; 16], + /// Secondary UID (optional, present if (instance ^ signature) == 0x10) + pub secondary_uid: Option<[u8; 16]>, + /// Uncompressed size in bytes + pub uncompressed_size: u32, + /// Clipping bounds (x1, y1, x2, y2) + pub bounds: (i32, i32, i32, i32), + /// Size in EMU (English Metric Units) - width, height + pub size_emu: (i32, i32), + /// Compressed size in bytes + pub compressed_size: u32, + /// Compression flag (0 = deflate, 0xFE = no compression) + pub compression: u8, + /// Filter byte (usually 0xFE) + pub filter: u8, + /// Picture data (may be compressed) - uses Cow for zero-copy when possible + pub picture_data: Cow<'data, [u8]>, +} + +impl<'data> MetafileBlip<'data> { + /// Parse a metafile BLIP record + /// + /// # Arguments + /// * `data` - Complete record data including header + /// + /// # Returns + /// The parsed metafile BLIP or an error + pub fn 
parse(data: &'data [u8]) -> Result { + let header = RecordHeader::parse(data)?; + let mut offset = 8; + + // Parse primary UID + if offset + 16 > data.len() { + return Err(litchi_core::error::Error::ParseError( + "Insufficient data for UID".into(), + )); + } + let mut uid = [0u8; 16]; + uid.copy_from_slice(&data[offset..offset + 16]); + offset += 16; + + // Check if secondary UID is present + let signature = Self::get_signature(header.record_type); + let has_secondary = (header.options() ^ signature) == 0x10; + let secondary_uid = if has_secondary { + if offset + 16 > data.len() { + return Err(litchi_core::error::Error::ParseError( + "Insufficient data for secondary UID".into(), + )); + } + let mut sec_uid = [0u8; 16]; + sec_uid.copy_from_slice(&data[offset..offset + 16]); + offset += 16; + Some(sec_uid) + } else { + None + }; + + // Parse metadata manually (34 bytes total) + // According to [MS-ODRAW] 2.2.23: + // - uncompressed_size: 4 bytes + // - bounds: 16 bytes (4 x i32: left, top, right, bottom) + // - size_emu: 8 bytes (2 x i32: width, height) + // - compressed_size: 4 bytes + // - compression: 1 byte + // - filter: 1 byte + if offset + 34 > data.len() { + return Err(litchi_core::error::Error::ParseError( + "Insufficient data for metafile metadata".into(), + )); + } + + // Manual parsing to avoid alignment issues + let uncompressed_size = read_u32_le(data, offset)?; + let bounds_left = read_u32_le(data, offset + 4)? as i32; + let bounds_top = read_u32_le(data, offset + 8)? as i32; + let bounds_right = read_u32_le(data, offset + 12)? as i32; + let bounds_bottom = read_u32_le(data, offset + 16)? as i32; + let size_width = read_u32_le(data, offset + 20)? as i32; + let size_height = read_u32_le(data, offset + 24)? 
/// Returns the record-header *options* value (`recInstance << 4 | recVer`)
/// of the single-UID variant of a metafile BLIP record type.
///
/// [MS-ODRAW] assigns `recInstance` 0x3D4 / 0x216 / 0x542 to EMF / WMF / PICT
/// records that carry one UID, and the next value (0x3D5 / 0x217 / 0x543) to
/// records that carry two. `parse` compares this signature against
/// `RecordHeader::options()`, in which the instance sits in the upper twelve
/// bits, so the signature has to be expressed in that same shifted form.
/// Only then does `options ^ signature == 0x10` identify a two-UID record.
/// With the unshifted instance values the test could never succeed and the
/// secondary UID was silently read as metadata.
///
/// Unknown record types yield 0.
fn get_signature(record_type: u16) -> u16 {
    match record_type {
        0xF01A => 0x3D40, // EMF:  recInstance 0x3D4, recVer 0
        0xF01B => 0x2160, // WMF:  recInstance 0x216, recVer 0
        0xF01C => 0x5420, // PICT: recInstance 0x542, recVer 0
        _ => 0,
    }
}
flate2::read::DeflateDecoder::new(&self.picture_data[..]); + decoder.read_to_end(&mut decompressed) + }; + + match result { + Ok(_) => Ok(Cow::Owned(decompressed)), + Err(e) => Err(Error::ParseError(format!("Decompression failed: {}", e))), + } + } + + /// Get the BLIP type + pub fn blip_type(&self) -> Option { + BlipType::from_record_id(self.header.record_type) + } + + /// Get WMF data with proper placeable header added + /// + /// WMF data in BLIP records doesn't include the placeable header, so we need to + /// reconstruct it using the bounds and size_emu metadata from the BLIP. + /// + /// According to MS-ODRAW and Apache POI: + /// - BLIP stores WMF without placeable header + /// - rcBounds contains the logical bounds + /// - ptSize contains the size in EMU (English Metric Units) + /// - We need to create a placeable header for proper WMF parsing + pub fn get_wmf_with_header(&self) -> Result> { + // Only for WMF type + if self.blip_type() != Some(BlipType::Wmf) { + return Err(Error::ParseError("Not a WMF metafile".into())); + } + + let wmf_data = self.decompress()?; + + // Check if it already has a placeable header (shouldn't happen with BLIP data) + if wmf_data.len() >= 4 { + let first_u32 = + u32::from_le_bytes([wmf_data[0], wmf_data[1], wmf_data[2], wmf_data[3]]); + if first_u32 == 0x9AC6CDD7 { + // Already has placeable header + return Ok(wmf_data); + } + } + + // Calculate proper bounds for placeable header + // ptSize is in EMU, convert to logical units + // 1 inch = 914400 EMU, and we use 1440 units per inch (twips) + let (left, top, right, bottom) = + if self.bounds.0 == 0 && self.bounds.1 == 0 && self.bounds.2 == 0 && self.bounds.3 == 0 + { + // Bounds are zero, calculate from size_emu + // Convert EMU to twips: emu * 1440 / 914400 + let width_twips = (self.size_emu.0 as i64 * 1440 / 914400) as i32; + let height_twips = (self.size_emu.1 as i64 * 1440 / 914400) as i32; + (0, 0, width_twips, height_twips) + } else { + // Use BLIP bounds - they're 
already in logical units + self.bounds + }; + + // Create placeable header (22 bytes) + let mut result = Vec::with_capacity(22 + wmf_data.len()); + + // Key: 0x9AC6CDD7 (Aldus Placeable Metafile magic number) + result.extend_from_slice(&0x9AC6CDD7u32.to_le_bytes()); + // Handle (always 0) + result.extend_from_slice(&0u16.to_le_bytes()); + // Left, Top, Right, Bottom (bounds in logical units) + result.extend_from_slice(&(left as i16).to_le_bytes()); + result.extend_from_slice(&(top as i16).to_le_bytes()); + result.extend_from_slice(&(right as i16).to_le_bytes()); + result.extend_from_slice(&(bottom as i16).to_le_bytes()); + // Inch (units per inch) - use 1440 (twips) + result.extend_from_slice(&1440u16.to_le_bytes()); + // Reserved (always 0) + result.extend_from_slice(&0u32.to_le_bytes()); + + // Calculate checksum (XOR of all 16-bit words in header so far) + let mut checksum: u16 = 0; + for chunk in result[0..20].chunks(2) { + if chunk.len() == 2 { + let word = u16::from_le_bytes([chunk[0], chunk[1]]); + checksum ^= word; + } + } + result.extend_from_slice(&checksum.to_le_bytes()); + + // Append original WMF data + result.extend_from_slice(&wmf_data); + + Ok(Cow::Owned(result)) + } +} + +/// Bitmap BLIP data structure (JPEG, PNG, DIB, TIFF) +/// +/// These formats have simpler structure without compression metadata +#[derive(Debug, Clone)] +pub struct BitmapBlip<'data> { + /// Record header + pub header: RecordHeader, + /// Primary UID (16 bytes MD4/MD5 hash) + pub uid: [u8; 16], + /// Marker byte (0xFF for external files) + pub marker: u8, + /// Picture data (already in the target format) - uses Cow for zero-copy when possible + pub picture_data: Cow<'data, [u8]>, +} + +impl<'data> BitmapBlip<'data> { + /// Parse a bitmap BLIP record + /// + /// # Arguments + /// * `data` - Complete record data including header + /// + /// # Returns + /// The parsed bitmap BLIP or an error + pub fn parse(data: &'data [u8]) -> Result { + let header = RecordHeader::parse(data)?; + 
let mut offset = 8; + + // Parse UID + if offset + 16 > data.len() { + return Err(litchi_core::error::Error::ParseError( + "Insufficient data for UID".into(), + )); + } + let mut uid = [0u8; 16]; + uid.copy_from_slice(&data[offset..offset + 16]); + offset += 16; + + // Parse marker + if offset >= data.len() { + return Err(litchi_core::error::Error::ParseError( + "Insufficient data for marker".into(), + )); + } + let marker = data[offset]; + offset += 1; + + // Extract picture data (zero-copy borrow) + let picture_data = Cow::Borrowed(&data[offset..]); + + Ok(Self { + header, + uid, + marker, + picture_data, + }) + } + + /// Get the BLIP type + pub fn blip_type(&self) -> Option { + BlipType::from_record_id(self.header.record_type) + } +} + +/// General BLIP record that can be either metafile or bitmap +#[derive(Debug, Clone)] +pub enum Blip<'data> { + /// Metafile format (EMF, WMF, PICT) + Metafile(MetafileBlip<'data>), + /// Bitmap format (JPEG, PNG, DIB, TIFF) + Bitmap(BitmapBlip<'data>), +} + +impl<'data> Blip<'data> { + /// Parse a BLIP record from bytes + /// + /// # Arguments + /// * `data` - Complete record data including header + /// + /// # Returns + /// The parsed BLIP or an error + /// + /// # Example + /// ```no_run + /// use litchi_imgconv::blip::Blip; + /// + /// let data = vec![/* BLIP record bytes */]; + /// let blip = Blip::parse(&data)?; + /// # Ok::<(), litchi_core::error::Error>(()) + /// ``` + pub fn parse(data: &'data [u8]) -> Result { + if data.len() < 8 { + return Err(litchi_core::error::Error::ParseError( + "Insufficient data for BLIP record".into(), + )); + } + + let header = RecordHeader::parse(data)?; + let blip_type = BlipType::from_record_id(header.record_type).ok_or_else(|| { + litchi_core::error::Error::ParseError(format!( + "Unknown BLIP record type: 0x{:04X}", + header.record_type + )) + })?; + + match blip_type { + BlipType::Emf | BlipType::Wmf | BlipType::Pict => { + Ok(Self::Metafile(MetafileBlip::parse(data)?)) + }, + 
BlipType::Jpeg | BlipType::Png | BlipType::Dib | BlipType::Tiff => { + Ok(Self::Bitmap(BitmapBlip::parse(data)?)) + }, + } + } + + /// Get the BLIP type + pub fn blip_type(&self) -> Option { + match self { + Self::Metafile(m) => m.blip_type(), + Self::Bitmap(b) => b.blip_type(), + } + } + + /// Get the raw picture data + /// + /// For metafiles, this returns the data as-is (possibly compressed). + /// Use `get_decompressed_data()` to get uncompressed data for metafiles. + pub fn picture_data(&self) -> &[u8] { + match self { + Self::Metafile(m) => &m.picture_data, + Self::Bitmap(b) => &b.picture_data, + } + } + + /// Get decompressed picture data + /// + /// For bitmap BLIPs, this returns the data as-is. + /// For metafile BLIPs, this decompresses if necessary. + pub fn get_decompressed_data(&self) -> Result> { + match self { + Self::Metafile(m) => m.decompress(), + Self::Bitmap(b) => Ok(b.picture_data.clone()), + } + } + + /// Convert to owned data (useful when lifetime constraints are problematic) + pub fn into_owned(self) -> Blip<'static> { + match self { + Self::Metafile(m) => Blip::Metafile(MetafileBlip { + header: m.header, + uid: m.uid, + secondary_uid: m.secondary_uid, + uncompressed_size: m.uncompressed_size, + bounds: m.bounds, + size_emu: m.size_emu, + compressed_size: m.compressed_size, + compression: m.compression, + filter: m.filter, + picture_data: Cow::Owned(m.picture_data.into_owned()), + }), + Self::Bitmap(b) => Blip::Bitmap(BitmapBlip { + header: b.header, + uid: b.uid, + marker: b.marker, + picture_data: Cow::Owned(b.picture_data.into_owned()), + }), + } + } + + /// Get decompressed picture data with proper header for WMF + /// + /// For WMF metafiles, this adds the placeable header using BLIP metadata. + /// For other formats, this is equivalent to get_decompressed_data(). 
+ pub fn get_picture_data_for_conversion(&self) -> Result> { + match self { + Blip::Metafile(m) => { + // For WMF, add placeable header + if m.blip_type() == Some(BlipType::Wmf) { + m.get_wmf_with_header() + } else { + // For EMF and PICT, just decompress + m.decompress() + } + }, + Blip::Bitmap(_) => { + // For bitmaps, use regular decompressed data + self.get_decompressed_data() + }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_blip_type_metafile() { + assert!(BlipType::Emf.is_metafile()); + assert!(BlipType::Wmf.is_metafile()); + assert!(BlipType::Pict.is_metafile()); + assert!(!BlipType::Jpeg.is_metafile()); + } + + #[test] + fn test_blip_type_extension() { + assert_eq!(BlipType::Emf.extension(), "emf"); + assert_eq!(BlipType::Png.extension(), "png"); + } +} diff --git a/src/images/bse.rs b/crates/litchi-imgconv/src/bse.rs similarity index 98% rename from src/images/bse.rs rename to crates/litchi-imgconv/src/bse.rs index 70b1853..4a1407f 100644 --- a/src/images/bse.rs +++ b/crates/litchi-imgconv/src/bse.rs @@ -8,9 +8,9 @@ // - [MS-ODRAW] 2.2.32: OfficeArtBStoreContainerFileBlock // - [MS-ODRAW] 2.2.33: OfficeArtBSE -use crate::common::binary::read_u32_le; -use crate::common::error::{Error, Result}; -use crate::images::BlipType; +use crate::BlipType; +use litchi_core::binary::read_u32_le; +use litchi_core::error::{Error, Result}; use std::borrow::Cow; /// BlipStoreEntry - metadata and index for a BLIP record @@ -127,7 +127,7 @@ impl<'data> BlipStoreEntry<'data> { // Name is stored as UTF-16 LE (2 bytes per character) let name_bytes = &data[name_start..name_end]; - if !name_len.is_multiple_of(2) { + if name_len % 2 != 0 { return Err(Error::ParseError( "Invalid BSE name length (not UTF-16)".into(), )); diff --git a/crates/litchi-imgconv/src/emf/converter.rs b/crates/litchi-imgconv/src/emf/converter.rs new file mode 100644 index 0000000..aea9631 --- /dev/null +++ b/crates/litchi-imgconv/src/emf/converter.rs @@ -0,0 +1,263 @@ +// EMF 
to raster image converter +// +// Converts EMF metafiles to modern raster formats (PNG, JPEG, WebP). +// +// Note: Full EMF rendering would require implementing a complete GDI rendering engine, +// which is extremely complex. This implementation provides: +// 1. Extraction of embedded bitmaps from EMF records +// 2. Placeholder generation with proper dimensions +// 3. A foundation for future full rendering support + +use super::parser::EmfParser; +use image::{DynamicImage, ImageBuffer, ImageFormat, Rgba, RgbaImage}; +use litchi_core::error::{Error, Result}; +use std::io::Cursor; + +/// Options for EMF to raster conversion +#[derive(Debug, Clone)] +pub struct EmfToRasterOptions { + /// Target width (None = use source dimensions) + pub width: Option, + /// Target height (None = use source dimensions) + pub height: Option, + /// Background color for rendering + pub background_color: Rgba, +} + +impl Default for EmfToRasterOptions { + fn default() -> Self { + Self { + width: None, + height: None, + background_color: Rgba([255, 255, 255, 255]), + } + } +} + +/// EMF to raster converter +pub struct EmfConverter { + parser: EmfParser, + options: EmfToRasterOptions, +} + +impl EmfConverter { + /// Create a new EMF converter + pub fn new(parser: EmfParser, options: EmfToRasterOptions) -> Self { + Self { parser, options } + } + + /// Calculate output dimensions maintaining aspect ratio + fn calculate_dimensions(&self) -> (u32, u32) { + let src_width = self.parser.width().max(1) as u32; + let src_height = self.parser.height().max(1) as u32; + + match (self.options.width, self.options.height) { + (Some(w), Some(h)) => (w, h), + (Some(w), None) => { + let aspect = src_height as f64 / src_width as f64; + let h = (w as f64 * aspect) as u32; + (w, h) + }, + (None, Some(h)) => { + let aspect = src_width as f64 / src_height as f64; + let w = (h as f64 * aspect) as u32; + (w, h) + }, + (None, None) => { + // Use source dimensions, but cap at reasonable size + let max_dim = 4096; + if 
src_width > max_dim || src_height > max_dim { + let scale = (max_dim as f64) / src_width.max(src_height) as f64; + ( + (src_width as f64 * scale) as u32, + (src_height as f64 * scale) as u32, + ) + } else { + (src_width, src_height) + } + }, + } + } + + /// Try to extract embedded bitmap from EMF records + /// + /// EMF files can contain embedded bitmaps via various record types: + /// - EMR_STRETCHDIBITS (0x00000051) + /// - EMR_BITBLT (0x00000040) + /// - EMR_STRETCHBLT (0x00000041) + fn extract_embedded_bitmap(&self) -> Option { + // Scan through records looking for bitmap data + for record in &self.parser.records { + match record.record_type { + 0x00000051 => { + // EMR_STRETCHDIBITS - contains DIB data + if let Some(img) = self.parse_dib_from_record(&record.data) { + return Some(img); + } + }, + 0x00000040 | 0x00000041 => { + // EMR_BITBLT or EMR_STRETCHBLT + if let Some(img) = self.parse_bitmap_from_bitblt(&record.data) { + return Some(img); + } + }, + _ => {}, + } + } + None + } + + /// Parse DIB (Device Independent Bitmap) data from record + fn parse_dib_from_record(&self, data: &[u8]) -> Option { + // DIB data structure is complex; for now, try to detect standard formats + // that image crate can handle directly + if data.len() < 40 { + return None; + } + + // Try to load as BMP (DIB is essentially BMP without file header) + // We need to construct a proper BMP file header + if let Ok(img) = self.construct_bmp_from_dib(data) { + return Some(img); + } + + None + } + + /// Construct a BMP image from DIB data + fn construct_bmp_from_dib(&self, dib_data: &[u8]) -> Result { + // BMP file header is 14 bytes + // BITMAPFILEHEADER structure + let file_size = 14u32 + dib_data.len() as u32; + let pixel_data_offset = 14u32 + 40u32; // header + BITMAPINFOHEADER + + let mut bmp_data = Vec::with_capacity(file_size as usize); + + // BMP file header + bmp_data.extend_from_slice(b"BM"); // Signature + bmp_data.extend_from_slice(&file_size.to_le_bytes()); // File size + 
bmp_data.extend_from_slice(&[0u8; 4]); // Reserved + bmp_data.extend_from_slice(&pixel_data_offset.to_le_bytes()); // Pixel data offset + + // Append DIB data + bmp_data.extend_from_slice(dib_data); + + // Try to load the constructed BMP + match image::load_from_memory(&bmp_data) { + Ok(img) => Ok(img), + Err(_) => Err(Error::ParseError("Failed to load DIB as BMP".into())), + } + } + + /// Parse bitmap from BITBLT record + fn parse_bitmap_from_bitblt(&self, _data: &[u8]) -> Option { + // BITBLT records are complex and may not always contain embedded bitmap data + // This is a placeholder for future implementation + None + } + + /// Create a placeholder image with EMF metadata + /// + /// This generates a simple placeholder when full rendering isn't available. + /// The placeholder includes dimensional information. + fn create_placeholder(&self, width: u32, height: u32) -> RgbaImage { + let mut img = ImageBuffer::from_pixel(width, height, self.options.background_color); + + // Draw a simple border and diagonal lines to indicate this is a placeholder + let border_color = Rgba([128, 128, 128, 255]); + + // Draw border + for x in 0..width { + if x < height { + img.put_pixel(x, 0, border_color); + img.put_pixel(x, height - 1, border_color); + } + } + for y in 0..height { + if y < width { + img.put_pixel(0, y, border_color); + img.put_pixel(width - 1, y, border_color); + } + } + + // Draw diagonals + let min_dim = width.min(height); + for i in 0..min_dim { + img.put_pixel(i, i, border_color); + img.put_pixel(i, height - 1 - i, border_color); + } + + img + } + + /// Convert EMF to a raster image + /// + /// This first attempts to extract any embedded bitmaps from the EMF. + /// If no bitmaps are found, it creates a placeholder image. 
+ /// + /// TODO: Implement full EMF rendering engine for complete vector-to-raster conversion + pub fn convert_to_image(&self) -> Result { + let (target_width, target_height) = self.calculate_dimensions(); + + // Try to extract embedded bitmap first + if let Some(embedded) = self.extract_embedded_bitmap() { + // Resize if necessary + if embedded.width() != target_width || embedded.height() != target_height { + return Ok(DynamicImage::ImageRgba8(image::imageops::resize( + &embedded, + target_width, + target_height, + image::imageops::FilterType::Lanczos3, + ))); + } + return Ok(embedded); + } + + // No embedded bitmap found - create placeholder + // TODO: Full EMF rendering would go here + let placeholder = self.create_placeholder(target_width, target_height); + Ok(DynamicImage::ImageRgba8(placeholder)) + } + + /// Convert EMF to specified image format + /// + /// # Arguments + /// * `format` - Target image format (PNG, JPEG, WebP, etc.) + /// + /// # Returns + /// Encoded image bytes in the target format + pub fn convert_to_format(&self, format: ImageFormat) -> Result> { + let image = self.convert_to_image()?; + + let mut buffer = Cursor::new(Vec::new()); + image + .write_to(&mut buffer, format) + .map_err(|e| Error::ParseError(format!("Failed to encode image: {}", e)))?; + + Ok(buffer.into_inner()) + } + + /// Convert EMF to PNG bytes + pub fn convert_to_png(&self) -> Result> { + self.convert_to_format(ImageFormat::Png) + } + + /// Convert EMF to JPEG bytes + pub fn convert_to_jpeg(&self) -> Result> { + self.convert_to_format(ImageFormat::Jpeg) + } + + /// Convert EMF to WebP bytes + pub fn convert_to_webp(&self) -> Result> { + self.convert_to_format(ImageFormat::WebP) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn test_dimension_calculation() { + // This would require creating a valid EMF parser + // Placeholder for future tests + } +} diff --git a/src/images/emf/device_context.rs b/crates/litchi-imgconv/src/emf/device_context.rs similarity index 99% rename 
from src/images/emf/device_context.rs rename to crates/litchi-imgconv/src/emf/device_context.rs index bae5c84..15d8cf7 100644 --- a/src/images/emf/device_context.rs +++ b/crates/litchi-imgconv/src/emf/device_context.rs @@ -4,7 +4,7 @@ // objects, transforms, colors, and modes. use super::gdi_objects::{Brush, Font, Pen}; -use crate::images::svg::color::colorref_to_hex; +use crate::svg::color::colorref_to_hex; /// Text alignment modes #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/images/emf/gdi_objects.rs b/crates/litchi-imgconv/src/emf/gdi_objects.rs similarity index 99% rename from src/images/emf/gdi_objects.rs rename to crates/litchi-imgconv/src/emf/gdi_objects.rs index 5f71216..594b8c0 100644 --- a/src/images/emf/gdi_objects.rs +++ b/crates/litchi-imgconv/src/emf/gdi_objects.rs @@ -3,7 +3,7 @@ // This module manages GDI objects (pens, brushes, fonts) that are created, // selected, and deleted during EMF playback. -use crate::images::svg::color::colorref_to_hex; +use crate::svg::color::colorref_to_hex; use std::collections::HashMap; use xml_minifier::minified_xml_format; diff --git a/crates/litchi-imgconv/src/emf/mod.rs b/crates/litchi-imgconv/src/emf/mod.rs new file mode 100644 index 0000000..89d590d --- /dev/null +++ b/crates/litchi-imgconv/src/emf/mod.rs @@ -0,0 +1,149 @@ +// Enhanced Metafile (EMF) format parser and converter +// +// This module provides functionality to parse EMF data and convert it to +// modern image formats (PNG, JPEG, WebP). +// +// EMF is a 32-bit vector graphics format for Windows, introduced in Windows NT 3.1. +// It's an improved version of WMF with better support for modern graphics features. 
+// +// References: +// - [MS-EMF]: Enhanced Metafile Format Specification +// - https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-emf/ + +pub mod converter; +pub mod device_context; +pub mod gdi_objects; +pub mod parser; +pub mod record_parser; +pub mod records; +pub mod simd_ops; +pub mod svg; + +pub use converter::{EmfConverter, EmfToRasterOptions}; +pub use gdi_objects::{Brush, BrushStyle, Font, HatchStyle, Pen, PenStyle}; +pub use parser::{EmfParser, EmfRecord, EmfRecordRef, RecordRefIterator}; +pub use record_parser::*; +pub use svg::EmfSvgConverter; + +use image::ImageFormat; +use litchi_core::error::Result; + +/// Convert EMF data to a raster image in the specified format +/// +/// # Arguments +/// * `emf_data` - Raw EMF file data +/// * `format` - Target image format (PNG, JPEG, WebP) +/// * `width` - Optional output width (maintains aspect ratio if only one dimension specified) +/// * `height` - Optional output height +/// +/// # Returns +/// Encoded image bytes in the target format +/// +/// # Example +/// ```no_run +/// use litchi_imgconv::emf::convert_emf; +/// use image::ImageFormat; +/// +/// let emf_data = std::fs::read("image.emf")?; +/// let png_data = convert_emf(&emf_data, ImageFormat::Png, Some(800), None)?; +/// std::fs::write("output.png", png_data)?; +/// # Ok::<(), Box>(()) +/// ``` +pub fn convert_emf( + emf_data: &[u8], + format: ImageFormat, + width: Option, + height: Option, +) -> Result> { + let parser = EmfParser::new(emf_data)?; + let options = EmfToRasterOptions { + width, + height, + background_color: image::Rgba([255, 255, 255, 255]), + }; + + let converter = EmfConverter::new(parser, options); + converter.convert_to_format(format) +} + +/// Convert EMF data to PNG format +/// +/// # Arguments +/// * `emf_data` - Raw EMF file data +/// * `width` - Optional output width +/// * `height` - Optional output height +/// +/// # Returns +/// PNG-encoded image bytes +pub fn convert_emf_to_png( + emf_data: &[u8], + width: 
Option, + height: Option, +) -> Result> { + convert_emf(emf_data, ImageFormat::Png, width, height) +} + +/// Convert EMF data to JPEG format +/// +/// # Arguments +/// * `emf_data` - Raw EMF file data +/// * `width` - Optional output width +/// * `height` - Optional output height +/// +/// # Returns +/// JPEG-encoded image bytes +pub fn convert_emf_to_jpeg( + emf_data: &[u8], + width: Option, + height: Option, +) -> Result> { + convert_emf(emf_data, ImageFormat::Jpeg, width, height) +} + +/// Convert EMF data to WebP format +/// +/// # Arguments +/// * `emf_data` - Raw EMF file data +/// * `width` - Optional output width +/// * `height` - Optional output height +/// +/// # Returns +/// WebP-encoded image bytes +pub fn convert_emf_to_webp( + emf_data: &[u8], + width: Option, + height: Option, +) -> Result> { + convert_emf(emf_data, ImageFormat::WebP, width, height) +} + +/// Convert EMF data to SVG format +/// +/// This converts vector graphics to minimal SVG while extracting embedded +/// raster images as PNG data URLs. Uses parallel processing for performance. 
/// EMF record types
///
/// Each discriminant is the numeric record-type value found in the first
/// four bytes of a record. Only the variants listed here are mapped; any
/// other value is reported as unknown by [`EmfRecordType::from_u32`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum EmfRecordType {
    Header = 0x00000001,
    PolyBezier = 0x00000002,
    Polygon = 0x00000003,
    Polyline = 0x00000004,
    PolyBezierTo = 0x00000005,
    PolyLineTo = 0x00000006,
    PolyPolyline = 0x00000007,
    PolyPolygon = 0x00000008,
    SetWindowExtEx = 0x00000009,
    SetWindowOrgEx = 0x0000000A,
    SetViewportExtEx = 0x0000000B,
    SetViewportOrgEx = 0x0000000C,
    SetBrushOrgEx = 0x0000000D,
    Eof = 0x0000000E,
    // Add more as needed
}

impl EmfRecordType {
    /// Create from u32 value
    ///
    /// # Returns
    /// `Some(variant)` when `value` matches one of the discriminants above,
    /// `None` for every other value.
    pub const fn from_u32(value: u32) -> Option<Self> {
        // Keep this table in lock-step with the enum discriminants.
        match value {
            0x00000001 => Some(Self::Header),
            0x00000002 => Some(Self::PolyBezier),
            0x00000003 => Some(Self::Polygon),
            0x00000004 => Some(Self::Polyline),
            0x00000005 => Some(Self::PolyBezierTo),
            0x00000006 => Some(Self::PolyLineTo),
            0x00000007 => Some(Self::PolyPolyline),
            0x00000008 => Some(Self::PolyPolygon),
            0x00000009 => Some(Self::SetWindowExtEx),
            0x0000000A => Some(Self::SetWindowOrgEx),
            0x0000000B => Some(Self::SetViewportExtEx),
            0x0000000C => Some(Self::SetViewportOrgEx),
            0x0000000D => Some(Self::SetBrushOrgEx),
            0x0000000E => Some(Self::Eof),
            _ => None,
        }
    }
}
top + frame_top: i32, + /// Frame right + frame_right: i32, + /// Frame bottom + frame_bottom: i32, + /// Signature (must be 0x464D4520 "EMF ") + signature: u32, + /// Version + version: u32, + /// Size of the file in bytes + size: u32, + /// Number of records + num_records: u32, + /// Number of handles in handle table + num_handles: u16, + /// Reserved field + reserved: u16, + /// Size of description string + description_size: u32, + /// Offset to description string + description_offset: u32, + /// Number of palette entries + num_palette: u32, + /// Width of reference device in pixels + device_width: i32, + /// Height of reference device in pixels + device_height: i32, + /// Width of reference device in millimeters + device_width_mm: i32, + /// Height of reference device in millimeters + device_height_mm: i32, +} + +impl EmfHeader { + /// Parse EMF header from data + pub fn parse(data: &[u8]) -> Result { + if data.len() < 88 { + return Err(Error::ParseError("EMF header too short".into())); + } + + // Parse header using zerocopy - read_from_prefix returns (value, remaining) + let (raw_header, _) = RawEmfHeader::read_from_prefix(data) + .map_err(|_| Error::ParseError("Invalid EMF header format".into()))?; + + // Validate record type + if raw_header.record_type != 0x00000001 { + return Err(Error::ParseError(format!( + "Invalid EMF header record type: 0x{:08X}", + raw_header.record_type + ))); + } + + // Validate signature + if raw_header.signature != 0x464D4520 { + // "EMF " in little-endian + return Err(Error::ParseError(format!( + "Invalid EMF signature: 0x{:08X}", + raw_header.signature + ))); + } + + Ok(Self { + bounds: ( + raw_header.bounds_left, + raw_header.bounds_top, + raw_header.bounds_right, + raw_header.bounds_bottom, + ), + frame: ( + raw_header.frame_left, + raw_header.frame_top, + raw_header.frame_right, + raw_header.frame_bottom, + ), + signature: raw_header.signature, + version: raw_header.version, + size: raw_header.size, + num_records: 
raw_header.num_records, + num_handles: raw_header.num_handles, + description_size: raw_header.description_size as u16, + description_offset: raw_header.description_offset, + num_palette: raw_header.num_palette, + device_width: raw_header.device_width, + device_height: raw_header.device_height, + device_width_mm: raw_header.device_width_mm, + device_height_mm: raw_header.device_height_mm, + }) + } + + /// Get the width of the metafile in device units + pub fn width(&self) -> i32 { + self.bounds.2 - self.bounds.0 + } + + /// Get the height of the metafile in device units + pub fn height(&self) -> i32 { + self.bounds.3 - self.bounds.1 + } + + /// Get aspect ratio (width / height) + pub fn aspect_ratio(&self) -> f64 { + let w = self.width() as f64; + let h = self.height() as f64; + if h == 0.0 { 1.0 } else { w / h } + } +} + +/// EMF record with borrowed data for zero-copy parsing +/// +/// This struct uses borrowed data to avoid unnecessary allocations. +/// The lifetime 'a is tied to the source EMF data buffer. 
+#[derive(Debug, Clone)] +pub struct EmfRecord { + /// Record type + pub record_type: u32, + /// Record size in bytes + pub size: u32, + /// Record data (excluding type and size) - owned for now, can be optimized + /// TODO: Make this &'a [u8] when lifetime management is more complex + pub data: Vec, +} + +/// Zero-copy record reference for streaming/iteration +/// +/// This provides a lightweight view into the EMF data without allocations +#[derive(Debug, Copy, Clone)] +pub struct EmfRecordRef<'a> { + /// Record type + pub record_type: u32, + /// Record size in bytes + pub size: u32, + /// Borrowed record data (excluding type and size) + pub data: &'a [u8], +} + +/// Raw EMF record header for zerocopy parsing (8 bytes) +#[derive(Debug, Clone, zerocopy::FromBytes)] +#[repr(C)] +struct RawEmfRecordHeader { + /// Record type + record_type: u32, + /// Record size in bytes + size: u32, +} + +impl EmfRecord { + /// Parse an EMF record from data (creates owned copy) + /// + /// For high-performance scenarios, consider using `EmfRecordRef::parse_ref` instead + pub fn parse(data: &[u8], offset: usize) -> Result<(Self, usize)> { + let (record_ref, consumed) = EmfRecordRef::parse_ref(data, offset)?; + + Ok(( + Self { + record_type: record_ref.record_type, + size: record_ref.size, + data: record_ref.data.to_vec(), + }, + consumed, + )) + } +} + +impl<'a> EmfRecordRef<'a> { + /// Parse an EMF record reference (zero-copy) + /// + /// This is the most efficient way to parse records, returning a borrowed view + /// into the original data without any allocations. 
+ #[inline] + pub fn parse_ref(data: &'a [u8], offset: usize) -> Result<(Self, usize)> { + if offset + 8 > data.len() { + return Err(Error::ParseError("Insufficient data for EMF record".into())); + } + + // Parse record header using zerocopy - highly optimized, no allocations + let (header, _) = RawEmfRecordHeader::read_from_prefix(&data[offset..]) + .map_err(|_| Error::ParseError("Invalid EMF record header".into()))?; + + let record_type = header.record_type; + let size = header.size; + + // Validate size with early return for better branch prediction + if size < 8 { + return Err(Error::ParseError(format!( + "EMF record size too small: {} at offset {}", + size, offset + ))); + } + + let end_offset = offset + .checked_add(size as usize) + .ok_or_else(|| Error::ParseError("EMF record size overflow".into()))?; + + if end_offset > data.len() { + return Err(Error::ParseError(format!( + "EMF record extends beyond data: size {} at offset {}, data length {}", + size, + offset, + data.len() + ))); + } + + // Zero-copy: just borrow the slice + let record_data = &data[offset + 8..end_offset]; + + Ok(( + Self { + record_type, + size, + data: record_data, + }, + size as usize, + )) + } + + /// Convert to owned record (requires allocation) + #[inline] + pub fn to_owned(&self) -> EmfRecord { + EmfRecord { + record_type: self.record_type, + size: self.size, + data: self.data.to_vec(), + } + } +} + +/// EMF file parser with performance optimizations +/// +/// This parser provides multiple modes of operation: +/// 1. Eager parsing (all records at once) - use `new()` +/// 2. Lazy parsing (on-demand) - use `iter_records()` +/// 3. 
Zero-copy streaming - use `iter_record_refs()` +#[derive(Debug)] +pub struct EmfParser { + /// EMF header + pub header: EmfHeader, + /// All records (excluding header) - eagerly parsed + pub records: Vec, + /// Raw EMF data - kept for zero-copy access + data: Vec, + /// Offset to first record after header (cached for performance) + first_record_offset: usize, +} + +impl EmfParser { + /// Create a new EMF parser from raw data + /// + /// This eagerly parses all records. For large files or streaming scenarios, + /// consider using `new_lazy()` or iterating with `iter_record_refs()`. + pub fn new(data: &[u8]) -> Result { + if data.len() < 88 { + return Err(Error::ParseError("EMF data too short".into())); + } + + let header = EmfHeader::parse(data)?; + + // Get header record size - extract once for efficiency + let header_record_size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize; + + // Pre-allocate vector with expected capacity (from header.num_records if available) + let expected_records = header.num_records.saturating_sub(1) as usize; // -1 for header + let mut records = Vec::with_capacity(expected_records.min(10000)); // Cap at 10k for safety + + let mut offset = header_record_size; + + // Parse remaining records with optimized loop + while offset < data.len() { + match EmfRecord::parse(data, offset) { + Ok((record, consumed)) => { + let is_eof = record.record_type == 0x0000000E; + records.push(record); + if is_eof { + break; + } + offset += consumed; + }, + Err(_) => break, + } + } + + // Shrink to fit if we over-allocated + records.shrink_to_fit(); + + Ok(Self { + header, + records, + data: data.to_vec(), + first_record_offset: header_record_size, + }) + } + + /// Create a new EMF parser with header only (lazy record parsing) + /// + /// Records are not parsed until accessed. Use `iter_record_refs()` for + /// zero-copy iteration. 
+ pub fn new_lazy(data: &[u8]) -> Result { + if data.len() < 88 { + return Err(Error::ParseError("EMF data too short".into())); + } + + let header = EmfHeader::parse(data)?; + let header_record_size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize; + + Ok(Self { + header, + records: Vec::new(), // Empty - will be populated on demand + data: data.to_vec(), + first_record_offset: header_record_size, + }) + } + + /// Get an iterator over record references (zero-copy, most efficient) + /// + /// This is the most performant way to process EMF records as it avoids + /// all allocations and uses zero-copy techniques. + /// + /// # Example + /// ```no_run + /// # use litchi_imgconv::emf::parser::EmfParser; + /// # let data = &[0u8; 100]; + /// let parser = EmfParser::new_lazy(data)?; + /// for record_ref in parser.iter_record_refs() { + /// // Process record without any allocations + /// match record_ref.record_type { + /// 0x00000003 => { /* handle polygon */ } + /// _ => {} + /// } + /// } + /// # Ok::<(), Box>(()) + /// ``` + pub fn iter_record_refs(&self) -> RecordRefIterator<'_> { + RecordRefIterator { + data: &self.data, + offset: self.first_record_offset, + } + } + + /// Get the raw EMF data + #[inline] + pub fn data(&self) -> &[u8] { + &self.data + } + + /// Get the width in device units + #[inline] + pub fn width(&self) -> i32 { + self.header.width() + } + + /// Get the height in device units + #[inline] + pub fn height(&self) -> i32 { + self.header.height() + } + + /// Get aspect ratio + #[inline] + pub fn aspect_ratio(&self) -> f64 { + self.header.aspect_ratio() + } + + /// Count records without allocating (fast) + /// + /// This is useful when you just need to know how many records exist + /// without parsing them all. 
+ pub fn count_records(&self) -> Result { + if !self.records.is_empty() { + return Ok(self.records.len()); + } + + Ok(self.iter_record_refs().count()) + } +} + +/// Iterator over EMF record references (zero-copy) +/// +/// This iterator provides the most efficient way to process EMF records +/// by avoiding all allocations and using borrowed data. +pub struct RecordRefIterator<'a> { + data: &'a [u8], + offset: usize, +} + +impl<'a> Iterator for RecordRefIterator<'a> { + type Item = EmfRecordRef<'a>; + + #[inline] + fn next(&mut self) -> Option { + if self.offset >= self.data.len() { + return None; + } + + match EmfRecordRef::parse_ref(self.data, self.offset) { + Ok((record, consumed)) => { + let is_eof = record.record_type == 0x0000000E; + self.offset += consumed; + + if is_eof { + // Return EOF record and stop iteration + return Some(record); + } + + Some(record) + }, + Err(_) => None, + } + } + + fn size_hint(&self) -> (usize, Option) { + // We don't know the exact count without parsing, but we can estimate + // Average EMF record is probably 20-50 bytes + let remaining = self.data.len() - self.offset; + let estimated = remaining / 30; // Conservative estimate + (0, Some(estimated)) + } +} + +#[cfg(test)] +mod tests { + #[test] + fn test_emf_signature() { + // "EMF " in little-endian + assert_eq!(0x464D4520u32.to_le_bytes(), [0x20, 0x45, 0x4D, 0x46]); + } +} diff --git a/src/images/emf/record_parser.rs b/crates/litchi-imgconv/src/emf/record_parser.rs similarity index 98% rename from src/images/emf/record_parser.rs rename to crates/litchi-imgconv/src/emf/record_parser.rs index 9caad63..7449b33 100644 --- a/src/images/emf/record_parser.rs +++ b/crates/litchi-imgconv/src/emf/record_parser.rs @@ -1,9 +1,9 @@ +use crate::emf::records::types::*; /// Performance-optimized EMF record parsing utilities /// /// This module provides high-performance parsing helpers that minimize allocations /// and maximize cache efficiency. 
-use crate::common::error::{Error, Result}; -use crate::images::emf::records::types::*; +use litchi_core::error::{Error, Result}; use zerocopy::FromBytes; /// Fast parser for point arrays (POINTL) diff --git a/src/images/emf/records/bitmap.rs b/crates/litchi-imgconv/src/emf/records/bitmap.rs similarity index 100% rename from src/images/emf/records/bitmap.rs rename to crates/litchi-imgconv/src/emf/records/bitmap.rs diff --git a/src/images/emf/records/drawing.rs b/crates/litchi-imgconv/src/emf/records/drawing.rs similarity index 100% rename from src/images/emf/records/drawing.rs rename to crates/litchi-imgconv/src/emf/records/drawing.rs diff --git a/src/images/emf/records/mod.rs b/crates/litchi-imgconv/src/emf/records/mod.rs similarity index 100% rename from src/images/emf/records/mod.rs rename to crates/litchi-imgconv/src/emf/records/mod.rs diff --git a/src/images/emf/records/objects.rs b/crates/litchi-imgconv/src/emf/records/objects.rs similarity index 100% rename from src/images/emf/records/objects.rs rename to crates/litchi-imgconv/src/emf/records/objects.rs diff --git a/src/images/emf/records/path.rs b/crates/litchi-imgconv/src/emf/records/path.rs similarity index 100% rename from src/images/emf/records/path.rs rename to crates/litchi-imgconv/src/emf/records/path.rs diff --git a/src/images/emf/records/state.rs b/crates/litchi-imgconv/src/emf/records/state.rs similarity index 100% rename from src/images/emf/records/state.rs rename to crates/litchi-imgconv/src/emf/records/state.rs diff --git a/src/images/emf/records/text.rs b/crates/litchi-imgconv/src/emf/records/text.rs similarity index 100% rename from src/images/emf/records/text.rs rename to crates/litchi-imgconv/src/emf/records/text.rs diff --git a/src/images/emf/records/types.rs b/crates/litchi-imgconv/src/emf/records/types.rs similarity index 100% rename from src/images/emf/records/types.rs rename to crates/litchi-imgconv/src/emf/records/types.rs diff --git a/src/images/emf/simd_ops.rs 
b/crates/litchi-imgconv/src/emf/simd_ops.rs similarity index 99% rename from src/images/emf/simd_ops.rs rename to crates/litchi-imgconv/src/emf/simd_ops.rs index 807b69a..4c26d76 100644 --- a/src/images/emf/simd_ops.rs +++ b/crates/litchi-imgconv/src/emf/simd_ops.rs @@ -7,7 +7,7 @@ /// - Point transformations: 4-8x faster /// - Bounding box calculations: 3-5x faster /// - Color conversions: 2-4x faster -use crate::images::emf::records::types::{PointL, XForm}; +use crate::emf::records::types::{PointL, XForm}; /// Transform multiple points using SIMD (when available) /// diff --git a/src/images/emf/svg/buffer.rs b/crates/litchi-imgconv/src/emf/svg/buffer.rs similarity index 98% rename from src/images/emf/svg/buffer.rs rename to crates/litchi-imgconv/src/emf/svg/buffer.rs index 3d8a48d..0b009a1 100644 --- a/src/images/emf/svg/buffer.rs +++ b/crates/litchi-imgconv/src/emf/svg/buffer.rs @@ -3,7 +3,7 @@ /// This module provides buffering and optimization of SVG elements during conversion /// to minimize output size without post-processing. 
use super::state::DeviceContext; -use crate::images::svg_utils::write_num; +use crate::svg_utils::write_num; /// Buffer for grouping and optimizing SVG elements pub struct ElementBuffer { @@ -224,7 +224,7 @@ impl Default for ElementBuffer { #[cfg(test)] mod tests { - use crate::images::svg_utils::fmt_num; + use crate::svg_utils::fmt_num; #[test] fn test_number_formatting() { diff --git a/crates/litchi-imgconv/src/emf/svg/converter.rs b/crates/litchi-imgconv/src/emf/svg/converter.rs new file mode 100644 index 0000000..061f2f3 --- /dev/null +++ b/crates/litchi-imgconv/src/emf/svg/converter.rs @@ -0,0 +1,632 @@ +/// Comprehensive EMF to SVG Converter with In-Place Optimizations +/// +/// Production-ready converter supporting all major EMF record types +/// +/// Optimizations applied during conversion: +/// - Merge consecutive lines into polylines +/// - Group elements with same styles +/// - Eliminate redundant attributes +/// - Optimize number precision (2 decimal places) +/// - Reuse styles via grouping +use super::{ + buffer::ElementBuffer, + path::PathBuilder, + state::{DeviceContext, RenderState}, +}; +use crate::emf::parser::EmfParser; +use crate::emf::records::*; +use crate::svg_utils::write_num; +use litchi_core::error::Result; +use litchi_core::xml::escape::escape_xml; +use std::fmt::Write; +use zerocopy::FromBytes; + +/// EMF to SVG Converter with in-place optimization +pub struct EmfSvgConverter<'a> { + parser: &'a EmfParser, +} + +impl<'a> EmfSvgConverter<'a> { + /// Create new converter + pub fn new(parser: &'a EmfParser) -> Self { + Self { parser } + } + + /// Convert EMF to SVG with in-place optimizations + pub fn convert(&self) -> Result { + let mut state = RenderState::new(); + let mut buffer = ElementBuffer::new(); + + // Process all records with buffering for optimization + for record in &self.parser.records { + if let Some(elements) = self.process_record(record, &mut state)? 
{ + for element in elements { + buffer.add_element(element, &state.dc); + } + } + } + + // Flush any pending buffered elements + buffer.flush(); + + // Build final SVG + self.build_svg(&buffer.elements, &state) + } + + /// Process a single EMF record + fn process_record( + &self, + record: &super::super::parser::EmfRecord, + state: &mut RenderState, + ) -> Result>> { + let record_type = EmrType::from_u32(record.record_type); + + match record_type { + // State management + Some(EmrType::SaveDc) => { + state.push_dc(); + Ok(None) + }, + Some(EmrType::RestoreDc) => { + if record.data.len() >= 4 { + let index = i32::from_le_bytes([ + record.data[0], + record.data[1], + record.data[2], + record.data[3], + ]); + state.pop_dc(index); + } + Ok(None) + }, + + // Transform operations + Some(EmrType::SetWorldTransform) => { + if let Ok((xform, _)) = XForm::read_from_prefix(&record.data) { + state.dc.world_transform = xform; + } + Ok(None) + }, + Some(EmrType::ModifyWorldTransform) => { + if record.data.len() >= 28 + && let Ok((xform, rest)) = XForm::read_from_prefix(&record.data) + { + let mode = u32::from_le_bytes([rest[0], rest[1], rest[2], rest[3]]); + match mode { + 2 => state.dc.world_transform = state.dc.world_transform.multiply(&xform), // Left multiply + 3 => state.dc.world_transform = xform.multiply(&state.dc.world_transform), // Right multiply + _ => state.dc.world_transform = xform, // Set + } + } + Ok(None) + }, + + // Window/Viewport mapping + Some(EmrType::SetWindowExtEx) => { + if let Ok((extent, _)) = SizeL::read_from_prefix(&record.data) { + state.dc.window_ext = (extent.cx, extent.cy); + } + Ok(None) + }, + Some(EmrType::SetWindowOrgEx) => { + if let Ok((origin, _)) = PointL::read_from_prefix(&record.data) { + state.dc.window_org = (origin.x, origin.y); + } + Ok(None) + }, + Some(EmrType::SetViewportExtEx) => { + if let Ok((extent, _)) = SizeL::read_from_prefix(&record.data) { + state.dc.viewport_ext = (extent.cx, extent.cy); + } + Ok(None) + }, + 
Some(EmrType::SetViewportOrgEx) => { + if let Ok((origin, _)) = PointL::read_from_prefix(&record.data) { + state.dc.viewport_org = (origin.x, origin.y); + } + Ok(None) + }, + + // Colors and modes + Some(EmrType::SetTextColor) | Some(EmrType::SetBkColor) => { + if let Ok((color, _)) = ColorRef::read_from_prefix(&record.data) { + if record_type == Some(EmrType::SetTextColor) { + state.dc.text_color = color; + } else { + state.dc.bg_color = color; + } + } + Ok(None) + }, + Some(EmrType::SetBkMode) => { + if record.data.len() >= 4 { + state.dc.bg_mode = u32::from_le_bytes([ + record.data[0], + record.data[1], + record.data[2], + record.data[3], + ]); + } + Ok(None) + }, + Some(EmrType::SetPolyFillMode) => { + if record.data.len() >= 4 { + state.dc.poly_fill_mode = u32::from_le_bytes([ + record.data[0], + record.data[1], + record.data[2], + record.data[3], + ]); + } + Ok(None) + }, + Some(EmrType::SetTextAlign) => { + if record.data.len() >= 4 { + state.dc.text_align = u32::from_le_bytes([ + record.data[0], + record.data[1], + record.data[2], + record.data[3], + ]); + } + Ok(None) + }, + + // Path operations + Some(EmrType::BeginPath) => { + state.begin_path(); + Ok(None) + }, + Some(EmrType::EndPath) => { + state.end_path(); + Ok(None) + }, + Some(EmrType::CloseFigure) => { + if let Some(ref mut builder) = state.path_builder { + builder.close(); + } + Ok(None) + }, + Some(EmrType::MoveToEx) => { + if let Ok((point, _)) = PointL::read_from_prefix(&record.data) { + let (x, y) = state.dc.transform_point(point.x as f64, point.y as f64); + state.dc.current_pos = (x, y); + + if let Some(ref mut builder) = state.path_builder { + builder.move_to(x, y); + } + } + Ok(None) + }, + Some(EmrType::LineTo) => { + if let Ok((point, _)) = PointL::read_from_prefix(&record.data) { + let (x, y) = state.dc.transform_point(point.x as f64, point.y as f64); + + if let Some(ref mut builder) = state.path_builder { + builder.line_to(x, y); + } else { + // Direct line rendering + return 
Ok(Some(vec![self.render_line( + state.dc.current_pos.0, + state.dc.current_pos.1, + x, + y, + &state.dc, + )])); + } + + state.dc.current_pos = (x, y); + } + Ok(None) + }, + Some(EmrType::FillPath) + | Some(EmrType::StrokePath) + | Some(EmrType::StrokeAndFillPath) => { + if let Some(mut builder) = state.take_path() { + builder.optimize(); + let path_str = builder.build(); + + let fill = if record_type == Some(EmrType::FillPath) + || record_type == Some(EmrType::StrokeAndFillPath) + { + state.dc.get_fill_attr() + } else { + "fill=\"none\"".to_string() + }; + + let stroke = if record_type == Some(EmrType::StrokePath) + || record_type == Some(EmrType::StrokeAndFillPath) + { + state.dc.get_stroke_attrs() + } else { + "stroke=\"none\"".to_string() + }; + + let mut svg = format!(""); + + Ok(Some(vec![svg])) + } else { + Ok(None) + } + }, + + // Shape rendering + Some(EmrType::Rectangle) | Some(EmrType::Ellipse) => { + if let Ok((rect, _)) = RectL::read_from_prefix(&record.data) { + let (x1, y1) = state.dc.transform_point(rect.left as f64, rect.top as f64); + let (x2, y2) = state + .dc + .transform_point(rect.right as f64, rect.bottom as f64); + + let svg = if record_type == Some(EmrType::Rectangle) { + self.render_rectangle(x1, y1, x2 - x1, y2 - y1, &state.dc) + } else { + self.render_ellipse( + (x1 + x2) / 2.0, + (y1 + y2) / 2.0, + (x2 - x1) / 2.0, + (y2 - y1) / 2.0, + &state.dc, + ) + }; + + Ok(Some(vec![svg])) + } else { + Ok(None) + } + }, + Some(EmrType::RoundRect) => { + if let Ok((hdr, _)) = EmrRoundRect::read_from_prefix(&record.data) { + let (x1, y1) = state + .dc + .transform_point(hdr.rect.left as f64, hdr.rect.top as f64); + let (x2, y2) = state + .dc + .transform_point(hdr.rect.right as f64, hdr.rect.bottom as f64); + let rx = hdr.corner.cx as f64 / 2.0; + let ry = hdr.corner.cy as f64 / 2.0; + + Ok(Some(vec![self.render_rounded_rectangle( + (x1, y1, x2 - x1, y2 - y1), + (rx, ry), + &state.dc, + )])) + } else { + Ok(None) + } + }, + + // Polygon rendering + 
Some(EmrType::Polygon) | Some(EmrType::Polyline) | Some(EmrType::PolyBezier) => { + self.render_polygon(record, state, record_type) + }, + Some(EmrType::Polygon16) | Some(EmrType::Polyline16) | Some(EmrType::PolyBezier16) => { + self.render_polygon16(record, state, record_type) + }, + + // Object creation (store in object table) + Some(EmrType::CreatePen) => { + if let Ok((pen, _)) = EmrCreatePen::read_from_prefix(&record.data) { + state.dc.pen = + super::state::Pen::from_create_pen(pen.pen_style, pen.width, pen.color); + } + Ok(None) + }, + Some(EmrType::CreateBrushIndirect) => { + if let Ok((brush, _)) = EmrCreateBrushIndirect::read_from_prefix(&record.data) { + state.dc.brush.style = brush.brush_style; + state.dc.brush.color = brush.color; + state.dc.brush.hatch = Some(brush.brush_hatch); + } + Ok(None) + }, + + // Text rendering + Some(EmrType::ExtTextOutA) | Some(EmrType::ExtTextOutW) => { + self.render_text(record, state, record_type == Some(EmrType::ExtTextOutW)) + }, + + // Unimplemented records - log but don't error + _ => Ok(None), + } + } + + /// Render a line (optimized format - minimal whitespace, no trailing zeros) + fn render_line(&self, x1: f64, y1: f64, x2: f64, y2: f64, dc: &DeviceContext) -> String { + let mut s = String::with_capacity(128); + s.push_str(""); + s + } + + /// Render a rectangle (optimized) + fn render_rectangle( + &self, + x: f64, + y: f64, + width: f64, + height: f64, + dc: &DeviceContext, + ) -> String { + let mut s = String::with_capacity(128); + s.push_str(""); + s + } + + /// Render a rounded rectangle (optimized) + fn render_rounded_rectangle( + &self, + rect: (f64, f64, f64, f64), // (x, y, width, height) + corners: (f64, f64), // (rx, ry) + dc: &DeviceContext, + ) -> String { + let (x, y, width, height) = rect; + let (rx, ry) = corners; + let mut s = String::with_capacity(128); + s.push_str(""); + s + } + + /// Render an ellipse (optimized) + fn render_ellipse(&self, cx: f64, cy: f64, rx: f64, ry: f64, dc: &DeviceContext) 
-> String { + let mut s = String::with_capacity(128); + s.push_str(""); + s + } + + /// Render polygon (32-bit coordinates) + fn render_polygon( + &self, + record: &super::super::parser::EmfRecord, + state: &RenderState, + record_type: Option, + ) -> Result>> { + if let Ok((hdr, rest)) = EmrPolyHeader::read_from_prefix(&record.data) { + if let Some(poly_data) = PolygonData::from_poly32(rest, 0, hdr.count as usize) { + let mut builder = PathBuilder::new(); + + for (i, (x, y)) in poly_data.iter_points().enumerate() { + let (px, py) = state.dc.transform_point(x as f64, y as f64); + if i == 0 { + builder.move_to(px, py); + } else { + builder.line_to(px, py); + } + } + + if record_type == Some(EmrType::Polygon) { + builder.close(); + } + + builder.optimize(); + let path_str = builder.build(); + + let is_filled = record_type == Some(EmrType::Polygon); + let fill = if is_filled { + state.dc.get_fill_attr() + } else { + "fill=\"none\"".to_string() + }; + + Ok(Some(vec![format!( + "", + path_str, + fill, + state.dc.get_stroke_attrs() + )])) + } else { + Ok(None) + } + } else { + Ok(None) + } + } + + /// Render polygon (16-bit coordinates) + fn render_polygon16( + &self, + record: &super::super::parser::EmfRecord, + state: &RenderState, + record_type: Option, + ) -> Result>> { + if let Ok((hdr, rest)) = EmrPoly16Header::read_from_prefix(&record.data) { + if let Some(poly_data) = PolygonData::from_poly16(rest, 0, hdr.count as usize) { + let mut builder = PathBuilder::new(); + + for (i, (x, y)) in poly_data.iter_points().enumerate() { + let (px, py) = state.dc.transform_point(x as f64, y as f64); + if i == 0 { + builder.move_to(px, py); + } else { + builder.line_to(px, py); + } + } + + if record_type == Some(EmrType::Polygon16) { + builder.close(); + } + + builder.optimize(); + let path_str = builder.build(); + + let is_filled = record_type == Some(EmrType::Polygon16); + let fill = if is_filled { + state.dc.get_fill_attr() + } else { + "fill=\"none\"".to_string() + }; + + 
Ok(Some(vec![format!( + "", + path_str, + fill, + state.dc.get_stroke_attrs() + )])) + } else { + Ok(None) + } + } else { + Ok(None) + } + } + + /// Render text + fn render_text( + &self, + record: &super::super::parser::EmfRecord, + state: &RenderState, + is_unicode: bool, + ) -> Result>> { + if let Ok((hdr, _)) = EmrExtTextOutHeader::read_from_prefix(&record.data) { + let (x, y) = state + .dc + .transform_point(hdr.text.reference.x as f64, hdr.text.reference.y as f64); + + // The off_string is relative to the start of the EMF record (including type+size), + // but record.data starts AFTER type+size (8 bytes). So we need to subtract 8. + let string_offset = if hdr.text.off_string >= 8 { + (hdr.text.off_string - 8) as usize + } else { + hdr.text.off_string as usize + }; + + // Extract text string + let text = if is_unicode { + // Unicode (UTF-16LE) + self.extract_unicode_string( + &record.data, + string_offset, + hdr.text.num_chars as usize, + ) + } else { + // ANSI + self.extract_ansi_string(&record.data, string_offset, hdr.text.num_chars as usize) + }; + + if !text.is_empty() { + let mut svg = String::with_capacity(128 + text.len()); + svg.push_str("'); + svg.push_str(&escape_xml(&text)); + svg.push_str(""); + Ok(Some(vec![svg])) + } else { + Ok(None) + } + } else { + Ok(None) + } + } + + /// Extract Unicode string + fn extract_unicode_string(&self, data: &[u8], offset: usize, count: usize) -> String { + let byte_count = count * 2; + if offset + byte_count > data.len() { + return String::new(); + } + + let mut chars = Vec::with_capacity(count); + for i in 0..count { + let idx = offset + i * 2; + if idx + 2 <= data.len() { + let ch = u16::from_le_bytes([data[idx], data[idx + 1]]); + if ch != 0 { + chars.push(ch); + } + } + } + + String::from_utf16_lossy(&chars) + } + + /// Extract ANSI string + fn extract_ansi_string(&self, data: &[u8], offset: usize, count: usize) -> String { + if offset + count > data.len() { + return String::new(); + } + + 
String::from_utf8_lossy(&data[offset..offset + count]) + .trim_end_matches('\0') + .to_string() + } + + /// Build final SVG document + fn build_svg(&self, elements: &[String], _state: &RenderState) -> Result { + let header = &self.parser.header; + let width = header.width(); + let height = header.height(); + + let mut svg = format!( + "", + width, height, header.bounds.0, header.bounds.1, width, height + ); + + for element in elements { + svg.push_str(element); + } + + svg.push_str(""); + + Ok(svg) + } +} diff --git a/src/images/emf/svg/mod.rs b/crates/litchi-imgconv/src/emf/svg/mod.rs similarity index 100% rename from src/images/emf/svg/mod.rs rename to crates/litchi-imgconv/src/emf/svg/mod.rs diff --git a/src/images/emf/svg/path.rs b/crates/litchi-imgconv/src/emf/svg/path.rs similarity index 100% rename from src/images/emf/svg/path.rs rename to crates/litchi-imgconv/src/emf/svg/path.rs diff --git a/src/images/emf/svg/state.rs b/crates/litchi-imgconv/src/emf/svg/state.rs similarity index 99% rename from src/images/emf/svg/state.rs rename to crates/litchi-imgconv/src/emf/svg/state.rs index 4be996d..af90a01 100644 --- a/src/images/emf/svg/state.rs +++ b/crates/litchi-imgconv/src/emf/svg/state.rs @@ -2,11 +2,11 @@ /// /// Manages device context stack, transforms, clipping, and graphics state use super::path::PathBuilder; -use crate::images::emf::records::*; -use crate::images::svg_utils::write_num; +use crate::emf::records::*; +use crate::svg_utils::write_num; // Import hatch style constants -use crate::images::emf::records::hatch_style; +use crate::emf::records::hatch_style; use xml_minifier::minified_xml_format; diff --git a/crates/litchi-imgconv/src/lib.rs b/crates/litchi-imgconv/src/lib.rs new file mode 100644 index 0000000..dfe20de --- /dev/null +++ b/crates/litchi-imgconv/src/lib.rs @@ -0,0 +1,189 @@ +//! Image processing and conversion for Litchi. +//! +//! This crate provides parsing and conversion utilities for the image +//! 
formats used inside Microsoft Office documents — Enhanced Metafile +//! (EMF), Windows Metafile (WMF), Macintosh PICT — and helpers for the +//! BLIP (Binary Large Image or Picture) records that wrap them. +//! +//! It is intentionally a *leaf* crate: it depends only on `litchi-core` +//! plus the external `image`, `flate2`, `bytes`, `xml-minifier`, and +//! `zerocopy` crates. It does **not** depend on the OLE / OOXML / ODF +//! format crates. The integration glue that bridges OLE Escher records +//! to these decoders lives in the umbrella `litchi` crate (under +//! `litchi::images::extractor`) because that bridge has a hard +//! dependency on `crate::ole`. +//! +//! # Quick example +//! +//! ```no_run +//! use litchi_imgconv::blip::Blip; +//! use litchi_imgconv::convert_blip_to_png; +//! +//! let blip_data: Vec = vec![/* BLIP record bytes */]; +//! let blip = Blip::parse(&blip_data)?; +//! let png_bytes = convert_blip_to_png(&blip, Some(800), None)?; +//! # Ok::<(), litchi_core::error::Error>(()) +//! ``` + +#![allow(missing_docs)] + +pub mod blip; +pub mod bse; +pub mod emf; +pub mod pict; +pub mod svg; +pub mod svg_utils; +pub mod wmf; + +pub use blip::{BitmapBlip, Blip, BlipType, MetafileBlip, RecordHeader}; +pub use bse::{BlipStore, BlipStoreEntry}; +use image::ImageFormat; +use litchi_core::error::Result; + +/// Convert a BLIP record to a raster image format +/// +/// This is a high-level convenience function that handles all BLIP types +/// (EMF, WMF, PICT, JPEG, PNG, DIB, TIFF) and converts them to the specified format. 
+/// +/// # Arguments +/// * `blip` - Parsed BLIP record +/// * `format` - Target image format +/// * `width` - Optional output width +/// * `height` - Optional output height +/// +/// # Returns +/// Encoded image bytes in the target format +pub fn convert_blip_to_format<'data>( + blip: &Blip<'data>, + format: ImageFormat, + width: Option, + height: Option, +) -> Result> { + match blip { + Blip::Metafile(metafile) => { + let data = metafile.decompress()?; + match metafile.blip_type() { + Some(BlipType::Emf) => emf::convert_emf(&data, format, width, height), + Some(BlipType::Wmf) => wmf::convert_wmf(&data, format, width, height), + Some(BlipType::Pict) => pict::convert_pict(&data, format, width, height), + _ => Err(litchi_core::error::Error::ParseError( + "Unknown metafile BLIP type".into(), + )), + } + }, + Blip::Bitmap(bitmap) => { + // For bitmap formats that are already in a modern format, we may just need + // to re-encode or pass through + let img = image::load_from_memory(&bitmap.picture_data[..]).map_err(|e| { + litchi_core::error::Error::ParseError(format!("Failed to load bitmap: {}", e)) + })?; + + // Resize if requested + let img = match (width, height) { + (Some(w), Some(h)) if img.width() != w || img.height() != h => { + image::DynamicImage::ImageRgba8(image::imageops::resize( + &img, + w, + h, + image::imageops::FilterType::Lanczos3, + )) + }, + (Some(w), None) => { + let aspect = img.height() as f64 / img.width() as f64; + let h = (w as f64 * aspect) as u32; + image::DynamicImage::ImageRgba8(image::imageops::resize( + &img, + w, + h, + image::imageops::FilterType::Lanczos3, + )) + }, + (None, Some(h)) => { + let aspect = img.width() as f64 / img.height() as f64; + let w = (h as f64 * aspect) as u32; + image::DynamicImage::ImageRgba8(image::imageops::resize( + &img, + w, + h, + image::imageops::FilterType::Lanczos3, + )) + }, + _ => img, + }; + + // Encode to target format + let mut buffer = std::io::Cursor::new(Vec::new()); + img.write_to(&mut buffer, 
format).map_err(|e| { + litchi_core::error::Error::ParseError(format!("Failed to encode image: {}", e)) + })?; + + Ok(buffer.into_inner()) + }, + } +} + +/// Convert a BLIP record to PNG format +/// +/// # Arguments +/// * `blip` - Parsed BLIP record +/// * `width` - Optional output width +/// * `height` - Optional output height +/// +/// # Returns +/// PNG-encoded image bytes +/// +/// # Example +/// ```no_run +/// use litchi_imgconv::{blip::Blip, convert_blip_to_png}; +/// +/// let blip_data: Vec = vec![/* BLIP record bytes */]; +/// let blip = Blip::parse(&blip_data)?; +/// let png = convert_blip_to_png(&blip, Some(800), None)?; +/// # Ok::<(), litchi_core::error::Error>(()) +/// ``` +pub fn convert_blip_to_png<'data>( + blip: &Blip<'data>, + width: Option, + height: Option, +) -> Result> { + convert_blip_to_format(blip, ImageFormat::Png, width, height) +} + +/// Convert a BLIP record to JPEG format +pub fn convert_blip_to_jpeg<'data>( + blip: &Blip<'data>, + width: Option, + height: Option, +) -> Result> { + convert_blip_to_format(blip, ImageFormat::Jpeg, width, height) +} + +/// Convert a BLIP record to WebP format +pub fn convert_blip_to_webp<'data>( + blip: &Blip<'data>, + width: Option, + height: Option, +) -> Result> { + convert_blip_to_format(blip, ImageFormat::WebP, width, height) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_blip_type_extensions() { + assert_eq!(BlipType::Emf.extension(), "emf"); + assert_eq!(BlipType::Png.extension(), "png"); + assert_eq!(BlipType::Jpeg.extension(), "jpg"); + } + + #[test] + fn test_blip_type_classification() { + assert!(BlipType::Emf.is_metafile()); + assert!(BlipType::Wmf.is_metafile()); + assert!(BlipType::Pict.is_metafile()); + assert!(!BlipType::Jpeg.is_metafile()); + assert!(!BlipType::Png.is_metafile()); + } +} diff --git a/crates/litchi-imgconv/src/pict/converter.rs b/crates/litchi-imgconv/src/pict/converter.rs new file mode 100644 index 0000000..fec0005 --- /dev/null +++ 
b/crates/litchi-imgconv/src/pict/converter.rs @@ -0,0 +1,365 @@ +// PICT to raster image converter +// +// Converts Macintosh PICT files to modern raster formats (PNG, JPEG, WebP). + +use super::data::{get_bitmap_pixel, stretch_coordinates, unpack_bits}; +use super::parser::PictParser; +use super::types::{PictBitmap, PictRect}; +use image::{DynamicImage, ImageBuffer, ImageFormat, Rgba, RgbaImage}; +use litchi_core::error::{Error, Result}; +use std::io::Cursor; + +/// Options for PICT to raster conversion +#[derive(Debug, Clone)] +pub struct PictToRasterOptions { + /// Target width (None = use source dimensions) + pub width: Option, + /// Target height (None = use source dimensions) + pub height: Option, + /// Background color for rendering + pub background_color: Rgba, +} + +impl Default for PictToRasterOptions { + fn default() -> Self { + Self { + width: None, + height: None, + background_color: Rgba([255, 255, 255, 255]), + } + } +} + +/// Convert Rect fields from big-endian to native endianness +#[inline] +pub fn rect_to_native(rect: &mut super::types::PictRect) { + rect.top = i16::from_be(rect.top); + rect.left = i16::from_be(rect.left); + rect.bottom = i16::from_be(rect.bottom); + rect.right = i16::from_be(rect.right); +} + +/// Convert Bitmap fields from big-endian to native endianness +#[inline] +pub fn bitmap_to_native(bitmap: &mut super::types::PictBitmap) { + bitmap.row_bytes = i16::from_be(bitmap.row_bytes); + rect_to_native(&mut bitmap.bounds); + rect_to_native(&mut bitmap.src_rect); + rect_to_native(&mut bitmap.dst_rect); + bitmap.mode = i16::from_be(bitmap.mode); +} + +/// Convert Region fields from big-endian to native endianness +#[inline] +pub fn region_to_native(region: &mut super::types::PictRegion) { + region.region_size = i16::from_be(region.region_size); + rect_to_native(&mut region.rect); +} + +/// PICT to raster converter +pub struct PictConverter { + parser: PictParser, + options: PictToRasterOptions, +} + +impl PictConverter { + /// Create 
a new PICT converter + pub fn new(parser: PictParser, options: PictToRasterOptions) -> Self { + Self { parser, options } + } + + /// Calculate output dimensions maintaining aspect ratio + fn calculate_dimensions(&self) -> (u32, u32) { + let src_width = self.parser.width().max(1) as u32; + let src_height = self.parser.height().max(1) as u32; + + match (self.options.width, self.options.height) { + (Some(w), Some(h)) => (w, h), + (Some(w), None) => { + let aspect = src_height as f64 / src_width as f64; + let h = (w as f64 * aspect) as u32; + (w, h) + }, + (None, Some(h)) => { + let aspect = src_width as f64 / src_height as f64; + let w = (h as f64 * aspect) as u32; + (w, h) + }, + (None, None) => { + let max_dim = 4096; + if src_width > max_dim || src_height > max_dim { + let scale = (max_dim as f64) / src_width.max(src_height) as f64; + ( + (src_width as f64 * scale) as u32, + (src_height as f64 * scale) as u32, + ) + } else { + (src_width, src_height) + } + }, + } + } + + /// Try to extract embedded bitmap from PICT records + /// + /// PICT files can contain bitmap data in: + /// - DirectBitsRect (0x009A) + /// - PackedDirectBitsRect (0x009B) + /// - CompressedQuickTime (0x8200) + fn extract_embedded_bitmap(&self) -> Option { + for record in &self.parser.records { + match record.opcode { + 0x009A | 0x009B => { + // DirectBitsRect or PackedDirectBitsRect + if let Some(img) = self.parse_direct_bits(&record.data) { + return Some(img); + } + }, + 0x8200 => { + // CompressedQuickTime - contains JPEG or other compressed data + if let Some(img) = self.parse_compressed_quicktime(&record.data) { + return Some(img); + } + }, + _ => {}, + } + } + None + } + + /// Parse DirectBitsRect data + /// + /// Handles PackBitsRect (0x0098) and PackedDirectBitsRect (0x009B) opcodes. + /// These contain compressed bitmap data that needs to be decompressed and rendered. 
+ fn parse_direct_bits(&self, data: &[u8]) -> Option { + if data.len() < std::mem::size_of::() { + return None; + } + + // Parse the bitmap header (big-endian format) + let mut bitmap: PictBitmap = + unsafe { std::ptr::read_unaligned(data.as_ptr() as *const PictBitmap) }; + + // Convert from big-endian to native endianness + bitmap_to_native(&mut bitmap); + + // Calculate dimensions + let width = (bitmap.bounds.right - bitmap.bounds.left) as u32; + let height = (bitmap.bounds.bottom - bitmap.bounds.top) as u32; + + if width == 0 || height == 0 || width > 8192 || height > 8192 { + return None; + } + + // Calculate bitmap data size + let _row_bytes = bitmap.row_bytes as usize; + let bitmap_data_start = std::mem::size_of::(); + let bitmap_data_end = data.len(); + + if bitmap_data_start >= bitmap_data_end { + return None; + } + + let compressed_data = &data[bitmap_data_start..]; + + // Create output image + let mut img = ImageBuffer::new(width, height); + + // Decompress and render each row + let mut data_offset = 0; + let expected_row_size = (width as usize).div_ceil(8); // Round up for byte alignment + + for y in 0..height as usize { + if data_offset >= compressed_data.len() { + break; + } + + // Read the byte count for this row + if data_offset + 1 >= compressed_data.len() { + break; + } + let byte_count = compressed_data[data_offset] as usize; + data_offset += 1; + + // Skip the compressed data for this row (we'll decompress it) + let row_compressed_start = data_offset; + let row_compressed_end = std::cmp::min(data_offset + byte_count, compressed_data.len()); + data_offset = row_compressed_end; + + if byte_count == 0 { + continue; + } + + // Decompress this row + let row_compressed = &compressed_data[row_compressed_start..row_compressed_end]; + match unpack_bits(row_compressed, expected_row_size) { + Ok(unpacked_row) => { + // Render the unpacked row to the image + self.render_bitmap_row(&unpacked_row, &bitmap, y as i32, &mut img); + }, + Err(_) => { + // If 
decompression fails, skip this row + continue; + }, + } + } + + Some(DynamicImage::ImageRgba8(img)) + } + + /// Render a single decompressed bitmap row to the image + fn render_bitmap_row( + &self, + unpacked_row: &[u8], + bitmap: &PictBitmap, + y: i32, + img: &mut ImageBuffer, Vec>, + ) { + let width = (bitmap.bounds.right - bitmap.bounds.left) as u32; + let _height = (bitmap.bounds.bottom - bitmap.bounds.top) as u32; + + // Calculate source and destination rectangles relative to image bounds + let src_width = bitmap.src_rect.right - bitmap.src_rect.left; + let src_height = bitmap.src_rect.bottom - bitmap.src_rect.top; + let dst_width = bitmap.dst_rect.right - bitmap.dst_rect.left; + let dst_height = bitmap.dst_rect.bottom - bitmap.dst_rect.top; + + // Create adjusted rectangles relative to bitmap bounds + let src_rect = PictRect { + left: bitmap.src_rect.left - bitmap.bounds.left, + top: bitmap.src_rect.top - bitmap.bounds.top, + right: bitmap.src_rect.left - bitmap.bounds.left + src_width, + bottom: bitmap.src_rect.top - bitmap.bounds.top + src_height, + }; + + let dst_rect = PictRect { + left: bitmap.dst_rect.left - self.parser.header.frame.1, + top: bitmap.dst_rect.top - self.parser.header.frame.0, + right: bitmap.dst_rect.left - self.parser.header.frame.1 + dst_width, + bottom: bitmap.dst_rect.top - self.parser.header.frame.0 + dst_height, + }; + + // Render each pixel in the destination row + for x in 0..width as i32 { + let mut src_x = 0; + let mut src_y = 0; + + stretch_coordinates(&dst_rect, &src_rect, x, y, &mut src_x, &mut src_y); + + let color_u32 = get_bitmap_pixel(unpacked_row, &bitmap.bounds, src_x, src_y); + let color = Rgba([ + ((color_u32 >> 16) & 0xFF) as u8, // R + ((color_u32 >> 8) & 0xFF) as u8, // G + (color_u32 & 0xFF) as u8, // B + ((color_u32 >> 24) & 0xFF) as u8, // A + ]); + + if x < img.width() as i32 && y < img.height() as i32 { + img.put_pixel(x as u32, y as u32, color); + } + } + } + + /// Parse CompressedQuickTime data + /// + /// 
QuickTime compressed images are often JPEG + fn parse_compressed_quicktime(&self, data: &[u8]) -> Option { + // QuickTime compressed data may contain JPEG or other formats + // Try to detect and decode + + // Look for JPEG markers + if data.len() > 2 { + for i in 0..data.len() - 2 { + if data[i] == 0xFF && data[i + 1] == 0xD8 { + // Found JPEG SOI marker + if let Ok(img) = image::load_from_memory(&data[i..]) { + return Some(img); + } + } + } + } + + None + } + + /// Create a placeholder image + fn create_placeholder(&self, width: u32, height: u32) -> RgbaImage { + let mut img = ImageBuffer::from_pixel(width, height, self.options.background_color); + + let border_color = Rgba([128, 128, 128, 255]); + + // Draw border + for x in 0..width { + if x < height { + img.put_pixel(x, 0, border_color); + img.put_pixel(x, height - 1, border_color); + } + } + for y in 0..height { + if y < width { + img.put_pixel(0, y, border_color); + img.put_pixel(width - 1, y, border_color); + } + } + + // Draw diagonals + let min_dim = width.min(height); + for i in 0..min_dim { + img.put_pixel(i, i, border_color); + if height > i { + img.put_pixel(i, height - 1 - i, border_color); + } + } + + img + } + + /// Convert PICT to a raster image + pub fn convert_to_image(&self) -> Result { + let (target_width, target_height) = self.calculate_dimensions(); + + // Try to extract embedded bitmap first + if let Some(embedded) = self.extract_embedded_bitmap() { + if embedded.width() != target_width || embedded.height() != target_height { + return Ok(DynamicImage::ImageRgba8(image::imageops::resize( + &embedded, + target_width, + target_height, + image::imageops::FilterType::Lanczos3, + ))); + } + return Ok(embedded); + } + + // Create placeholder + let placeholder = self.create_placeholder(target_width, target_height); + Ok(DynamicImage::ImageRgba8(placeholder)) + } + + /// Convert PICT to specified image format + pub fn convert_to_format(&self, format: ImageFormat) -> Result> { + let image = 
self.convert_to_image()?; + + let mut buffer = Cursor::new(Vec::new()); + image + .write_to(&mut buffer, format) + .map_err(|e| Error::ParseError(format!("Failed to encode image: {}", e)))?; + + Ok(buffer.into_inner()) + } + + /// Convert PICT to PNG bytes + pub fn convert_to_png(&self) -> Result> { + self.convert_to_format(ImageFormat::Png) + } + + /// Convert PICT to JPEG bytes + pub fn convert_to_jpeg(&self) -> Result> { + self.convert_to_format(ImageFormat::Jpeg) + } + + /// Convert PICT to WebP bytes + pub fn convert_to_webp(&self) -> Result> { + self.convert_to_format(ImageFormat::WebP) + } +} diff --git a/src/images/pict/data.rs b/crates/litchi-imgconv/src/pict/data.rs similarity index 99% rename from src/images/pict/data.rs rename to crates/litchi-imgconv/src/pict/data.rs index 076e41e..c614f68 100644 --- a/src/images/pict/data.rs +++ b/crates/litchi-imgconv/src/pict/data.rs @@ -3,7 +3,7 @@ //! Provides functions for decompressing and manipulating binary data, //! particularly for PICT format processing. -use crate::common::error::{Error, Result}; +use litchi_core::error::{Error, Result}; /// UnpackBits decompression algorithm /// diff --git a/crates/litchi-imgconv/src/pict/mod.rs b/crates/litchi-imgconv/src/pict/mod.rs new file mode 100644 index 0000000..93fce30 --- /dev/null +++ b/crates/litchi-imgconv/src/pict/mod.rs @@ -0,0 +1,91 @@ +// Macintosh PICT format parser and converter +// +// This module provides functionality to parse PICT data and convert it to +// modern image formats (PNG, JPEG, WebP). +// +// PICT is the native graphics metafile format for Mac OS Classic. There are +// two versions: PICT 1 (original) and PICT 2 (extended). 
+// +// References: +// - Inside Macintosh: Imaging With QuickDraw +// - Apple Technical Note TN1023: Understanding the PICT Format + +pub mod converter; +pub mod parser; + +/// Common data types for PICT format +mod types; + +/// Data manipulation and compression utilities +mod data; + +pub use converter::{PictConverter, PictToRasterOptions}; +pub use parser::{PictParser, PictVersion}; + +use image::ImageFormat; +use litchi_core::error::Result; + +/// Convert PICT data to a raster image in the specified format +/// +/// # Arguments +/// * `pict_data` - Raw PICT file data +/// * `format` - Target image format (PNG, JPEG, WebP) +/// * `width` - Optional output width (maintains aspect ratio if only one dimension specified) +/// * `height` - Optional output height +/// +/// # Returns +/// Encoded image bytes in the target format +/// +/// # Example +/// ```no_run +/// use litchi_imgconv::pict::convert_pict; +/// use image::ImageFormat; +/// +/// let pict_data = std::fs::read("image.pict")?; +/// let png_data = convert_pict(&pict_data, ImageFormat::Png, Some(800), None)?; +/// std::fs::write("output.png", png_data)?; +/// # Ok::<(), Box>(()) +/// ``` +pub fn convert_pict( + pict_data: &[u8], + format: ImageFormat, + width: Option, + height: Option, +) -> Result> { + let parser = PictParser::new(pict_data)?; + let options = PictToRasterOptions { + width, + height, + background_color: image::Rgba([255, 255, 255, 255]), + }; + + let converter = PictConverter::new(parser, options); + converter.convert_to_format(format) +} + +/// Convert PICT data to PNG format +pub fn convert_pict_to_png( + pict_data: &[u8], + width: Option, + height: Option, +) -> Result> { + convert_pict(pict_data, ImageFormat::Png, width, height) +} + +/// Convert PICT data to JPEG format +pub fn convert_pict_to_jpeg( + pict_data: &[u8], + width: Option, + height: Option, +) -> Result> { + convert_pict(pict_data, ImageFormat::Jpeg, width, height) +} + +/// Convert PICT data to WebP format +pub fn 
convert_pict_to_webp( + pict_data: &[u8], + width: Option, + height: Option, +) -> Result> { + convert_pict(pict_data, ImageFormat::WebP, width, height) +} diff --git a/crates/litchi-imgconv/src/pict/parser.rs b/crates/litchi-imgconv/src/pict/parser.rs new file mode 100644 index 0000000..143734a --- /dev/null +++ b/crates/litchi-imgconv/src/pict/parser.rs @@ -0,0 +1,333 @@ +// PICT file parser +// +// Parses Macintosh PICT format records and extracts relevant information + +use litchi_core::error::{Error, Result}; +use zerocopy::{BE, FromBytes, I16, U16}; + +/// PICT file version +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PictVersion { + /// Version 1 (original format) + V1, + /// Version 2 (extended format) + V2, +} + +/// PICT file header +/// +/// PICT files may have an optional 512-byte header (used by some applications) +/// followed by the actual PICT data. +#[derive(Debug, Clone)] +pub struct PictHeader { + /// Version of the PICT file + pub version: PictVersion, + /// Picture frame (top, left, bottom, right) + pub frame: (i16, i16, i16, i16), + /// Whether this file has the 512-byte header + pub has_512_header: bool, +} + +impl PictHeader { + /// Parse PICT header from data + pub fn parse(data: &[u8]) -> Result { + if data.len() < 10 { + return Err(Error::ParseError("PICT data too short".into())); + } + + let mut offset = 0; + + // Check for optional 512-byte header + // This header is often all zeros or contains application-specific data + let has_512_header = data.len() > 512 && Self::check_512_header(data); + if has_512_header { + offset = 512; + } + + // PICT data starts with: + // - 10 bytes of size/frame info for version 1 + // - picSize (2 bytes) + picFrame (8 bytes) for both versions + + if offset + 10 > data.len() { + return Err(Error::ParseError( + "Insufficient data for PICT header".into(), + )); + } + + // Parse picture size (used in version 1, may be 0 in version 2) + let _pic_size = U16::::read_from_bytes(&data[offset..offset + 
2]) + .map_err(|_| Error::ParseError("Failed to read pic size".into()))? + .get(); + offset += 2; + + // Parse picture frame (top, left, bottom, right) - big-endian + let top = I16::::read_from_bytes(&data[offset..offset + 2]) + .map_err(|_| Error::ParseError("Failed to read top".into()))? + .get(); + let left = I16::::read_from_bytes(&data[offset + 2..offset + 4]) + .map_err(|_| Error::ParseError("Failed to read left".into()))? + .get(); + let bottom = I16::::read_from_bytes(&data[offset + 4..offset + 6]) + .map_err(|_| Error::ParseError("Failed to read bottom".into()))? + .get(); + let right = I16::::read_from_bytes(&data[offset + 6..offset + 8]) + .map_err(|_| Error::ParseError("Failed to read right".into()))? + .get(); + offset += 8; + + // Determine version + // Version 2 files have a version opcode (0x0011) followed by 0x02FF + let version = if offset + 4 <= data.len() { + let op1 = U16::::read_from_bytes(&data[offset..offset + 2]) + .map_err(|_| Error::ParseError("Failed to read op1".into()))? + .get(); + let op2 = U16::::read_from_bytes(&data[offset + 2..offset + 4]) + .map_err(|_| Error::ParseError("Failed to read op2".into()))? 
+ .get(); + + if op1 == 0x0011 && op2 == 0x02FF { + PictVersion::V2 + } else { + PictVersion::V1 + } + } else { + PictVersion::V1 + }; + + Ok(Self { + version, + frame: (top, left, bottom, right), + has_512_header, + }) + } + + /// Check if data starts with 512-byte header + fn check_512_header(data: &[u8]) -> bool { + if data.len() < 522 { + return false; + } + + // The 512-byte header is application-specific + // After it, we should see valid PICT data + // Check if byte 512-513 looks like a reasonable picture size + // or if bytes 522-523 look like a version opcode (0x0011) + let potential_size = U16::::read_from_bytes(&data[512..514]) + .map(|v| v.get()) + .unwrap_or(0); + let potential_opcode = U16::::read_from_bytes(&data[522..524]) + .map(|v| v.get()) + .unwrap_or(0); + + // If we see version opcode at expected position, likely has 512 header + potential_opcode == 0x0011 || potential_size < 0x4000 + } + + /// Get width of the picture + pub fn width(&self) -> i16 { + self.frame.3 - self.frame.1 + } + + /// Get height of the picture + pub fn height(&self) -> i16 { + self.frame.2 - self.frame.0 + } +} + +/// PICT opcode +/// +/// PICT files are composed of opcodes that describe drawing operations +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PictOpcode { + /// No operation + Nop = 0x0000, + /// Clipping region + Clip = 0x0001, + /// Background pattern + BkPat = 0x0002, + /// Text font + TxFont = 0x0003, + /// Text face + TxFace = 0x0004, + /// Text mode + TxMode = 0x0005, + /// Version opcode + Version = 0x0011, + /// Extended version 2 header + HeaderOp = 0x0C00, + /// End of picture + EndPic = 0x00FF, + /// Direct bits rect (includes bitmap data) + DirectBitsRect = 0x009A, + /// Packed direct bits rect + PackedDirectBitsRect = 0x009B, + /// Compressed QuickTime image + CompressedQuickTime = 0x8200, +} + +impl PictOpcode { + /// Create from u16 value + pub fn from_u16(value: u16) -> Option { + match value { + 0x0000 => Some(Self::Nop), + 0x0001 => 
Some(Self::Clip), + 0x0002 => Some(Self::BkPat), + 0x0003 => Some(Self::TxFont), + 0x0004 => Some(Self::TxFace), + 0x0005 => Some(Self::TxMode), + 0x0011 => Some(Self::Version), + 0x0C00 => Some(Self::HeaderOp), + 0x00FF => Some(Self::EndPic), + 0x009A => Some(Self::DirectBitsRect), + 0x009B => Some(Self::PackedDirectBitsRect), + 0x8200 => Some(Self::CompressedQuickTime), + _ => None, + } + } +} + +/// PICT record +#[derive(Debug, Clone)] +pub struct PictRecord { + /// Opcode + pub opcode: u16, + /// Record data + pub data: Vec, +} + +/// PICT file parser +pub struct PictParser { + /// PICT header + pub header: PictHeader, + /// All opcodes/records + pub records: Vec, + /// Raw PICT data + data: Vec, +} + +impl PictParser { + /// Create a new PICT parser from raw data + pub fn new(data: &[u8]) -> Result { + let header = PictHeader::parse(data)?; + + let data_start = if header.has_512_header { 512 } else { 0 }; + let mut offset = data_start + 10; // Skip size and frame + + // Skip version opcode if present + if header.version == PictVersion::V2 && offset + 4 <= data.len() { + offset += 4; // Skip version opcode and data + } + + let mut records = Vec::new(); + + // Parse records + while offset < data.len() { + if offset + 2 > data.len() { + break; + } + + let opcode = U16::::read_from_bytes(&data[offset..offset + 2]) + .map_err(|_| Error::ParseError("Failed to read opcode".into()))? 
+ .get(); + offset += 2; + + // Handle EndPic + if opcode == 0x00FF { + records.push(PictRecord { + opcode, + data: Vec::new(), + }); + break; + } + + // Determine data size based on opcode + let data_size = Self::get_opcode_data_size(opcode, data, offset)?; + + if offset + data_size > data.len() { + break; + } + + let record_data = data[offset..offset + data_size].to_vec(); + offset += data_size; + + records.push(PictRecord { + opcode, + data: record_data, + }); + } + + Ok(Self { + header, + records, + data: data.to_vec(), + }) + } + + /// Get the data size for an opcode + fn get_opcode_data_size(opcode: u16, data: &[u8], offset: usize) -> Result { + match opcode { + // Fixed size opcodes + 0x0000 => Ok(0), // Nop + 0x0003 => Ok(2), // TxFont + 0x0004 => Ok(1), // TxFace + 0x0005 => Ok(2), // TxMode + 0x0011 => Ok(2), // Version + // Variable size opcodes - read size from data + 0x0001 | // Clip + 0x00A1 | // Long comment + 0x009A | // DirectBitsRect + 0x009B => { // PackedDirectBitsRect + if offset + 2 > data.len() { + return Err(Error::ParseError("Insufficient data for opcode size".into())); + } + let size = U16::::read_from_bytes(&data[offset..offset + 2]) + .map_err(|_| Error::ParseError("Failed to read size".into()))? + .get() as usize; + Ok(size + 2) // Include size field itself + } + // Default: try to read size field + _ => { + if offset + 2 > data.len() { + return Ok(0); + } + // Many opcodes have a 2-byte size field + let size = U16::::read_from_bytes(&data[offset..offset + 2]) + .map_err(|_| Error::ParseError("Failed to read size".into()))? 
+ .get() as usize; + Ok(size.min(data.len() - offset)) + } + } + } + + /// Get the raw PICT data + pub fn data(&self) -> &[u8] { + &self.data + } + + /// Get width + pub fn width(&self) -> i32 { + self.header.width() as i32 + } + + /// Get height + pub fn height(&self) -> i32 { + self.header.height() as i32 + } + + /// Get aspect ratio + pub fn aspect_ratio(&self) -> f64 { + let w = self.width() as f64; + let h = self.height() as f64; + if h == 0.0 { 1.0 } else { w / h } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pict_opcodes() { + assert_eq!(PictOpcode::from_u16(0x00FF), Some(PictOpcode::EndPic)); + assert_eq!(PictOpcode::from_u16(0x0011), Some(PictOpcode::Version)); + } +} diff --git a/src/images/pict/types.rs b/crates/litchi-imgconv/src/pict/types.rs similarity index 100% rename from src/images/pict/types.rs rename to crates/litchi-imgconv/src/pict/types.rs diff --git a/src/images/svg.rs b/crates/litchi-imgconv/src/svg.rs similarity index 100% rename from src/images/svg.rs rename to crates/litchi-imgconv/src/svg.rs diff --git a/src/images/svg_utils.rs b/crates/litchi-imgconv/src/svg_utils.rs similarity index 98% rename from src/images/svg_utils.rs rename to crates/litchi-imgconv/src/svg_utils.rs index c314770..0ad0eff 100644 --- a/src/images/svg_utils.rs +++ b/crates/litchi-imgconv/src/svg_utils.rs @@ -20,7 +20,7 @@ use std::fmt::Write; /// # Examples /// /// ``` -/// use litchi::images::svg_utils::write_num; +/// use litchi_imgconv::svg_utils::write_num; /// /// let mut buf = String::new(); /// write_num(&mut buf, 10.0); // writes "10" @@ -75,7 +75,7 @@ pub fn fmt_num(n: f64) -> String { /// # Examples /// /// ``` -/// use litchi::images::svg_utils::write_color_hex; +/// use litchi_imgconv::svg_utils::write_color_hex; /// /// let mut buf = String::new(); /// write_color_hex(&mut buf, 0x0000FF); // writes "#ff0000" (red in COLORREF is 0x0000FF) diff --git a/src/images/wmf/constants.rs b/crates/litchi-imgconv/src/wmf/constants.rs 
similarity index 100% rename from src/images/wmf/constants.rs rename to crates/litchi-imgconv/src/wmf/constants.rs diff --git a/crates/litchi-imgconv/src/wmf/converter.rs b/crates/litchi-imgconv/src/wmf/converter.rs new file mode 100644 index 0000000..55d296a --- /dev/null +++ b/crates/litchi-imgconv/src/wmf/converter.rs @@ -0,0 +1,214 @@ +// WMF to raster image converter +// +// Converts WMF metafiles to modern raster formats (PNG, JPEG, WebP). +// +// Similar to EMF, full WMF rendering requires implementing a complete GDI rendering engine. +// This implementation provides extraction and placeholder generation. + +use super::parser::WmfParser; +use image::{DynamicImage, ImageBuffer, ImageFormat, Rgba, RgbaImage}; +use litchi_core::error::{Error, Result}; +use std::io::Cursor; + +/// Options for WMF to raster conversion +#[derive(Debug, Clone)] +pub struct WmfToRasterOptions { + /// Target width (None = use source dimensions) + pub width: Option, + /// Target height (None = use source dimensions) + pub height: Option, + /// Background color for rendering + pub background_color: Rgba, +} + +impl Default for WmfToRasterOptions { + fn default() -> Self { + Self { + width: None, + height: None, + background_color: Rgba([255, 255, 255, 255]), + } + } +} + +/// WMF to raster converter +pub struct WmfConverter { + parser: WmfParser, + options: WmfToRasterOptions, +} + +impl WmfConverter { + /// Create a new WMF converter + pub fn new(parser: WmfParser, options: WmfToRasterOptions) -> Self { + Self { parser, options } + } + + /// Calculate output dimensions maintaining aspect ratio + fn calculate_dimensions(&self) -> (u32, u32) { + let src_width = self.parser.width().max(1) as u32; + let src_height = self.parser.height().max(1) as u32; + + match (self.options.width, self.options.height) { + (Some(w), Some(h)) => (w, h), + (Some(w), None) => { + let aspect = src_height as f64 / src_width as f64; + let h = (w as f64 * aspect) as u32; + (w, h) + }, + (None, Some(h)) => { + 
let aspect = src_width as f64 / src_height as f64; + let w = (h as f64 * aspect) as u32; + (w, h) + }, + (None, None) => { + // Use source dimensions, but cap at reasonable size + let max_dim = 4096; + if src_width > max_dim || src_height > max_dim { + let scale = (max_dim as f64) / src_width.max(src_height) as f64; + ( + (src_width as f64 * scale) as u32, + (src_height as f64 * scale) as u32, + ) + } else { + (src_width, src_height) + } + }, + } + } + + /// Try to extract embedded bitmap from WMF records + /// + /// WMF files can contain embedded bitmaps via: + /// - META_DIBSTRETCHBLT (0x0B41) + /// - META_STRETCHDIB (0x0F43) + /// - META_DIBBITBLT (0x0940) + fn extract_embedded_bitmap(&self) -> Option { + for record in &self.parser.records { + match record.function { + 0x0B41 | 0x0F43 | 0x0940 => { + // These records may contain DIB data + if let Some(img) = self.parse_dib_from_record(&record.params) { + return Some(img); + } + }, + _ => {}, + } + } + None + } + + /// Parse DIB data from WMF record parameters + fn parse_dib_from_record(&self, data: &[u8]) -> Option { + if data.len() < 40 { + return None; + } + + // Try to construct BMP from DIB + if let Ok(img) = self.construct_bmp_from_dib(data) { + return Some(img); + } + + None + } + + /// Construct a BMP image from DIB data + fn construct_bmp_from_dib(&self, dib_data: &[u8]) -> Result { + let file_size = 14u32 + dib_data.len() as u32; + let pixel_data_offset = 14u32 + 40u32; + + let mut bmp_data = Vec::with_capacity(file_size as usize); + + // BMP file header + bmp_data.extend_from_slice(b"BM"); + bmp_data.extend_from_slice(&file_size.to_le_bytes()); + bmp_data.extend_from_slice(&[0u8; 4]); + bmp_data.extend_from_slice(&pixel_data_offset.to_le_bytes()); + bmp_data.extend_from_slice(dib_data); + + match image::load_from_memory(&bmp_data) { + Ok(img) => Ok(img), + Err(_) => Err(Error::ParseError("Failed to load DIB as BMP".into())), + } + } + + /// Create a placeholder image + fn create_placeholder(&self, 
width: u32, height: u32) -> RgbaImage { + let mut img = ImageBuffer::from_pixel(width, height, self.options.background_color); + + let border_color = Rgba([128, 128, 128, 255]); + + // Draw border + for x in 0..width { + if x < height { + img.put_pixel(x, 0, border_color); + img.put_pixel(x, height - 1, border_color); + } + } + for y in 0..height { + if y < width { + img.put_pixel(0, y, border_color); + img.put_pixel(width - 1, y, border_color); + } + } + + // Draw diagonals + let min_dim = width.min(height); + for i in 0..min_dim { + img.put_pixel(i, i, border_color); + if height > i { + img.put_pixel(i, height - 1 - i, border_color); + } + } + + img + } + + /// Convert WMF to a raster image + pub fn convert_to_image(&self) -> Result { + let (target_width, target_height) = self.calculate_dimensions(); + + // Try to extract embedded bitmap first + if let Some(embedded) = self.extract_embedded_bitmap() { + if embedded.width() != target_width || embedded.height() != target_height { + return Ok(DynamicImage::ImageRgba8(image::imageops::resize( + &embedded, + target_width, + target_height, + image::imageops::FilterType::Lanczos3, + ))); + } + return Ok(embedded); + } + + // Create placeholder + let placeholder = self.create_placeholder(target_width, target_height); + Ok(DynamicImage::ImageRgba8(placeholder)) + } + + /// Convert WMF to specified image format + pub fn convert_to_format(&self, format: ImageFormat) -> Result> { + let image = self.convert_to_image()?; + + let mut buffer = Cursor::new(Vec::new()); + image + .write_to(&mut buffer, format) + .map_err(|e| Error::ParseError(format!("Failed to encode image: {}", e)))?; + + Ok(buffer.into_inner()) + } + + /// Convert WMF to PNG bytes + pub fn convert_to_png(&self) -> Result> { + self.convert_to_format(ImageFormat::Png) + } + + /// Convert WMF to JPEG bytes + pub fn convert_to_jpeg(&self) -> Result> { + self.convert_to_format(ImageFormat::Jpeg) + } + + /// Convert WMF to WebP bytes + pub fn convert_to_webp(&self) 
-> Result> { + self.convert_to_format(ImageFormat::WebP) + } +} diff --git a/crates/litchi-imgconv/src/wmf/mod.rs b/crates/litchi-imgconv/src/wmf/mod.rs new file mode 100644 index 0000000..f76b28a --- /dev/null +++ b/crates/litchi-imgconv/src/wmf/mod.rs @@ -0,0 +1,119 @@ +// Windows Metafile (WMF) format parser and converter +// +// This module provides functionality to parse WMF data and convert it to +// modern image formats (PNG, JPEG, WebP). +// +// WMF is a 16-bit vector graphics format for Windows, introduced in Windows 3.0. +// It's the predecessor to EMF (Enhanced Metafile). +// +// References: +// - [MS-WMF]: Windows Metafile Format Specification +// - https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-wmf/ + +mod constants; +pub mod converter; +pub mod parser; +mod svg; + +pub use constants::*; + +pub use converter::{WmfConverter, WmfToRasterOptions}; +pub use parser::WmfParser; +pub use svg::WmfConverter as WmfSvgConverter; + +use image::ImageFormat; +use litchi_core::error::Result; + +/// Convert WMF data to a raster image in the specified format +/// +/// # Arguments +/// * `wmf_data` - Raw WMF file data +/// * `format` - Target image format (PNG, JPEG, WebP) +/// * `width` - Optional output width (maintains aspect ratio if only one dimension specified) +/// * `height` - Optional output height +/// +/// # Returns +/// Encoded image bytes in the target format +/// +/// # Example +/// ```no_run +/// use litchi_imgconv::wmf::convert_wmf; +/// use image::ImageFormat; +/// +/// let wmf_data = std::fs::read("image.wmf")?; +/// let png_data = convert_wmf(&wmf_data, ImageFormat::Png, Some(800), None)?; +/// std::fs::write("output.png", png_data)?; +/// # Ok::<(), Box>(()) +/// ``` +pub fn convert_wmf( + wmf_data: &[u8], + format: ImageFormat, + width: Option, + height: Option, +) -> Result> { + let parser = WmfParser::new(wmf_data)?; + let options = WmfToRasterOptions { + width, + height, + background_color: image::Rgba([255, 255, 255, 255]), + 
}; + + let converter = WmfConverter::new(parser, options); + converter.convert_to_format(format) +} + +/// Convert WMF data to PNG format +pub fn convert_wmf_to_png( + wmf_data: &[u8], + width: Option, + height: Option, +) -> Result> { + convert_wmf(wmf_data, ImageFormat::Png, width, height) +} + +/// Convert WMF data to JPEG format +pub fn convert_wmf_to_jpeg( + wmf_data: &[u8], + width: Option, + height: Option, +) -> Result> { + convert_wmf(wmf_data, ImageFormat::Jpeg, width, height) +} + +/// Convert WMF data to WebP format +pub fn convert_wmf_to_webp( + wmf_data: &[u8], + width: Option, + height: Option, +) -> Result> { + convert_wmf(wmf_data, ImageFormat::WebP, width, height) +} + +/// Convert WMF data to SVG format +/// +/// +/// # Arguments +/// * `wmf_data` - Raw WMF file data +/// +/// # Returns +/// SVG document as string +/// +/// # Example +/// ```no_run +/// use litchi_imgconv::wmf::convert_wmf_to_svg; +/// +/// let wmf_data = std::fs::read("image.wmf")?; +/// let svg = convert_wmf_to_svg(&wmf_data)?; +/// std::fs::write("output.svg", svg)?; +/// # Ok::<(), Box>(()) +/// ``` +pub fn convert_wmf_to_svg(wmf_data: &[u8]) -> Result { + let parser = WmfParser::new(wmf_data)?; + let converter = WmfSvgConverter::new(parser); + converter.to_svg() +} + +/// Convert WMF data to SVG bytes +pub fn convert_wmf_to_svg_bytes(wmf_data: &[u8]) -> Result> { + Ok(convert_wmf_to_svg(wmf_data)?.into_bytes()) +} diff --git a/crates/litchi-imgconv/src/wmf/parser.rs b/crates/litchi-imgconv/src/wmf/parser.rs new file mode 100644 index 0000000..7436e66 --- /dev/null +++ b/crates/litchi-imgconv/src/wmf/parser.rs @@ -0,0 +1,363 @@ +// WMF file parser +// +// Parses Windows Metafile records and extracts relevant information +// +// ## Performance Optimizations +// +// This parser is optimized for minimal memory allocations: +// +// 1. 
**Zero-copy data storage**: Uses `Bytes` with reference counting instead of `Vec` +// - The input data is copied once into a `Bytes` buffer +// - All record params are zero-copy slices of this buffer via `Bytes::slice()` +// - Eliminates N allocations where N = number of records +// +// 2. **Pre-allocated records vector**: Estimates capacity based on file size +// - Reduces reallocation overhead during parsing +// - Typical WMF files have 20-50 bytes per record on average +// +// 3. **Manual byte parsing**: Avoids zerocopy alignment issues +// - Direct byte access for little-endian values +// - No intermediate allocations for header parsing +// +// These optimizations significantly reduce calls to `_platform_memmove`, +// `alloc::raw_vec::RawVec::grow_one`, and `szone_malloc_should_clear`. + +use bytes::Bytes; +use litchi_core::error::{Error, Result}; + +/// WMF file type +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum WmfFileType { + /// Memory metafile + Memory = 1, + /// Disk metafile + Disk = 2, +} + +/// WMF header (Placeable Metafile Header) +/// +/// This is an optional header that may precede the standard WMF header +#[derive(Debug, Clone)] +pub struct WmfPlaceableHeader { + /// Key (should be 0x9AC6CDD7) + pub key: u32, + /// Left coordinate + pub left: i16, + /// Top coordinate + pub top: i16, + /// Right coordinate + pub right: i16, + /// Bottom coordinate + pub bottom: i16, + /// Units per inch + pub inch: u16, + /// Checksum + pub checksum: u16, +} + +impl WmfPlaceableHeader { + const PLACEABLE_KEY: u32 = 0x9AC6CDD7; + + /// Check if data starts with a placeable header + pub fn is_placeable(data: &[u8]) -> bool { + if data.len() < 4 { + return false; + } + let key = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + key == Self::PLACEABLE_KEY + } + + /// Parse placeable header from data + pub fn parse(data: &[u8]) -> Result { + if data.len() < 22 { + return Err(Error::ParseError("WMF placeable header too short".into())); + } + + // 
Parse header manually to avoid zerocopy alignment issues + let key = u32::from_le_bytes([data[0], data[1], data[2], data[3]]); + + if key != Self::PLACEABLE_KEY { + return Err(Error::ParseError(format!( + "Invalid WMF placeable key: 0x{:08X}", + key + ))); + } + + let left = i16::from_le_bytes([data[6], data[7]]); + let top = i16::from_le_bytes([data[8], data[9]]); + let right = i16::from_le_bytes([data[10], data[11]]); + let bottom = i16::from_le_bytes([data[12], data[13]]); + let inch = u16::from_le_bytes([data[14], data[15]]); + let checksum = u16::from_le_bytes([data[20], data[21]]); + + Ok(Self { + key, + left, + top, + right, + bottom, + inch, + checksum, + }) + } + + /// Get width + pub fn width(&self) -> i16 { + self.right - self.left + } + + /// Get height + pub fn height(&self) -> i16 { + self.bottom - self.top + } +} + +/// WMF standard header +#[derive(Debug, Clone)] +pub struct WmfHeader { + /// File type (1 = memory, 2 = disk) + pub file_type: u16, + /// Header size in words (always 9) + pub header_size: u16, + /// Windows version + pub version: u16, + /// Size of file in words + pub file_size: u32, + /// Number of objects + pub num_objects: u16, + /// Size of largest record in words + pub max_record: u32, + /// Not used (always 0) + pub num_params: u16, +} + +impl WmfHeader { + /// Parse WMF standard header + pub fn parse(data: &[u8]) -> Result { + if data.len() < 18 { + return Err(Error::ParseError("WMF header too short".into())); + } + + // Parse header manually to avoid zerocopy alignment issues + let file_type = u16::from_le_bytes([data[0], data[1]]); + let header_size = u16::from_le_bytes([data[2], data[3]]); + let version = u16::from_le_bytes([data[4], data[5]]); + let file_size = u32::from_le_bytes([data[6], data[7], data[8], data[9]]); + let num_objects = u16::from_le_bytes([data[10], data[11]]); + let max_record = u32::from_le_bytes([data[12], data[13], data[14], data[15]]); + let num_params = u16::from_le_bytes([data[16], data[17]]); + + 
Ok(Self { + file_type, + header_size, + version, + file_size, + num_objects, + max_record, + num_params, + }) + } +} + +/// WMF record +#[derive(Debug, Clone)] +pub struct WmfRecord { + /// Record size in words (including size and function) + pub size: u32, + /// Record function + pub function: u16, + /// Record parameters (zero-copy slice of the original data) + pub params: Bytes, +} + +impl WmfRecord { + /// Parse a WMF record + /// + /// # Arguments + /// * `data` - Zero-copy bytes buffer containing the WMF data + /// * `offset` - Offset in the buffer to start parsing + /// + /// # Returns + /// A tuple of (parsed record, bytes consumed) + pub fn parse(data: &Bytes, offset: usize) -> Result<(Self, usize)> { + if offset + 6 > data.len() { + return Err(Error::ParseError("Insufficient data for WMF record".into())); + } + + // Parse record header manually to avoid zerocopy alignment issues + let size = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + let function = u16::from_le_bytes([data[offset + 4], data[offset + 5]]); + + // Size is in words (16-bit), convert to bytes + let size_bytes = (size as usize) * 2; + + if size < 3 || offset + size_bytes > data.len() { + return Err(Error::ParseError(format!( + "Invalid WMF record size: {} at offset {}", + size, offset + ))); + } + + // Parameters start after size and function + let param_size = size_bytes - 6; + // Zero-copy slice: this creates a shallow copy with reference counting + let params = data.slice((offset + 6)..(offset + 6 + param_size)); + + Ok(( + Self { + size, + function, + params, + }, + size_bytes, + )) + } + + /// Check if this is an EOF record + pub const fn is_eof(&self) -> bool { + self.function == 0x0000 + } +} + +/// WMF file parser +#[derive(Debug)] +pub struct WmfParser { + /// Optional placeable header + pub placeable: Option, + /// Standard WMF header + pub header: WmfHeader, + /// All records + pub records: Vec, + /// Raw WMF data 
(zero-copy with reference counting) + data: Bytes, +} + +impl WmfParser { + /// Create a new WMF parser from raw data (borrowed) + /// + /// This uses zero-copy techniques with `Bytes` for optimal performance. + /// The input data is converted to `Bytes` once, and all records share + /// references to slices of this buffer without additional allocations. + /// + /// Note: This method copies the input data. Use [`Self::from_owned`] if you + /// already own the data to avoid the copy. + pub fn new(data: &[u8]) -> Result { + // Convert to Bytes - requires copying since input is borrowed + let data = Bytes::copy_from_slice(data); + Self::parse_internal(data) + } + + /// Create a new WMF parser from owned data (zero-copy) + /// + /// This is more efficient than [`Self::new`] as it takes ownership of the data + /// without copying. + /// + /// # Example + /// ```ignore + /// let data = std::fs::read("file.wmf")?; + /// let parser = WmfParser::from_owned(data)?; + /// ``` + pub fn from_owned(data: Vec) -> Result { + // Convert Vec to Bytes without copying + let data = Bytes::from(data); + Self::parse_internal(data) + } + + /// Internal parsing implementation shared by both constructors + fn parse_internal(data: Bytes) -> Result { + let mut offset = 0; + + // Check for placeable header + let placeable = if WmfPlaceableHeader::is_placeable(&data) { + let header = WmfPlaceableHeader::parse(&data)?; + offset = 22; // Placeable header is 22 bytes + Some(header) + } else { + None + }; + + // Parse standard header + if offset + 18 > data.len() { + return Err(Error::ParseError("WMF data too short for header".into())); + } + + let header = WmfHeader::parse(&data[offset..])?; + offset += 18; + + // Pre-allocate records vector with a reasonable fixed capacity. + // WMF files typically have 20-100 records. Using a moderate initial capacity + // (128) avoids both massive over-allocation and frequent reallocations. 
+ // The Vec will grow efficiently if needed (typically doubling capacity). + let mut records = Vec::with_capacity(128); + + // Parse records - all params will be zero-copy slices of the data buffer + while offset < data.len() { + match WmfRecord::parse(&data, offset) { + Ok((record, consumed)) => { + let is_eof = record.is_eof(); + records.push(record); + offset += consumed; + + if is_eof { + break; + } + }, + Err(_) => break, + } + } + + Ok(Self { + placeable, + header, + records, + data, + }) + } + + /// Get the raw WMF data + pub fn data(&self) -> &[u8] { + &self.data + } + + /// Get width in logical units + pub fn width(&self) -> i32 { + if let Some(ref placeable) = self.placeable { + placeable.width() as i32 + } else { + // Without placeable header, use a default + 1000 + } + } + + /// Get height in logical units + pub fn height(&self) -> i32 { + if let Some(ref placeable) = self.placeable { + placeable.height() as i32 + } else { + // Without placeable header, use a default + 1000 + } + } + + /// Get aspect ratio + pub fn aspect_ratio(&self) -> f64 { + let w = self.width() as f64; + let h = self.height() as f64; + if h == 0.0 { 1.0 } else { w / h } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_placeable_key() { + assert_eq!(WmfPlaceableHeader::PLACEABLE_KEY, 0x9AC6CDD7); + } +} diff --git a/src/images/wmf/svg/bounds.rs b/crates/litchi-imgconv/src/wmf/svg/bounds.rs similarity index 99% rename from src/images/wmf/svg/bounds.rs rename to crates/litchi-imgconv/src/wmf/svg/bounds.rs index cd818bd..90f90d3 100644 --- a/src/images/wmf/svg/bounds.rs +++ b/crates/litchi-imgconv/src/wmf/svg/bounds.rs @@ -14,7 +14,7 @@ use super::super::constants::record; use super::super::parser::WmfRecord; -use crate::common::binary::{read_i16_le, read_u16_le}; +use litchi_core::binary::{read_i16_le, read_u16_le}; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; diff --git a/crates/litchi-imgconv/src/wmf/svg/mod.rs 
b/crates/litchi-imgconv/src/wmf/svg/mod.rs new file mode 100644 index 0000000..62d65e8 --- /dev/null +++ b/crates/litchi-imgconv/src/wmf/svg/mod.rs @@ -0,0 +1,175 @@ +//! Minimal WMF to SVG converter - produces compact, optimized SVG output +//! +//! This module implements a high-performance WMF to SVG converter that generates +//! minimal, optimized SVG following SVGO best practices while matching libwmf's +//! visual fidelity. +//! +//! # SVG Output Optimizations +//! +//! - **No whitespace**: Single-line output with no newlines or extra spaces +//! - **No metadata**: No DOCTYPE, comments, or description tags +//! - **Minimal attributes**: Only non-default SVG attributes included +//! - **Compact numbers**: Removes trailing zeros, uses integers when possible +//! - **Font family mapping**: Maps common fonts to generic CSS families +//! +//! The converter produces **56-89% smaller** files compared to libwmf's output. +//! +//! # Complete Feature Support +//! +//! ## Coordinate Transformation +//! - Matches libwmf's `svg_translate`, `svg_width`, `svg_height` behavior +//! - Handles placeable headers OR scanned bounding boxes +//! - Scales to 768x512 max dimensions (libwmf default) +//! - Preserves aspect ratio perfectly +//! +//! ## Pen Styling (Stroke) +//! - COLORREF to #RRGGBB color conversion +//! - Stroke widths scaled by coordinate transform +//! - Line caps: butt, round, square (PS_ENDCAP_*) +//! - Line joins: miter, bevel, round (PS_JOIN_*) +//! - Dash arrays scaled by pen width (libwmf algorithm): +//! - PS_DASH: 10x width dash pattern +//! - PS_DOT: 1x width dot pattern +//! - PS_DASHDOT: dash-dot pattern +//! - PS_DASHDOTDOT: dash-dot-dot pattern +//! +//! ## Brush Styling (Fill) +//! - Fill colors from COLORREF +//! - Fill rules based on poly_fill_mode: +//! - ALTERNATE (1) → `fill-rule="evenodd"` +//! - WINDING (2) → `fill-rule="nonzero"` +//! - BS_NULL handling (no fill) +//! - BS_SOLID handling (solid color fill) +//! +//! ## Font Handling +//! 
- Font families mapped to generic CSS families +//! - Font sizes scaled by coordinate transform +//! - Font weights (FW_BOLD detection) +//! - Font styles (italic) +//! - Text decoration (underline, strikethrough) +//! - Text rotation via transform matrix (escapement angle) +//! +//! ## Geometric Shapes +//! - Rectangle, RoundRect, Ellipse +//! - Polygon, Polyline, PolyPolygon (multiple polygons) +//! - Arc, Pie, Chord with proper SVG path commands +//! - LineTo with current position tracking +//! +//! ## Text Rendering +//! - TextOut and ExtTextOut support +//! - Proper coordinate transformation +//! - XML entity escaping +//! - Font attribute application +//! +//! # Architecture +//! +//! - `bounds`: Calculates bounding boxes from WMF records +//! - `transform`: Coordinate transformation from WMF to SVG space (matches libwmf) +//! - `state`: Graphics state management (pens, brushes, fonts, GDI objects) +//! - `style`: SVG style attribute generation (fill, stroke, font) +//! - `renderer`: Converts WMF records to SVG elements +//! +//! # Example +//! +//! ```no_run +//! use litchi_imgconv::wmf::convert_wmf_to_svg; +//! +//! let wmf_data = std::fs::read("drawing.wmf")?; +//! let svg = convert_wmf_to_svg(&wmf_data)?; +//! std::fs::write("drawing.svg", svg)?; +//! # Ok::<(), Box>(()) +//! ``` +//! +//! # References +//! +//! - [MS-WMF]: Windows Metafile Format +//! - libwmf: Reference implementation for visual fidelity +//! 
- SVGO: SVG optimization best practices + +mod bounds; +mod renderer; +mod simd; +mod state; +mod style; +mod transform; + +use super::parser::WmfParser; +use crate::svg_utils::write_num; +use litchi_core::error::Result; +use std::fmt::Write; + +pub use bounds::BoundsCalculator; +pub use renderer::SvgRenderer; +pub use transform::CoordinateTransform; + +/// Minimal WMF to SVG converter +pub struct WmfConverter { + parser: WmfParser, +} + +impl WmfConverter { + pub fn new(parser: WmfParser) -> Self { + Self { parser } + } + + /// Convert to minimal SVG (no whitespace, minimal attributes) + pub fn to_svg(&self) -> Result { + // Calculate bounds from parser placeable header or scan records + let bbox = if let Some(ref p) = self.parser.placeable { + (p.left, p.top, p.right, p.bottom) + } else { + BoundsCalculator::scan_records(&self.parser.records) + }; + + // Calculate SVG dimensions (default max 768x512 like libwmf) + let (svg_width, svg_height) = Self::calculate_dimensions(bbox); + + let transform = CoordinateTransform::new(bbox, svg_width, svg_height); + + // Build SVG with no whitespace + let mut svg = String::with_capacity(4096); + + // Minimal SVG header (no newlines, no DOCTYPE) + svg.push_str(r#""#); + + // Render elements + let mut renderer = SvgRenderer::new(transform); + for record in &self.parser.records { + if let Some(element) = renderer.render_record(record) { + svg.push_str(&element); + } + } + + svg.push_str(""); + Ok(svg) + } + + fn calculate_dimensions(bbox: (i16, i16, i16, i16)) -> (f64, f64) { + let (left, top, right, bottom) = bbox; + let bbox_width = (right - left).abs() as f64; + let bbox_height = (bottom - top).abs() as f64; + + if bbox_width == 0.0 || bbox_height == 0.0 { + return (768.0, 512.0); + } + + // Scale to fit 768x512 while preserving aspect ratio (libwmf default) + const MAX_W: f64 = 768.0; + const MAX_H: f64 = 512.0; + + let ratio = bbox_height / bbox_width; + if ratio > MAX_H / MAX_W { + (MAX_H / ratio, MAX_H) + } else { + 
(MAX_W, MAX_W * ratio) + } + } +} diff --git a/src/images/wmf/svg/renderer.rs b/crates/litchi-imgconv/src/wmf/svg/renderer.rs similarity index 99% rename from src/images/wmf/svg/renderer.rs rename to crates/litchi-imgconv/src/wmf/svg/renderer.rs index 30adbdb..bd3bfa6 100644 --- a/src/images/wmf/svg/renderer.rs +++ b/crates/litchi-imgconv/src/wmf/svg/renderer.rs @@ -9,8 +9,8 @@ use super::super::parser::WmfRecord; use super::state::{Brush, Font, GdiObject, GraphicsState, Pen}; use super::style::{fill_attr, map_font_family, stroke_attrs}; use super::transform::CoordinateTransform; -use crate::common::binary::{read_i16_le, read_u16_le}; -use crate::images::svg_utils::{write_color_hex, write_num}; +use crate::svg_utils::{write_color_hex, write_num}; +use litchi_core::binary::{read_i16_le, read_u16_le}; /// Type of arc rendering #[derive(Debug, Clone, Copy)] diff --git a/src/images/wmf/svg/simd.rs b/crates/litchi-imgconv/src/wmf/svg/simd.rs similarity index 96% rename from src/images/wmf/svg/simd.rs rename to crates/litchi-imgconv/src/wmf/svg/simd.rs index c296661..6e204e3 100644 --- a/src/images/wmf/svg/simd.rs +++ b/crates/litchi-imgconv/src/wmf/svg/simd.rs @@ -111,9 +111,17 @@ impl SimdTransform { } /// AVX512F implementation (8 points per iteration) + /// + /// Note: AVX-512 intrinsics are stable since Rust 1.89 while the workspace + /// MSRV is 1.85. The `incompatible_msrv` clippy lint is allowed here because + /// the entire function is gated behind a runtime `is_x86_feature_detected!` + /// check at the call site, so users on older toolchains that lack AVX-512 + /// hardware never hit this code path. When the workspace MSRV is bumped to + /// 1.89+ this allow can be removed. 
#[cfg(target_arch = "x86_64")] #[target_feature(enable = "avx512f")] #[inline] + #[allow(clippy::incompatible_msrv)] unsafe fn transform_batch_avx512( &self, xs: &[i16], diff --git a/src/images/wmf/svg/state.rs b/crates/litchi-imgconv/src/wmf/svg/state.rs similarity index 100% rename from src/images/wmf/svg/state.rs rename to crates/litchi-imgconv/src/wmf/svg/state.rs diff --git a/crates/litchi-imgconv/src/wmf/svg/style.rs b/crates/litchi-imgconv/src/wmf/svg/style.rs new file mode 100644 index 0000000..d661842 --- /dev/null +++ b/crates/litchi-imgconv/src/wmf/svg/style.rs @@ -0,0 +1,167 @@ +//! SVG style attribute generation +//! +//! Converts WMF pen, brush, and font properties to minimal SVG attributes. +//! Only includes non-default attributes to minimize output size. + +use super::state::{Brush, Pen}; +use super::transform::CoordinateTransform; +use crate::svg_utils::{write_color_hex, write_num}; + +// Re-export commonly used functions from svg_utils for backward compatibility +#[allow(unused_imports)] +pub use crate::svg_utils::{color_hex, fmt_num}; + +/// Generate fill attribute (only if non-default) +pub fn fill_attr(brush: &Brush, poly_fill_mode: u16) -> Option { + let mut attrs = String::with_capacity(48); + + if brush.style == 1 { + // BS_NULL - no fill + attrs.push_str(r#" fill="none""#); + } else { + // Solid or patterned fill + attrs.push_str(&format!(r#" fill="{}""#, color_hex(brush.color))); + + // Add fill-rule based on poly_fill_mode (matches libwmf) + // 1=ALTERNATE (evenodd), 2=WINDING (nonzero) + if poly_fill_mode == 2 { + attrs.push_str(r#" fill-rule="nonzero""#); + } else if poly_fill_mode == 1 { + attrs.push_str(r#" fill-rule="evenodd""#); + } + } + + Some(attrs) +} + +/// Generate stroke attributes (matching libwmf behavior) +pub fn stroke_attrs(pen: &Pen, transform: &CoordinateTransform) -> String { + let style = pen.style & 0x0F; + + // PS_NULL (5) - no stroke + if style == 5 { + return r#" stroke="none""#.to_string(); + } + + let mut 
attrs = String::with_capacity(96); + + // Stroke color - inline color_hex to avoid allocation + attrs.push_str(" stroke=\""); + write_color_hex(&mut attrs, pen.color); + attrs.push('"'); + + // Stroke width (average of width and height like libwmf) + let width = transform.width(pen.width.max(1) as f64); + attrs.push_str(r#" stroke-width=""#); + write_num(&mut attrs, width); + attrs.push('"'); + + // Line cap + let endcap = (pen.style >> 8) & 0x0F; + let cap = match endcap { + 0x01 => "square", // PS_ENDCAP_SQUARE + 0x00 => "round", // PS_ENDCAP_ROUND + _ => "butt", // PS_ENDCAP_FLAT (default) + }; + if cap != "butt" { + attrs.push_str(r#" stroke-linecap=""#); + attrs.push_str(cap); + attrs.push('"'); + } + + // Line join + let join = (pen.style >> 12) & 0x0F; + let join_style = match join { + 0x01 => "bevel", // PS_JOIN_BEVEL + 0x02 => "round", // PS_JOIN_ROUND + _ => "miter", // PS_JOIN_MITER (default) + }; + if join_style != "miter" { + attrs.push_str(r#" stroke-linejoin=""#); + attrs.push_str(join_style); + attrs.push('"'); + } + + // Dash array (scaled by pen width like libwmf) + match style { + 1 => { + // PS_DASH - dashed line (10x width dash + 10x width gap) + let dash = width * 10.0; + attrs.push_str(r#" stroke-dasharray=""#); + write_num(&mut attrs, dash); + attrs.push(','); + write_num(&mut attrs, dash); + attrs.push('"'); + }, + 2 | 7 => { + // PS_DOT or PS_ALTERNATE - dotted line (width dash + 2x width gap) + let dash = width; + let gap = width * 2.0; + attrs.push_str(r#" stroke-dasharray=""#); + write_num(&mut attrs, dash); + attrs.push(','); + write_num(&mut attrs, gap); + attrs.push('"'); + }, + 3 => { + // PS_DASHDOT - dash-dot pattern + let long = width * 10.0; + let short = width; + let gap = width * 2.0; + attrs.push_str(r#" stroke-dasharray=""#); + write_num(&mut attrs, long); + attrs.push(','); + write_num(&mut attrs, gap); + attrs.push(','); + write_num(&mut attrs, short); + attrs.push(','); + write_num(&mut attrs, gap); + attrs.push('"'); + 
}, + 4 => { + // PS_DASHDOTDOT - dash-dot-dot pattern + let long = width * 10.0; + let short = width; + let gap = width * 2.0; + attrs.push_str(r#" stroke-dasharray=""#); + write_num(&mut attrs, long); + attrs.push(','); + write_num(&mut attrs, gap); + attrs.push(','); + write_num(&mut attrs, short); + attrs.push(','); + write_num(&mut attrs, gap); + attrs.push(','); + write_num(&mut attrs, short); + attrs.push(','); + write_num(&mut attrs, gap); + attrs.push('"'); + }, + _ => {}, // PS_SOLID (0) or PS_INSIDEFRAME (6) - no dasharray + } + + attrs +} + +/// Map WMF font name to generic family or keep specific +/// +/// Maps common Windows fonts to generic CSS font families for better compatibility +/// and smaller SVG output. Follows common font fallback patterns used in libwmf. +pub fn map_font_family(name: &str) -> &str { + match name { + // Serif fonts + "Times New Roman" | "Times" | "Georgia" | "Garamond" => "serif", + // Sans-serif fonts + "Arial" | "Helvetica" | "Verdana" | "Tahoma" | "Trebuchet MS" | "Arial Black" => { + "sans-serif" + }, + // Monospace fonts + "Courier New" | "Courier" | "Consolas" | "Monaco" | "Lucida Console" => "monospace", + // Cursive fonts + "Comic Sans MS" | "Brush Script MT" => "cursive", + // Fantasy fonts + "Impact" | "Papyrus" => "fantasy", + // Keep original name for other fonts + _ => name, + } +} diff --git a/src/images/wmf/svg/transform.rs b/crates/litchi-imgconv/src/wmf/svg/transform.rs similarity index 98% rename from src/images/wmf/svg/transform.rs rename to crates/litchi-imgconv/src/wmf/svg/transform.rs index 1c76e30..23e14c3 100644 --- a/src/images/wmf/svg/transform.rs +++ b/crates/litchi-imgconv/src/wmf/svg/transform.rs @@ -112,7 +112,7 @@ impl CoordinateTransform { buffer: &mut String, separator: char, ) { - use crate::images::svg_utils::write_num; + use crate::svg_utils::write_num; let len = xs.len().min(ys.len()); if len == 0 { diff --git a/crates/litchi-iwa/Cargo.toml b/crates/litchi-iwa/Cargo.toml new file mode 
100644 index 0000000..ceebcd0 --- /dev/null +++ b/crates/litchi-iwa/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "litchi-iwa" +description = "Apple iWork (Pages, Numbers, Keynote) IWA archive parser for the Litchi office-formats library." +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +litchi-core = { workspace = true } +once_cell = { workspace = true } +phf = { workspace = true } +plist = { workspace = true } +prost = { workspace = true } +snap = { workspace = true } +soapberry-zip = { workspace = true } +thiserror = { workspace = true } + +[build-dependencies] +prost-build = { workspace = true } diff --git a/crates/litchi-iwa/README.md b/crates/litchi-iwa/README.md new file mode 100644 index 0000000..075b32c --- /dev/null +++ b/crates/litchi-iwa/README.md @@ -0,0 +1,46 @@ +# litchi-iwa + +Apple iWork archive parser for `.pages`, `.numbers`, and `.key` files. + +## Overview + +`litchi-iwa` reads Apple iWork bundles using their IWA (iWork Archive) layout: a ZIP container holding Snappy-compressed, protobuf-encoded object streams along with media assets and metadata. It exposes a unified `Document` API that handles all three iWork applications, plus lower-level access to archives, the object reference graph, and structured content (tables, slides, sections). 
+ +## Usage + +```toml +[dependencies] +litchi-iwa = "0.0.1" +``` + +```rust +use litchi_iwa::Document; + +let doc = Document::open("document.pages")?; +let text = doc.text()?; +let stats = doc.stats(); +println!("objects: {}", stats.total_objects); + +let structured = doc.extract_structured_data()?; +println!("{}", structured.summary()); +# Ok::<(), litchi_iwa::Error>(()) +``` + +## Features + +- Parse Pages, Numbers, and Keynote bundles from a path or in-memory bytes +- Snappy decompression and protobuf decoding of `.iwa` streams +- Text extraction across all iWork applications +- Structured-data extraction: tables (with CSV export), slides, sections +- Media asset discovery and extraction + +## Build Requirements + +This crate compiles protobuf definitions via `prost-build`. The `protoc` compiler must be available on `PATH`: + +- Debian / Ubuntu: `apt install protobuf-compiler` +- macOS (Homebrew): `brew install protobuf` + +## License + +Licensed under the Apache License, Version 2.0. Part of the [Litchi](https://github.com/DevExzh/litchi) workspace. diff --git a/crates/litchi-iwa/build.rs b/crates/litchi-iwa/build.rs new file mode 100644 index 0000000..648f52e --- /dev/null +++ b/crates/litchi-iwa/build.rs @@ -0,0 +1,40 @@ +fn main() -> std::io::Result<()> { + println!("cargo:rerun-if-changed=src/protos/"); + + // Configure prost-build + let mut config = prost_build::Config::new(); + + // Collect all .proto files from the protos directory for complete compilation + let all_proto_files = std::fs::read_dir("src/protos") + .expect("Failed to read protos directory") + .filter_map(|entry| { + let entry = entry.ok()?; + let path = entry.path(); + if path.extension()?.to_str()? 
== "proto" { + Some(path.to_string_lossy().to_string()) + } else { + None + } + }) + .collect::>(); + + println!( + "Compiling all {} protobuf files together for proper dependency resolution", + all_proto_files.len() + ); + + // Compile all protobuf files - will fail the build if any errors occur + match config + .enable_type_names() + .include_file("iwa_protos.rs") + .compile_protos(&all_proto_files, &["src/protos"]) + { + Ok(_) => println!("Successfully compiled all protobuf files"), + Err(e) => { + eprintln!("Failed to compile protobuf files: {}\n", e); + panic!("Protobuf compilation failed - check for syntax errors in .proto files"); + }, + } + + Ok(()) +} diff --git a/crates/litchi-iwa/examples/extract_structured.rs b/crates/litchi-iwa/examples/extract_structured.rs new file mode 100644 index 0000000..0b85d5a --- /dev/null +++ b/crates/litchi-iwa/examples/extract_structured.rs @@ -0,0 +1,78 @@ +//! Extract tables from an Apple Numbers (`.numbers`) document and print the +//! first table as CSV. +//! +//! # Run +//! +//! ```bash +//! cargo run -p litchi-iwa --example extract_structured -- /path/to/spreadsheet.numbers +//! ``` +//! +//! Numbers test fixtures are not bundled with this checkout. Drop a real +//! `.numbers` file into `test-data/iwa/numbers/` (or anywhere on disk) and +//! pass its path on the command line. + +use std::env; +use std::path::Path; + +use litchi_iwa::Document; + +const TEST_DATA_HINT: &str = "test-data/iwa/numbers"; + +fn main() -> Result<(), Box> { + let path = match env::args().nth(1) { + Some(p) => p, + None => { + eprintln!("usage: extract_structured "); + eprintln!(); + eprintln!( + "no path given. 
drop a .numbers file into `{}` and pass its path,", + TEST_DATA_HINT + ); + eprintln!("or point at any Numbers document on disk."); + return Ok(()); + }, + }; + + let path = Path::new(&path); + if !path.exists() { + eprintln!("file not found: {}", path.display()); + eprintln!("Numbers test fixtures are not committed; please supply a real .numbers path."); + return Ok(()); + } + + println!("opening: {}", path.display()); + let doc = Document::open(path)?; + let structured = doc.extract_structured_data()?; + + println!( + "found {} table(s), {} slide(s), {} section(s)", + structured.tables.len(), + structured.slides.len(), + structured.sections.len() + ); + + let Some(first) = structured.tables.first() else { + println!("no tables found in this document."); + return Ok(()); + }; + + println!( + "--- table: {} ({} rows x {} cols) ---", + first.name, first.row_count, first.column_count + ); + let csv = first.to_csv(); + if csv.is_empty() { + println!("(table is empty)"); + } else { + println!("{}", csv); + } + + if structured.tables.len() > 1 { + println!( + "... and {} more table(s) not shown.", + structured.tables.len() - 1 + ); + } + + Ok(()) +} diff --git a/crates/litchi-iwa/examples/read_iwork.rs b/crates/litchi-iwa/examples/read_iwork.rs new file mode 100644 index 0000000..22b96e9 --- /dev/null +++ b/crates/litchi-iwa/examples/read_iwork.rs @@ -0,0 +1,78 @@ +//! Read an Apple iWork document (`.pages`, `.numbers`, or `.key`) and print +//! a summary of its contents. +//! +//! # Run +//! +//! ```bash +//! cargo run -p litchi-iwa --example read_iwork -- /path/to/document.pages +//! ``` +//! +//! Apple iWork test fixtures are not bundled with this checkout. Drop a +//! `.pages` / `.numbers` / `.key` file into +//! `test-data/iwa/{pages,numbers,keynote}/` (or anywhere on disk) and pass +//! its path on the command line. 
+ +use std::env; +use std::path::Path; + +use litchi_iwa::Document; + +const TEST_DATA_HINT: &str = "test-data/iwa/{pages,numbers,keynote}"; + +fn main() -> Result<(), Box> { + let path = match env::args().nth(1) { + Some(p) => p, + None => { + eprintln!("usage: read_iwork "); + eprintln!(); + eprintln!( + "no path given. drop a .pages/.numbers/.key file into `{}` and pass its path,", + TEST_DATA_HINT + ); + eprintln!("or point at any iWork bundle on disk."); + return Ok(()); + }, + }; + + let path = Path::new(&path); + if !path.exists() { + eprintln!("file not found: {}", path.display()); + eprintln!("iWork test fixtures are not committed; please supply a real document path."); + return Ok(()); + } + + println!("opening: {}", path.display()); + let doc = Document::open(path)?; + + // High-level statistics. + let stats = doc.stats(); + println!("--- document stats ---"); + println!("application: {:?}", stats.application); + println!("total objects: {}", stats.total_objects); + println!("archives: {}", stats.archives_count); + println!("top message types: {}", stats.message_type_summary()); + + // Plain-text extraction (truncated preview). + println!("--- text preview ---"); + let text = doc.text()?; + if text.is_empty() { + println!("(no text extracted)"); + } else { + let preview_end = text + .char_indices() + .nth(500) + .map(|(i, _)| i) + .unwrap_or(text.len()); + println!("{}", &text[..preview_end]); + if preview_end < text.len() { + println!("... ({} more chars)", text.len() - preview_end); + } + } + + // Structured data summary (tables / slides / sections). 
+ println!("--- structured summary ---"); + let structured = doc.extract_structured_data()?; + println!("{}", structured.summary()); + + Ok(()) +} diff --git a/crates/litchi-iwa/fuzz/.gitignore b/crates/litchi-iwa/fuzz/.gitignore new file mode 100644 index 0000000..1a45eee --- /dev/null +++ b/crates/litchi-iwa/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/crates/litchi-iwa/fuzz/Cargo.toml b/crates/litchi-iwa/fuzz/Cargo.toml new file mode 100644 index 0000000..21cb3d0 --- /dev/null +++ b/crates/litchi-iwa/fuzz/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "litchi-iwa-fuzz" +version = "0.0.0" +edition = "2024" +publish = false +authors = ["Ryker Zhu "] +license = "Apache-2.0" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +litchi-iwa = { path = ".." } + +[[bin]] +name = "parse_iwa" +path = "fuzz_targets/parse_iwa.rs" +test = false +doc = false +bench = false + +[profile.release] +debug = 1 +codegen-units = 1 +lto = "thin" + +[workspace] diff --git a/crates/litchi-iwa/fuzz/fuzz_targets/parse_iwa.rs b/crates/litchi-iwa/fuzz/fuzz_targets/parse_iwa.rs new file mode 100644 index 0000000..329030e --- /dev/null +++ b/crates/litchi-iwa/fuzz/fuzz_targets/parse_iwa.rs @@ -0,0 +1,13 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + if let Ok(doc) = litchi_iwa::Document::from_bytes(data) { + // Exercise downstream decoders: text extraction, structured data, + // and media stats all walk the snappy + protobuf object graph. + let _ = doc.text(); + let _ = doc.extract_structured_data(); + let _ = doc.media_stats(); + } +}); diff --git a/crates/litchi-iwa/src/archive.rs b/crates/litchi-iwa/src/archive.rs new file mode 100644 index 0000000..3d33754 --- /dev/null +++ b/crates/litchi-iwa/src/archive.rs @@ -0,0 +1,341 @@ +//! IWA Archive Format Parser +//! +//! This module handles parsing of IWA (iWork Archive) files, which contain +//! 
Protocol Buffers-encoded messages with ArchiveInfo and MessageInfo headers. + +use std::io::Read; + +use crate::protobuf::{DecodedMessage, decode}; +use crate::varint; +use crate::{Error, Result}; +use prost::Message; + +/// Archive information header for each object in an IWA file +#[derive(Debug, Clone, PartialEq)] +pub struct ArchiveInfo { + /// Unique identifier for this archive across the document + pub identifier: Option, + /// Information about the messages contained in this archive + pub message_infos: Vec, +} + +impl ArchiveInfo { + /// Parse ArchiveInfo from a reader + pub fn parse(reader: &mut R) -> Result { + let mut identifier = None; + let mut message_infos = Vec::new(); + + // Parse Protocol Buffer fields + while let Ok((field_number, wire_type)) = Self::read_field_header(reader) { + match (field_number, wire_type) { + (1, 0) => { + // identifier (varint) + identifier = Some(varint::decode_varint(reader)?); + }, + (2, 2) => { + // message_infos (length-delimited, repeated) + let length = varint::decode_varint(reader)?; + let mut data = vec![0u8; length as usize]; + reader.read_exact(&mut data)?; + let mut cursor = std::io::Cursor::new(data); + message_infos.push(MessageInfo::parse(&mut cursor)?); + }, + _ => { + // Skip unknown fields + Self::skip_field(reader, wire_type)?; + }, + } + } + + Ok(ArchiveInfo { + identifier, + message_infos, + }) + } + + fn read_field_header(reader: &mut R) -> Result<(u32, u32)> { + let tag = varint::decode_varint(reader)?; + let field_number = (tag >> 3) as u32; + let wire_type = (tag & 0x07) as u32; + Ok((field_number, wire_type)) + } + + fn skip_field(reader: &mut R, wire_type: u32) -> Result<()> { + match wire_type { + 0 => { + // varint + varint::decode_varint(reader)?; + }, + 1 => { + // 64-bit + let mut buf = [0u8; 8]; + reader.read_exact(&mut buf)?; + }, + 2 => { + // length-delimited + let length = varint::decode_varint(reader)?; + let mut buf = vec![0u8; length as usize]; + reader.read_exact(&mut buf)?; + }, + 
5 => { + // 32-bit + let mut buf = [0u8; 4]; + reader.read_exact(&mut buf)?; + }, + _ => { + return Err(Error::InvalidFormat(format!( + "Unknown wire type: {}", + wire_type + ))); + }, + } + Ok(()) + } +} + +/// Information about a specific message within an archive +#[derive(Debug, Clone, PartialEq)] +pub struct MessageInfo { + /// Message type identifier (maps to specific protobuf message types) + pub type_: u32, + /// Version information for the message format + pub versions: Vec, + /// Length of the message data in bytes + pub length: u32, +} + +impl MessageInfo { + /// Parse MessageInfo from a reader + pub fn parse(reader: &mut R) -> Result { + let mut type_ = 0; + let mut versions = Vec::new(); + let mut length = 0; + + while let Ok((field_number, wire_type)) = Self::read_field_header(reader) { + match (field_number, wire_type) { + (1, 0) => { + // type (varint) + type_ = varint::decode_varint(reader)? as u32; + }, + (2, 0) => { + // version (varint, packed repeated) + versions.push(varint::decode_varint(reader)? as u32); + }, + (3, 0) => { + // length (varint) + length = varint::decode_varint(reader)? as u32; + }, + _ => { + // Skip unknown fields + Self::skip_field(reader, wire_type)?; + }, + } + } + + Ok(MessageInfo { + type_, + versions, + length, + }) + } + + fn read_field_header(reader: &mut R) -> Result<(u32, u32)> { + ArchiveInfo::read_field_header(reader) + } + + fn skip_field(reader: &mut R, wire_type: u32) -> Result<()> { + ArchiveInfo::skip_field(reader, wire_type) + } +} + +/// A parsed IWA archive containing multiple objects +#[derive(Debug)] +pub struct Archive { + /// The objects contained in this archive + pub objects: Vec, +} + +impl Archive { + /// Parse an IWA archive from decompressed data + /// + /// This function tracks byte offsets for each object to enable efficient + /// lazy loading and partial parsing. The implementation follows the IWA + /// format specification from Apple's iWorkFileFormat documentation. 
+ /// + /// # Performance + /// + /// O(n) where n is the number of bytes in the decompressed data. + /// Memory usage is proportional to the number of objects. + pub fn parse(data: &[u8]) -> Result { + let mut objects = Vec::new(); + let mut cursor = std::io::Cursor::new(data); + + while cursor.position() < data.len() as u64 { + // Track the start of this object's header (before the varint length) + let varint_start_pos = cursor.position(); + + // Read archive info length + let archive_info_length = varint::decode_varint(&mut cursor)? as usize; + + // The header starts after the varint that encodes its length + let header_start_pos = cursor.position(); + let varint_length = header_start_pos - varint_start_pos; + + // Read archive info + let mut archive_info_data = vec![0u8; archive_info_length]; + cursor.read_exact(&mut archive_info_data)?; + let mut archive_info_cursor = std::io::Cursor::new(archive_info_data); + let archive_info = ArchiveInfo::parse(&mut archive_info_cursor)?; + + // Calculate total data length from all message infos + let total_data_length: u64 = archive_info + .message_infos + .iter() + .map(|mi| mi.length as u64) + .sum(); + + // Data starts immediately after the header + let data_start_pos = cursor.position(); + + // Read message data + let mut messages = Vec::new(); + let mut decoded_messages = Vec::new(); + + for message_info in &archive_info.message_infos { + let mut message_data = vec![0u8; message_info.length as usize]; + cursor.read_exact(&mut message_data)?; + + let raw_message = RawMessage { + type_: message_info.type_, + data: message_data.clone(), + }; + + messages.push(raw_message); + + // Try to decode the message using prost + match decode(message_info.type_, &message_data) { + Ok(decoded) => decoded_messages.push(decoded), + Err(_) => { + // Message type not registered - try parsing as StorageArchive anyway + // since many message types might contain text + if let Ok(storage_msg) = + 
crate::protobuf::tswp::StorageArchive::decode(&*message_data) + { + let wrapper = crate::protobuf::StorageArchiveWrapper(storage_msg); + decoded_messages + .push(Box::new(wrapper) + as Box); + } + }, + } + } + + objects.push(ArchiveObject { + archive_info, + messages, + decoded_messages, + header_offset: varint_start_pos, + header_length: varint_length + archive_info_length as u64, + data_offset: data_start_pos, + data_length: total_data_length, + }); + } + + Ok(Archive { objects }) + } +} + +/// A single object within an IWA archive +#[derive(Debug)] +pub struct ArchiveObject { + /// Archive metadata + pub archive_info: ArchiveInfo, + /// Raw message data (protobuf-encoded) + pub messages: Vec, + /// Decoded message objects (if successfully decoded) + pub decoded_messages: Vec>, + /// Byte offset of the ArchiveInfo header in the decompressed stream + pub header_offset: u64, + /// Length of the ArchiveInfo header in bytes + pub header_length: u64, + /// Byte offset of the message data (after the ArchiveInfo header) + pub data_offset: u64, + /// Total length of all message data in bytes + pub data_length: u64, +} + +/// Raw protobuf message data +#[derive(Debug, Clone)] +pub struct RawMessage { + /// Message type identifier + pub type_: u32, + /// Raw protobuf data + pub data: Vec, +} + +impl ArchiveObject { + /// Extract all text content from decoded messages + pub fn extract_text(&self) -> Vec { + let mut all_text = Vec::new(); + for decoded_msg in &self.decoded_messages { + all_text.extend(decoded_msg.extract_text()); + } + all_text + } + + /// Get the primary message type from this object + pub fn primary_message_type(&self) -> Option { + self.messages.first().map(|msg| msg.type_) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_archive_info_parsing() { + // Create a minimal ArchiveInfo protobuf message + // Field 1 (identifier): varint 123 + // Field 2 (message_infos): length-delimited with a MessageInfo + let mut data = Vec::new(); + + 
// Field 1: identifier = 123 + data.extend(varint::encode_varint(1 << 3)); // tag: field 1, wire type 0 + data.extend(varint::encode_varint(123)); + + // Field 2: message_infos (simplified) + data.extend(varint::encode_varint((2 << 3) | 2)); // tag: field 2, wire type 2 + let message_info_data = vec![ + 0x08, 0x01, // type = 1 + 0x18, 0x05, // length = 5 + ]; + data.extend(varint::encode_varint(message_info_data.len() as u64)); + data.extend(message_info_data); + + let mut cursor = std::io::Cursor::new(data); + let archive_info = ArchiveInfo::parse(&mut cursor).unwrap(); + + assert_eq!(archive_info.identifier, Some(123)); + assert_eq!(archive_info.message_infos.len(), 1); + assert_eq!(archive_info.message_infos[0].type_, 1); + assert_eq!(archive_info.message_infos[0].length, 5); + } + + #[test] + fn test_message_info_parsing() { + // Create a MessageInfo protobuf message + let data = vec![ + 0x08, 0x2A, // type = 42 + 0x10, 0x01, // version = 1 + 0x18, 0x0A, // length = 10 + ]; + + let mut cursor = std::io::Cursor::new(data); + let message_info = MessageInfo::parse(&mut cursor).unwrap(); + + assert_eq!(message_info.type_, 42); + assert_eq!(message_info.versions, vec![1]); + assert_eq!(message_info.length, 10); + } +} diff --git a/src/iwa/bundle.rs b/crates/litchi-iwa/src/bundle.rs similarity index 99% rename from src/iwa/bundle.rs rename to crates/litchi-iwa/src/bundle.rs index 96a8855..043a9ea 100644 --- a/src/iwa/bundle.rs +++ b/crates/litchi-iwa/src/bundle.rs @@ -13,9 +13,9 @@ use std::path::{Path, PathBuf}; use plist::Value; use soapberry_zip::office::ArchiveReader; -use crate::iwa::archive::{Archive, ArchiveObject}; -use crate::iwa::zip_utils::parse_iwa_files_from_archive; -use crate::iwa::{Error, Result}; +use crate::archive::{Archive, ArchiveObject}; +use crate::zip_utils::parse_iwa_files_from_archive; +use crate::{Error, Result}; /// Represents an iWork document bundle #[derive(Debug)] @@ -60,7 +60,7 @@ impl Bundle { /// # Examples /// /// ```rust,no_run - 
/// use litchi::iwa::Bundle; + /// use litchi_iwa::Bundle; /// use std::fs; /// /// let data = fs::read("document.pages")?; diff --git a/src/iwa/charts/metadata_extractor.rs b/crates/litchi-iwa/src/charts/metadata_extractor.rs similarity index 96% rename from src/iwa/charts/metadata_extractor.rs rename to crates/litchi-iwa/src/charts/metadata_extractor.rs index 6c03038..0c73060 100644 --- a/src/iwa/charts/metadata_extractor.rs +++ b/crates/litchi-iwa/src/charts/metadata_extractor.rs @@ -13,7 +13,7 @@ //! ## Example //! //! ```rust,ignore -//! use litchi::iwa::charts::ChartMetadataExtractor; +//! use litchi_iwa::charts::ChartMetadataExtractor; //! //! let extractor = ChartMetadataExtractor::new(&bundle, &index); //! let charts = extractor.extract_all_charts()?; @@ -25,10 +25,10 @@ //! } //! ``` -use crate::iwa::Result; -use crate::iwa::bundle::Bundle; -use crate::iwa::object_index::{ObjectIndex, ResolvedObject}; -use crate::iwa::protobuf::tsch; +use crate::Result; +use crate::bundle::Bundle; +use crate::object_index::{ObjectIndex, ResolvedObject}; +use crate::protobuf::tsch; use prost::Message; /// Metadata extracted from a chart @@ -166,7 +166,7 @@ impl<'a> ChartMetadataExtractor<'a> { /// Convert chart type enum to string fn chart_type_to_string(&self, chart_type: i32) -> String { - use crate::iwa::protobuf::tsch::ChartType; + use crate::protobuf::tsch::ChartType; match ChartType::try_from(chart_type) { Ok(ChartType::UndefinedChartType) => "Undefined".to_string(), @@ -244,8 +244,7 @@ impl<'a> ChartMetadataExtractor<'a> { // TSWP storage types if msg.type_ >= 2001 && msg.type_ <= 2022 - && let Ok(storage) = - crate::iwa::protobuf::tswp::StorageArchive::decode(&*msg.data) + && let Ok(storage) = crate::protobuf::tswp::StorageArchive::decode(&*msg.data) && !storage.text.is_empty() { return Ok(Some(storage.text.join(" "))); diff --git a/src/iwa/charts/mod.rs b/crates/litchi-iwa/src/charts/mod.rs similarity index 100% rename from src/iwa/charts/mod.rs rename to 
crates/litchi-iwa/src/charts/mod.rs diff --git a/crates/litchi-iwa/src/document.rs b/crates/litchi-iwa/src/document.rs new file mode 100644 index 0000000..b4f7b81 --- /dev/null +++ b/crates/litchi-iwa/src/document.rs @@ -0,0 +1,348 @@ +//! High-Level iWork Document API +//! +//! Provides user-friendly interfaces for working with iWork documents +//! (Pages, Keynote, Numbers) similar to the high-level APIs for +//! Microsoft Office formats. +//! +//! This module provides a unified `Document` interface that works with all +//! iWork formats. For application-specific features, use the specialized +//! modules: +//! +//! - `crate::pages::PagesDocument` for Pages-specific features +//! - `crate::numbers::NumbersDocument` for Numbers-specific features +//! - `crate::keynote::KeynoteDocument` for Keynote-specific features + +use std::collections::HashMap; +use std::path::Path; + +use crate::bundle::Bundle; +use crate::media::{MediaManager, MediaStats}; +use crate::object_index::{ObjectIndex, ResolvedObject}; +use crate::registry::{Application, detect_application}; +use crate::structured::{self, StructuredData}; +use crate::text::TextExtractor; +use crate::{Error, Result}; + +/// Unified iWork document interface +#[derive(Debug)] +pub struct Document { + /// The underlying bundle + bundle: Bundle, + /// Object index for cross-referencing + object_index: ObjectIndex, + /// Detected application type + application: Application, + /// Media manager for assets + media_manager: Option, +} + +impl Document { + /// Open an iWork document from a bundle path + pub fn open>(path: P) -> Result { + let path_ref = path.as_ref(); + let bundle = Bundle::open(path_ref)?; + let object_index = ObjectIndex::from_bundle(&bundle)?; + + // Detect application type from message types + let all_message_types: Vec = bundle + .archives() + .values() + .flat_map(|archive| &archive.objects) + .flat_map(|obj| &obj.messages) + .map(|msg| msg.type_) + .collect(); + + let application = 
detect_application(&all_message_types).unwrap_or(Application::Common); + + // Try to create media manager (may fail for single-file bundles) + let media_manager = MediaManager::new(path_ref).ok(); + + Ok(Document { + bundle, + object_index, + application, + media_manager, + }) + } + + /// Open an iWork document from raw bytes + /// + /// This allows parsing iWork documents directly from memory without + /// requiring file system access. Note that media extraction is not + /// available when opening from bytes. + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::Document; + /// use std::fs; + /// + /// let data = fs::read("document.pages")?; + /// let doc = Document::from_bytes(&data)?; + /// let text = doc.text()?; + /// println!("Extracted text: {}", text); + /// # Ok::<(), Box>(()) + /// ``` + pub fn from_bytes(bytes: &[u8]) -> Result { + let bundle = Bundle::from_bytes(bytes)?; + let object_index = ObjectIndex::from_bundle(&bundle)?; + + // Detect application type from message types + let all_message_types: Vec = bundle + .archives() + .values() + .flat_map(|archive| &archive.objects) + .flat_map(|obj| &obj.messages) + .map(|msg| msg.type_) + .collect(); + + let application = detect_application(&all_message_types).unwrap_or(Application::Common); + + Ok(Document { + bundle, + object_index, + application, + media_manager: None, // No media access from bytes + }) + } + + /// Get the document's text content + /// + /// This method uses the modern text extraction API that efficiently + /// processes TSWP storage objects across all iWork applications. 
+ pub fn text(&self) -> Result { + let mut extractor = TextExtractor::new(); + extractor.extract_from_bundle(&self.bundle)?; + Ok(extractor.get_text()) + } + + /// Get all objects in the document + pub fn objects(&self) -> Vec { + self.object_index + .all_object_ids() + .iter() + .filter_map(|&id| { + self.object_index + .resolve_object(&self.bundle, id) + .ok() + .flatten() + }) + .collect() + } + + /// Get an object by ID + pub fn get_object(&self, id: u64) -> Result> { + self.object_index.resolve_object(&self.bundle, id) + } + + /// Get the application type + pub fn application(&self) -> Application { + self.application + } + + /// Get the underlying bundle + pub fn bundle(&self) -> &Bundle { + &self.bundle + } + + /// Get document metadata + pub fn metadata(&self) -> &crate::bundle::BundleMetadata { + self.bundle.metadata() + } + + /// Get the media manager (if available) + pub fn media_manager(&self) -> Option<&MediaManager> { + self.media_manager.as_ref() + } + + /// Get media statistics + pub fn media_stats(&self) -> Option { + self.media_manager.as_ref().map(|m| m.stats()) + } + + /// Extract a media asset by filename + pub fn extract_media(&self, filename: &str) -> Result> { + let manager = self + .media_manager + .as_ref() + .ok_or_else(|| Error::Bundle("Media manager not available".to_string()))?; + manager.extract(filename) + } + + /// Extract structured data from the document + /// + /// This returns tables, slides, sections, and other structured content + /// depending on the document type (Numbers, Keynote, or Pages). 
+ pub fn extract_structured_data(&self) -> Result { + structured::extract_all(&self.bundle, &self.object_index) + } + + /// Get document statistics + pub fn stats(&self) -> DocumentStats { + let total_objects = self.object_index.all_object_ids().len(); + let archives_count = self.bundle.archives().len(); + + let mut message_type_counts = HashMap::new(); + for object in self.objects() { + for &msg_type in &object.message_types() { + *message_type_counts.entry(msg_type).or_insert(0) += 1; + } + } + + let media_stats = self.media_stats(); + + DocumentStats { + total_objects, + archives_count, + message_type_counts, + application: self.application, + media_stats, + } + } +} + +/// Statistics about a document +#[derive(Debug, Clone)] +pub struct DocumentStats { + /// Total number of objects + pub total_objects: usize, + /// Number of archives + pub archives_count: usize, + /// Count of each message type + pub message_type_counts: HashMap, + /// Application type + pub application: Application, + /// Media statistics (if available) + pub media_stats: Option, +} + +impl DocumentStats { + /// Get the most common message type + pub fn most_common_message_type(&self) -> Option<(u32, usize)> { + self.message_type_counts + .iter() + .max_by_key(|&(_, count)| count) + .map(|(&type_, &count)| (type_, count)) + } + + /// Get message type distribution as a string + pub fn message_type_summary(&self) -> String { + let mut types: Vec<_> = self.message_type_counts.iter().collect(); + types.sort_by_key(|&(_, count)| std::cmp::Reverse(*count)); + + let top_types: Vec = types + .into_iter() + .take(5) + .map(|(type_, count)| format!("{}: {}", type_, count)) + .collect(); + + if top_types.len() < self.message_type_counts.len() { + format!( + "{} (and {} more)", + top_types.join(", "), + self.message_type_counts.len() - top_types.len() + ) + } else { + top_types.join(", ") + } + } +} + +// Note: Application-specific document types have been moved to dedicated modules: +// - 
crate::pages::PagesDocument +// - crate::numbers::NumbersDocument +// - crate::keynote::KeynoteDocument +// +// The unified Document type above works with all formats and provides +// common functionality. For application-specific features, use the +// specialized document types in their respective modules. + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_document_stats() { + let mut message_counts = HashMap::new(); + message_counts.insert(1, 10); + message_counts.insert(2, 5); + message_counts.insert(3, 15); + + let stats = DocumentStats { + total_objects: 25, + archives_count: 3, + message_type_counts: message_counts, + application: Application::Pages, + media_stats: None, + }; + + assert_eq!(stats.total_objects, 25); + assert_eq!(stats.archives_count, 3); + assert_eq!(stats.most_common_message_type(), Some((3, 15))); + + let summary = stats.message_type_summary(); + assert!(summary.contains("3: 15")); + assert!(summary.contains("1: 10")); + } + + #[test] + fn test_application_detection() { + // Test Keynote detection (should work with current registry) + let keynote_types = vec![101, 102, 103]; // KN.* types + let keynote_result = detect_application(&keynote_types); + assert!(keynote_result.is_some()); // Should detect some application + + // Test with mixed types + let mixed_types = vec![1, 1, 1, 101]; // Mostly common types, one Keynote type + let mixed_result = detect_application(&mixed_types); + assert!(mixed_result.is_some()); // Should detect something + + // Test empty input + assert_eq!(detect_application(&[]), None); + } + + #[test] + fn test_document_parsing() { + let doc_path = std::path::Path::new("test.pages"); + if !doc_path.exists() { + // Skip test if test file doesn't exist + return; + } + + let doc_result = Document::open(doc_path); + assert!( + doc_result.is_ok(), + "Failed to open document: {:?}", + doc_result.err() + ); + + let doc = doc_result.unwrap(); + + // Verify we can get objects + let objects = doc.objects(); + 
assert!(!objects.is_empty(), "Document should contain objects"); + + // Verify we can get stats + let stats = doc.stats(); + assert!(stats.total_objects > 0, "Document should have objects"); + + // Test text extraction + let text_result = doc.text(); + assert!(text_result.is_ok()); + } + + #[test] + fn test_text_extraction() { + let doc_path = std::path::Path::new("test.pages"); + if !doc_path.exists() { + return; + } + + let doc = Document::open(doc_path).unwrap(); + let text_result = doc.text(); + assert!(text_result.is_ok()); + + // Text extraction should succeed even if result is empty + let _text = text_result.unwrap(); + } +} diff --git a/crates/litchi-iwa/src/keynote/document.rs b/crates/litchi-iwa/src/keynote/document.rs new file mode 100644 index 0000000..8daadd5 --- /dev/null +++ b/crates/litchi-iwa/src/keynote/document.rs @@ -0,0 +1,642 @@ +//! Keynote Document Implementation +//! +//! Provides high-level API for working with Apple Keynote presentations. + +use std::path::Path; + +use super::show::KeynoteShow; +use super::slide::KeynoteSlide; +use crate::Result; +use crate::bundle::Bundle; +use crate::object_index::ObjectIndex; +use crate::registry::Application; +use crate::text::TextExtractor; + +/// High-level interface for Keynote documents +pub struct KeynoteDocument { + /// Underlying bundle + bundle: Bundle, + /// Object index for cross-referencing + object_index: ObjectIndex, +} + +impl KeynoteDocument { + /// Open a Keynote document from a path + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::keynote::KeynoteDocument; + /// + /// let doc = KeynoteDocument::open("presentation.key")?; + /// println!("Loaded Keynote presentation"); + /// # Ok::<(), Box>(()) + /// ``` + pub fn open>(path: P) -> Result { + let bundle = Bundle::open(path)?; + let object_index = ObjectIndex::from_bundle(&bundle)?; + + Ok(Self { + bundle, + object_index, + }) + } + + /// Open a Keynote document from raw bytes + /// + /// # Examples + /// + /// 
```rust,no_run + /// use litchi_iwa::keynote::KeynoteDocument; + /// use std::fs; + /// + /// let data = fs::read("presentation.key")?; + /// let doc = KeynoteDocument::from_bytes(&data)?; + /// # Ok::<(), Box>(()) + /// ``` + pub fn from_bytes(bytes: &[u8]) -> Result { + let bundle = Bundle::from_bytes(bytes)?; + let object_index = ObjectIndex::from_bundle(&bundle)?; + + Ok(Self { + bundle, + object_index, + }) + } + + /// Create a Keynote document from raw bytes (ZIP archive data). + /// + /// This is used for single-pass parsing where the ZIP archive has already + /// been validated during format detection. It avoids double-parsing. + pub fn from_archive_bytes(bytes: &[u8]) -> Result { + Self::from_bytes(bytes) + } + + /// Extract all text content from the presentation + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::keynote::KeynoteDocument; + /// + /// let doc = KeynoteDocument::open("presentation.key")?; + /// let text = doc.text()?; + /// println!("{}", text); + /// # Ok::<(), Box>(()) + /// ``` + pub fn text(&self) -> Result { + let mut extractor = TextExtractor::new(); + extractor.extract_from_bundle(&self.bundle)?; + Ok(extractor.get_text()) + } + + /// Extract slides from the presentation + /// + /// Keynote presentations consist of slides with content, animations, and transitions. + /// This method parses the presentation structure and returns all slides. 
+ /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::keynote::KeynoteDocument; + /// + /// let doc = KeynoteDocument::open("presentation.key")?; + /// let slides = doc.slides()?; + /// + /// for slide in slides { + /// println!("Slide {}", slide.index + 1); + /// if let Some(title) = &slide.title { + /// println!(" Title: {}", title); + /// } + /// for text in &slide.text_content { + /// println!(" - {}", text); + /// } + /// } + /// # Ok::<(), Box>(()) + /// ``` + pub fn slides(&self) -> Result> { + let mut slides = Vec::new(); + + // Find slide archives (message type 5/6 is KN.SlideArchive, type 1102 in our decoder) + let slide_objects = self.bundle.find_objects_by_type(1102); + + if slide_objects.is_empty() { + // Try alternate slide message types (5 and 6 from JSON) + let alt_slide_objects_5 = self.bundle.find_objects_by_type(5); + let alt_slide_objects_6 = self.bundle.find_objects_by_type(6); + + for (index, (_archive_name, object)) in alt_slide_objects_5 + .iter() + .chain(alt_slide_objects_6.iter()) + .enumerate() + { + let slide = self.parse_slide(index, object)?; + if !slide.is_empty() { + slides.push(slide); + } + } + } else { + for (index, (_archive_name, object)) in slide_objects.iter().enumerate() { + let slide = self.parse_slide(index, object)?; + if !slide.is_empty() { + slides.push(slide); + } + } + } + + // If no slides found, create a default slide with all text + if slides.is_empty() { + let mut extractor = TextExtractor::new(); + extractor.extract_from_bundle(&self.bundle)?; + + if extractor.storage_count() > 0 { + let mut slide = KeynoteSlide::new(0); + for storage in extractor.storages() { + if !storage.is_empty() { + slide.text_storages.push(storage.clone()); + slide.text_content.push(storage.plain_text().to_string()); + } + } + if !slide.is_empty() { + slides.push(slide); + } + } + } + + Ok(slides) + } + + /// Parse a single slide from an object + fn parse_slide( + &self, + index: usize, + object: 
&crate::archive::ArchiveObject, + ) -> Result { + use prost::Message; + + let mut slide = KeynoteSlide::new(index); + + // Extract text content from the slide object + let text_parts = object.extract_text(); + + if !text_parts.is_empty() { + // First text part is typically the title or slide name + slide.title = text_parts.first().cloned(); + + // Remaining parts are content + slide.text_content = text_parts.into_iter().skip(1).collect(); + } + + // Parse the SlideArchive protobuf message + // KN.SlideArchive contains: + // - name: string (slide title) + // - note: reference to KN.NoteArchive (speaker notes) + // - drawables: references to drawable objects (shapes, text boxes, images) + // - builds: references to KN.BuildArchive (animations) + // - transition: TransitionArchive (transition effect) + // - master: reference to master slide + + if let Some(raw_message) = object.messages.first() { + // Try to decode as SlideArchive + if let Ok(slide_archive) = crate::protobuf::kn::SlideArchive::decode(&*raw_message.data) + { + // Extract slide name if available + if let Some(ref name) = slide_archive.name + && !name.is_empty() + { + slide.title = Some(name.clone()); + } + + // Extract master slide reference + if let Some(ref master) = slide_archive.master { + slide.master_slide_id = Some(master.identifier); + } + + // Extract build animations + for build_ref in &slide_archive.builds { + if let Ok(build) = self.extract_build_animation(build_ref.identifier) { + slide.builds.push(build); + } + } + + // Extract transition + slide.transition = self.parse_transition(&slide_archive.transition); + + // Resolve drawable references to get text boxes and other content + for drawable_ref in &slide_archive.drawables { + if let Ok(text_content) = self.extract_drawable_text(drawable_ref.identifier) + && !text_content.is_empty() + { + slide.text_content.push(text_content); + } + } + + // Extract speaker notes + if let Some(ref note_ref) = slide_archive.note + && let Ok(notes) = 
self.extract_speaker_notes(note_ref.identifier) + { + slide.notes = Some(notes); + } + } + } + + // Extract text from text storages + let extractor = TextExtractor::new(); + if let Ok(storage) = extractor.extract_from_object(object) + && !storage.is_empty() + { + slide.text_storages.push(storage); + } + + Ok(slide) + } + + /// Extract build animation from a BuildArchive object + fn extract_build_animation(&self, build_id: u64) -> Result { + use super::slide::{BuildAnimation, BuildAnimationType}; + use prost::Message; + + if let Some(resolved) = self.object_index.resolve_object(&self.bundle, build_id)? { + for msg in &resolved.messages { + if let Ok(build_archive) = crate::protobuf::kn::BuildArchive::decode(&*msg.data) { + let animation_type = Self::parse_build_delivery(&build_archive.delivery); + let target_id = Some(build_archive.drawable.identifier); + let duration = build_archive.duration as f32; + + return Ok(BuildAnimation { + animation_type, + target_id, + duration, + }); + } + } + } + + // Return a default build if parsing failed + Ok(BuildAnimation { + animation_type: BuildAnimationType::Other, + target_id: None, + duration: 0.0, + }) + } + + /// Parse build delivery string into animation type + fn parse_build_delivery(delivery: &str) -> super::slide::BuildAnimationType { + use super::slide::BuildAnimationType; + + match delivery.to_lowercase().as_str() { + s if s.contains("appear") => BuildAnimationType::Appear, + s if s.contains("dissolve") => BuildAnimationType::Dissolve, + s if s.contains("move") => BuildAnimationType::MoveIn, + s if s.contains("scale") && s.contains("fade") => BuildAnimationType::FadeAndScale, + s if s.contains("scale") => BuildAnimationType::Scale, + _ => BuildAnimationType::Other, + } + } + + /// Parse transition archive into slide transition + fn parse_transition( + &self, + transition: &crate::protobuf::kn::TransitionArchive, + ) -> Option { + use super::slide::{SlideTransition, TransitionType}; + + // Extract duration from 
attributes + // The attributes field is required (not Optional) + let duration = transition.attributes.database_duration.unwrap_or(0.0) as f32; + + // Determine transition type from attributes + // The actual transition type is embedded in the attributes structure + // For now, we use a generic transition type + let transition_type = TransitionType::Other; + + Some(SlideTransition { + transition_type, + duration, + }) + } + + /// Extract text content from a drawable object + fn extract_drawable_text(&self, drawable_id: u64) -> Result { + use prost::Message; + + if let Some(resolved) = self + .object_index + .resolve_object(&self.bundle, drawable_id)? + { + // Drawables can contain text storages + for msg in &resolved.messages { + // Try to extract text from TSWP storage messages (types 2001-2022) + if msg.type_ >= 2001 + && msg.type_ <= 2022 + && let Ok(storage) = crate::protobuf::tswp::StorageArchive::decode(&*msg.data) + && !storage.text.is_empty() + { + return Ok(storage.text.join(" ")); + } + } + + // Also try generic text extraction from the resolved object + for msg in &resolved.messages { + if let Ok(storage) = crate::protobuf::tswp::StorageArchive::decode(&*msg.data) + && !storage.text.is_empty() + { + return Ok(storage.text.join(" ")); + } + } + } + + Ok(String::new()) + } + + /// Extract speaker notes from a NoteArchive object + fn extract_speaker_notes(&self, note_id: u64) -> Result { + use prost::Message; + + if let Some(resolved) = self.object_index.resolve_object(&self.bundle, note_id)? { + for msg in &resolved.messages { + if let Ok(note_archive) = crate::protobuf::kn::NoteArchive::decode(&*msg.data) { + // The note contains a reference to a TSWP.StorageArchive + let storage_id = note_archive.contained_storage.identifier; + if let Some(storage_obj) = + self.object_index.resolve_object(&self.bundle, storage_id)? 
+ { + for storage_msg in &storage_obj.messages { + if let Ok(storage) = + crate::protobuf::tswp::StorageArchive::decode(&*storage_msg.data) + { + let notes_text = storage.text.join("\n"); + if !notes_text.is_empty() { + return Ok(notes_text); + } + } + } + } + } + } + } + + Ok(String::new()) + } + + /// Extract presentation metadata. + /// + /// Returns metadata from the Keynote bundle's Properties.plist file. + /// This includes document properties like title, author, creation date, etc. + /// + /// # Performance + /// + /// This method performs minimal parsing, extracting only standard metadata + /// fields from the bundle's Properties.plist. The metadata is not cached + /// within KeynoteDocument to avoid duplication with the Presentation cache. + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::keynote::KeynoteDocument; + /// + /// let doc = KeynoteDocument::open("presentation.key")?; + /// if let Some(metadata) = doc.metadata()? { + /// if let Some(title) = metadata.title { + /// println!("Title: {}", title); + /// } + /// if let Some(author) = metadata.author { + /// println!("Author: {}", author); + /// } + /// } + /// # Ok::<(), Box>(()) + /// ``` + #[allow(unused_assignments)] // has_data is intentionally reassigned to track if any field was set + pub fn metadata(&self) -> Result> { + let bundle_metadata = self.bundle.metadata(); + + // Extract standard metadata fields from Properties.plist and bundle structure + let mut metadata = litchi_core::Metadata::default(); + let mut has_data = false; + + // Extract title (Keynote may store in show structure, try there first) + let show_title = self.show().ok().and_then(|show| show.title); + if let Some(title) = show_title { + metadata.title = Some(title); + has_data = true; + } + + // Try alternative title keys from Properties.plist + if metadata.title.is_none() { + if let Some(title) = bundle_metadata.get_property_string("Title") { + metadata.title = Some(title); + has_data = true; + } else 
if let Some(title) = bundle_metadata.get_property_string("kDocumentTitleKey") { + metadata.title = Some(title); + has_data = true; + } + } + + // Extract author + if let Some(author) = bundle_metadata.get_property_string("Author") { + metadata.author = Some(author); + has_data = true; + } else if let Some(author) = bundle_metadata.get_property_string("kDocumentAuthorKey") { + metadata.author = Some(author); + has_data = true; + } else if let Some(author) = bundle_metadata.get_property_string("kSFWPAuthorPropertyKey") { + metadata.author = Some(author); + has_data = true; + } + + // Extract keywords + if let Some(keywords) = bundle_metadata.get_property_string("Keywords") { + metadata.keywords = Some(keywords); + has_data = true; + } + + // Extract comments/description + if let Some(comments) = bundle_metadata.get_property_string("Comments") { + metadata.description = Some(comments); + has_data = true; + } + + // Extract application name (Keynote applications) + if let Some(app) = bundle_metadata.detected_application.as_ref() { + metadata.application = Some(app.clone()); + has_data = true; + } else { + // Default to Keynote if not detected + metadata.application = Some("Keynote".to_string()); + has_data = true; + } + + // Extract revision from Properties.plist + if let Some(revision) = bundle_metadata.get_property_string("revision") { + metadata.revision = Some(revision); + has_data = true; + } + + // Extract build version as additional version info + if let Some(version) = bundle_metadata.latest_build_version() { + // If we don't have revision yet, use build version + if metadata.revision.is_none() { + metadata.revision = Some(version.to_string()); + has_data = true; + } + } + + // Extract file format version + if let Some(format_version) = bundle_metadata.get_property_string("fileFormatVersion") { + // Store in content_status as it doesn't have a perfect mapping + metadata.content_status = Some(format!("Keynote Format Version {}", format_version)); + has_data = 
true; + } + + // Note: User-facing metadata like creation date, modification date, etc. + // are typically stored in DocumentMetadata.iwa or Metadata.iwa files, + // which would require additional IWA parsing. The current implementation + // extracts what's readily available from Properties.plist and show structure. + + // If we found any metadata, return it + if has_data { + Ok(Some(metadata)) + } else { + Ok(None) + } + } + + /// Extract the full show structure with all slides + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::keynote::KeynoteDocument; + /// + /// # fn main() -> Result<(), Box> { + /// let doc = KeynoteDocument::open("presentation.key")?; + /// let show = doc.show()?; + /// + /// println!("Presentation: {}", show.title.as_deref().unwrap_or_default()); + /// println!("Slides: {}", show.slide_count()); + /// # Ok::<(), Box>(()) + /// # } + /// ``` + pub fn show(&self) -> Result { + let mut show = KeynoteShow::new(); + + // Extract show metadata from ShowArchive (message type 2 is KN.ShowArchive) + let show_objects = self.bundle.find_objects_by_type(1101); + if let Some((_archive_name, object)) = show_objects.first() { + let text_parts = object.extract_text(); + show.title = text_parts.first().cloned(); + } + + // Add all slides + let slides = self.slides()?; + for slide in slides { + show.add_slide(slide); + } + + Ok(show) + } + + /// Get the underlying bundle + pub fn bundle(&self) -> &Bundle { + &self.bundle + } + + /// Get the object index + pub fn object_index(&self) -> &ObjectIndex { + &self.object_index + } + + /// Get document statistics + pub fn stats(&self) -> KeynoteDocumentStats { + let total_objects = self.object_index.all_object_ids().len(); + let slides_result = self.slides(); + let slide_count = slides_result.as_ref().map(|s| s.len()).unwrap_or(0); + + KeynoteDocumentStats { + total_objects, + slide_count, + application: Application::Keynote, + } + } +} + +/// Statistics about a Keynote document +#[derive(Debug, 
Clone)] +pub struct KeynoteDocumentStats { + /// Total number of objects + pub total_objects: usize, + /// Number of slides + pub slide_count: usize, + /// Application type (always Keynote) + pub application: Application, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_keynote_document_open() { + let doc_path = std::path::Path::new("test.key"); + if !doc_path.exists() { + // Skip test if test file doesn't exist + return; + } + + let doc_result = KeynoteDocument::open(doc_path); + assert!( + doc_result.is_ok(), + "Failed to open Keynote document: {:?}", + doc_result.err() + ); + + let doc = doc_result.unwrap(); + assert!(!doc.object_index.all_object_ids().is_empty()); + } + + #[test] + fn test_keynote_text_extraction() { + let doc_path = std::path::Path::new("test.key"); + if !doc_path.exists() { + return; + } + + let doc = KeynoteDocument::open(doc_path).unwrap(); + let text_result = doc.text(); + assert!(text_result.is_ok()); + } + + #[test] + fn test_keynote_slides() { + let doc_path = std::path::Path::new("test.key"); + if !doc_path.exists() { + return; + } + + let doc = KeynoteDocument::open(doc_path).unwrap(); + let slides_result = doc.slides(); + assert!(slides_result.is_ok()); + + let slides = slides_result.unwrap(); + // Presentation should have at least one slide + assert!( + !slides.is_empty(), + "Presentation should have at least one slide" + ); + } + + #[test] + fn test_keynote_show() { + let doc_path = std::path::Path::new("test.key"); + if !doc_path.exists() { + return; + } + + let doc = KeynoteDocument::open(doc_path).unwrap(); + let show_result = doc.show(); + assert!(show_result.is_ok()); + + let show = show_result.unwrap(); + assert!(!show.is_empty(), "Show should have slides"); + } +} diff --git a/crates/litchi-iwa/src/keynote/mod.rs b/crates/litchi-iwa/src/keynote/mod.rs new file mode 100644 index 0000000..639d852 --- /dev/null +++ b/crates/litchi-iwa/src/keynote/mod.rs @@ -0,0 +1,39 @@ +//! 
Keynote Presentation Support +//! +//! This module provides comprehensive support for parsing Apple Keynote presentations, +//! including slide extraction, build animations, and multimedia content. +//! +//! ## Features +//! +//! - Slide extraction with content +//! - Master slide identification +//! - Build animations and transitions +//! - Speaker notes +//! - Multimedia references +//! +//! ## Example +//! +//! ```rust,no_run +//! use litchi_iwa::keynote::KeynoteDocument; +//! +//! let doc = KeynoteDocument::open("presentation.key")?; +//! let slides = doc.slides()?; +//! +//! for slide in slides { +//! if let Some(title) = &slide.title { +//! println!("Slide {}: {}", slide.index + 1, title); +//! } +//! for text in &slide.text_content { +//! println!(" - {}", text); +//! } +//! } +//! # Ok::<(), Box>(()) +//! ``` + +pub mod document; +pub mod show; +pub mod slide; + +pub use document::KeynoteDocument; +pub use show::KeynoteShow; +pub use slide::{BuildAnimation, KeynoteSlide, SlideTransition}; diff --git a/src/iwa/keynote/show.rs b/crates/litchi-iwa/src/keynote/show.rs similarity index 100% rename from src/iwa/keynote/show.rs rename to crates/litchi-iwa/src/keynote/show.rs diff --git a/crates/litchi-iwa/src/keynote/slide.rs b/crates/litchi-iwa/src/keynote/slide.rs new file mode 100644 index 0000000..2916cff --- /dev/null +++ b/crates/litchi-iwa/src/keynote/slide.rs @@ -0,0 +1,220 @@ +//! Keynote Slide Structure +//! +//! Slides are the core content units in Keynote presentations. 
+ +use crate::text::TextStorage; + +/// Represents a slide in a Keynote presentation +#[derive(Debug, Clone)] +pub struct KeynoteSlide { + /// Slide index (0-based) + pub index: usize, + /// Slide title + pub title: Option, + /// Text content on the slide (bullet points, text boxes) + pub text_content: Vec, + /// Speaker notes associated with the slide + pub notes: Option, + /// Text storages in this slide + pub text_storages: Vec, + /// Build animations on this slide + pub builds: Vec, + /// Slide transition + pub transition: Option, + /// Master slide reference + pub master_slide_id: Option, +} + +impl KeynoteSlide { + /// Create a new slide + pub fn new(index: usize) -> Self { + Self { + index, + title: None, + text_content: Vec::new(), + notes: None, + text_storages: Vec::new(), + builds: Vec::new(), + transition: None, + master_slide_id: None, + } + } + + /// Get all text from the slide (title + content + notes) + pub fn all_text(&self) -> Vec { + let mut all = Vec::new(); + if let Some(ref title) = self.title { + all.push(title.clone()); + } + all.extend(self.text_content.clone()); + if let Some(ref notes) = self.notes { + all.push(notes.clone()); + } + + // Include text from storages + for storage in &self.text_storages { + let text = storage.plain_text(); + if !text.is_empty() { + all.push(text.to_string()); + } + } + + all + } + + /// Get plain text content as a single string + pub fn plain_text(&self) -> String { + self.all_text().join("\n") + } + + /// Check if slide is empty + pub fn is_empty(&self) -> bool { + self.title.is_none() + && self.text_content.is_empty() + && self.notes.is_none() + && self.text_storages.is_empty() + } + + /// Get number of build animations + pub fn build_count(&self) -> usize { + self.builds.len() + } +} + +/// Represents a build animation on a slide +#[derive(Debug, Clone)] +pub struct BuildAnimation { + /// Animation type + pub animation_type: BuildAnimationType, + /// Target object reference + pub target_id: Option, + /// 
Animation duration (in seconds) + pub duration: f32, +} + +/// Types of build animations +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BuildAnimationType { + /// Appear + Appear, + /// Dissolve + Dissolve, + /// Move in + MoveIn, + /// Scale + Scale, + /// Fade and scale + FadeAndScale, + /// Other/unknown + Other, +} + +impl BuildAnimationType { + /// Get a human-readable name + pub fn name(&self) -> &'static str { + match self { + Self::Appear => "Appear", + Self::Dissolve => "Dissolve", + Self::MoveIn => "Move In", + Self::Scale => "Scale", + Self::FadeAndScale => "Fade and Scale", + Self::Other => "Other", + } + } +} + +/// Represents a slide transition effect +#[derive(Debug, Clone)] +pub struct SlideTransition { + /// Transition type + pub transition_type: TransitionType, + /// Transition duration (in seconds) + pub duration: f32, +} + +/// Types of slide transitions +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TransitionType { + /// No transition + None, + /// Dissolve + Dissolve, + /// Push + Push, + /// Wipe + Wipe, + /// Flip + Flip, + /// Cube + Cube, + /// Other/unknown + Other, +} + +impl TransitionType { + /// Get a human-readable name + pub fn name(&self) -> &'static str { + match self { + Self::None => "None", + Self::Dissolve => "Dissolve", + Self::Push => "Push", + Self::Wipe => "Wipe", + Self::Flip => "Flip", + Self::Cube => "Cube", + Self::Other => "Other", + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_slide_creation() { + let mut slide = KeynoteSlide::new(0); + assert_eq!(slide.index, 0); + assert!(slide.is_empty()); + + slide.title = Some("Introduction".to_string()); + slide.text_content.push("Point 1".to_string()); + slide.text_content.push("Point 2".to_string()); + + assert!(!slide.is_empty()); + let text = slide.plain_text(); + assert!(text.contains("Introduction")); + assert!(text.contains("Point 1")); + } + + #[test] + fn test_slide_all_text() { + let mut slide = KeynoteSlide::new(0); + 
slide.title = Some("Title".to_string()); + slide.text_content.push("Content".to_string()); + slide.notes = Some("Notes".to_string()); + slide + .text_storages + .push(TextStorage::from_text("Storage".to_string())); + + let all_text = slide.all_text(); + assert_eq!(all_text.len(), 4); + assert_eq!(all_text[0], "Title"); + assert_eq!(all_text[1], "Content"); + assert_eq!(all_text[2], "Notes"); + assert_eq!(all_text[3], "Storage"); + } + + #[test] + fn test_build_animation_type_names() { + assert_eq!(BuildAnimationType::Appear.name(), "Appear"); + assert_eq!(BuildAnimationType::Dissolve.name(), "Dissolve"); + assert_eq!(BuildAnimationType::MoveIn.name(), "Move In"); + } + + #[test] + fn test_transition_type_names() { + assert_eq!(TransitionType::None.name(), "None"); + assert_eq!(TransitionType::Dissolve.name(), "Dissolve"); + assert_eq!(TransitionType::Push.name(), "Push"); + } +} diff --git a/crates/litchi-iwa/src/lib.rs b/crates/litchi-iwa/src/lib.rs new file mode 100644 index 0000000..f52b4ab --- /dev/null +++ b/crates/litchi-iwa/src/lib.rs @@ -0,0 +1,201 @@ +//! iWork Archive Format Support +//! +//! This module provides comprehensive support for parsing Apple's iWork file formats +//! (Pages, Keynote, Numbers) which use the IWA (iWork Archive) format. +//! +//! ## Quick Start +//! +//! ```rust,no_run +//! use litchi_iwa::Document; +//! +//! // Open an iWork document +//! let doc = Document::open("document.pages")?; +//! +//! // Extract text content +//! let text = doc.text()?; +//! println!("{}", text); +//! +//! // Get document statistics +//! let stats = doc.stats(); +//! println!("Objects: {}", stats.total_objects); +//! println!("Application: {:?}", stats.application); +//! +//! // Extract structured data (tables, slides, sections) +//! let structured = doc.extract_structured_data()?; +//! println!("{}", structured.summary()); +//! # Ok::<(), litchi_iwa::Error>(()) +//! ``` +//! +//! ## iWork File Structure +//! +//! 
iWork documents are bundles containing: +//! - `Index.zip`: Contains IWA files with serialized objects +//! - `Data/`: Directory containing media assets (images, videos, audio) +//! - `Metadata/`: Document metadata and properties +//! - Preview images at root level +//! +//! ## IWA Format +//! +//! Each `.iwa` file contains: +//! - Snappy-compressed data (custom framing without stream identifier) +//! - Protobuf-encoded messages +//! - Variable-length integers for message lengths +//! - ArchiveInfo and MessageInfo headers for metadata +//! +//! ## Features +//! +//! ### Text Extraction +//! - Automatic extraction from TSWP storage messages +//! - Support for all iWork applications +//! - Preserves document structure +//! +//! ### Media Management +//! - Automatic media asset discovery +//! - Support for images, videos, audio, PDFs +//! - Media extraction and statistics +//! +//! ### Structured Data +//! - Tables from Numbers (with CSV export) +//! - Slides from Keynote (with titles and content) +//! - Sections from Pages (with headings and paragraphs) +//! +//! ### Parsing from Bytes +//! - No file system access required +//! - Direct memory parsing +//! - Useful for web services and embedded systems +//! +//! ## Examples +//! +//! ### Parse from bytes +//! +//! ```rust,no_run +//! use litchi_iwa::Document; +//! use std::fs; +//! +//! let bytes = fs::read("document.pages")?; +//! let doc = Document::from_bytes(&bytes)?; +//! let text = doc.text()?; +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ### Extract media +//! +//! ```rust,no_run +//! use litchi_iwa::Document; +//! +//! let doc = Document::open("presentation.key")?; +//! +//! // Get media statistics +//! if let Some(stats) = doc.media_stats() { +//! println!("Media: {}", stats.summary()); +//! } +//! +//! // Extract specific media file +//! if let Ok(data) = doc.extract_media("image.png") { +//! std::fs::write("extracted.png", data)?; +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ### Extract tables +//! 
+//! ```rust,no_run +//! use litchi_iwa::Document; +//! +//! let doc = Document::open("spreadsheet.numbers")?; +//! let structured = doc.extract_structured_data()?; +//! +//! for table in &structured.tables { +//! let csv = table.to_csv(); +//! println!("Table: {}\n{}", table.name, csv); +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## Performance +//! +//! The implementation is optimized for: +//! - Fast decompression (50-100 MB/s per core) +//! - Efficient parsing (100-200 MB/s per core) +//! - Low memory overhead (~2-3x document size) +//! - O(1) message type lookups (perfect hash maps) +//! +//! ## Reference +//! +//! This implementation is based on: +//! - `libetonyek` - C++ library from Document Liberation Project +//! - `pyiwa` - Python iWork format reader +//! - `iWorkFileFormat` - Reverse-engineered format documentation + +// Core parsing modules +pub mod archive; +pub mod bundle; +pub mod media; +pub mod object_index; +pub mod protobuf; +pub mod ref_graph; +pub mod registry; +pub mod snappy; +pub mod structured; +pub mod varint; +pub mod zip_utils; + +/// Shared text extraction utilities +pub mod text; + +/// High-level iWork document types +pub mod document; + +pub mod keynote; +pub mod numbers; +/// Application-specific modules +pub mod pages; + +pub mod charts; +/// Cross-application content extractors +pub mod shapes; + +/// Re-export commonly used types +pub use archive::{ArchiveInfo, MessageInfo}; +pub use bundle::{Bundle, BundleMetadata, PropertyValue}; +pub use document::Document; +pub use media::{MediaAsset, MediaManager, MediaStats, MediaType}; +pub use ref_graph::ReferenceGraph; +pub use snappy::SnappyStream; +pub use structured::{CellValue, Section, Slide, StructuredData, Table}; +pub use text::{ParagraphStyle, TextExtractor, TextFragment, TextStorage, TextStyle}; +pub use zip_utils::{ + FileStructureInfo, analyze_file_structure, extract_message_types_from_archive, + parse_iwa_files_from_archive, +}; + +/// Error types for iWork parsing 
/// Category of a media asset stored inside an iWork bundle.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum MediaType {
    /// Image file (PNG, JPEG, TIFF, etc.)
    Image,
    /// Video file (MP4, MOV, etc.)
    Video,
    /// Audio file (MP3, AAC, WAV, etc.)
    Audio,
    /// PDF document
    Pdf,
    /// Unknown or unsupported media type
    Unknown,
}

impl MediaType {
    /// Classify a file extension (given without the leading dot).
    ///
    /// The comparison is case-insensitive; any extension that is not in one of
    /// the known lists maps to [`MediaType::Unknown`].
    pub fn from_extension(ext: &str) -> Self {
        const IMAGE_EXTENSIONS: &[&str] = &[
            "png", "jpg", "jpeg", "gif", "tiff", "tif", "bmp", "heic", "heif",
        ];
        const VIDEO_EXTENSIONS: &[&str] = &["mp4", "mov", "m4v", "avi", "mkv"];
        const AUDIO_EXTENSIONS: &[&str] = &["mp3", "aac", "m4a", "wav", "aiff"];

        let lowered = ext.to_lowercase();
        let is_one_of = |known: &[&str]| known.iter().any(|candidate| *candidate == lowered);

        if is_one_of(IMAGE_EXTENSIONS) {
            Self::Image
        } else if is_one_of(VIDEO_EXTENSIONS) {
            Self::Video
        } else if is_one_of(AUDIO_EXTENSIONS) {
            Self::Audio
        } else if lowered == "pdf" {
            Self::Pdf
        } else {
            Self::Unknown
        }
    }

    /// Human-readable label for this media type.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::Image => "Image",
            Self::Video => "Video",
            Self::Audio => "Audio",
            Self::Pdf => "PDF Document",
            Self::Unknown => "Unknown",
        }
    }
}
bundle_path: PathBuf, + /// Map of media assets by filename + assets: HashMap, +} + +impl MediaManager { + /// Create a new media manager for a bundle + pub fn new>(bundle_path: P) -> Result { + let bundle_path = bundle_path.as_ref().to_path_buf(); + let mut assets = HashMap::new(); + + // Check if this is a directory bundle + if bundle_path.is_dir() { + Self::scan_directory_bundle(&bundle_path, &mut assets)?; + } + + Ok(Self { + bundle_path, + assets, + }) + } + + /// Scan a directory bundle for media assets + fn scan_directory_bundle( + bundle_path: &Path, + assets: &mut HashMap, + ) -> Result<()> { + let data_dir = bundle_path.join("Data"); + if !data_dir.exists() || !data_dir.is_dir() { + return Ok(()); // No Data directory is not an error + } + + Self::scan_directory_recursive(&data_dir, bundle_path, assets)?; + Ok(()) + } + + /// Recursively scan a directory for media files + fn scan_directory_recursive( + dir: &Path, + bundle_root: &Path, + assets: &mut HashMap, + ) -> Result<()> { + let entries = fs::read_dir(dir).map_err(Error::Io)?; + + for entry in entries { + let entry = entry.map_err(Error::Io)?; + let path = entry.path(); + + if path.is_dir() { + Self::scan_directory_recursive(&path, bundle_root, assets)?; + } else if path.is_file() + && let Ok(metadata) = fs::metadata(&path) + { + let relative_path = path + .strip_prefix(bundle_root) + .unwrap_or(&path) + .to_path_buf(); + + let asset = MediaAsset::new(relative_path.clone(), metadata.len()); + let filename = asset.filename.clone(); + assets.insert(filename, asset); + } + } + + Ok(()) + } + + /// Get all media assets + pub fn assets(&self) -> &HashMap { + &self.assets + } + + /// Get a media asset by filename + pub fn get(&self, filename: &str) -> Option<&MediaAsset> { + self.assets.get(filename) + } + + /// Get all assets of a specific type + pub fn assets_by_type(&self, media_type: MediaType) -> Vec<&MediaAsset> { + self.assets + .values() + .filter(|asset| asset.media_type == media_type) + 
.collect() + } + + /// Get all image assets + pub fn images(&self) -> Vec<&MediaAsset> { + self.assets_by_type(MediaType::Image) + } + + /// Get all video assets + pub fn videos(&self) -> Vec<&MediaAsset> { + self.assets_by_type(MediaType::Video) + } + + /// Get all audio assets + pub fn audio(&self) -> Vec<&MediaAsset> { + self.assets_by_type(MediaType::Audio) + } + + /// Extract a media asset to a byte vector + pub fn extract(&self, filename: &str) -> Result> { + let asset = self + .get(filename) + .ok_or_else(|| Error::Bundle(format!("Media asset not found: {}", filename)))?; + + let full_path = self.bundle_path.join(&asset.path); + let mut file = fs::File::open(&full_path).map_err(Error::Io)?; + let mut data = Vec::new(); + file.read_to_end(&mut data).map_err(Error::Io)?; + Ok(data) + } + + /// Extract a media asset to a file + pub fn extract_to_file(&self, filename: &str, output_path: &Path) -> Result<()> { + let data = self.extract(filename)?; + fs::write(output_path, data).map_err(Error::Io)?; + Ok(()) + } + + /// Get media statistics + pub fn stats(&self) -> MediaStats { + let mut stats = MediaStats { + total_count: self.assets.len(), + total_size: 0, + image_count: 0, + video_count: 0, + audio_count: 0, + pdf_count: 0, + unknown_count: 0, + }; + + for asset in self.assets.values() { + stats.total_size += asset.size; + match asset.media_type { + MediaType::Image => stats.image_count += 1, + MediaType::Video => stats.video_count += 1, + MediaType::Audio => stats.audio_count += 1, + MediaType::Pdf => stats.pdf_count += 1, + MediaType::Unknown => stats.unknown_count += 1, + } + } + + stats + } +} + +/// Statistics about media assets in a bundle +#[derive(Debug, Clone, Default)] +pub struct MediaStats { + /// Total number of media assets + pub total_count: usize, + /// Total size of all media assets in bytes + pub total_size: u64, + /// Number of image files + pub image_count: usize, + /// Number of video files + pub video_count: usize, + /// Number of audio 
/// Render a byte count with two decimals and a binary-scaled unit suffix.
///
/// The value is divided by 1024 for each step up the `B`, `KB`, `MB`, `GB`,
/// `TB` ladder; anything beyond the last unit is still reported in `TB`.
fn format_bytes(bytes: u64) -> String {
    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];

    let mut scaled = bytes as f64;
    let mut suffix = UNITS[0];

    // Step up one unit at a time until the value fits below 1024 or the
    // largest unit is reached.
    for next_suffix in &UNITS[1..] {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        suffix = next_suffix;
    }

    format!("{:.2} {}", scaled, suffix)
}
asset = MediaAsset::new(path, 1024); + + assert_eq!(asset.filename, "image.png"); + assert_eq!(asset.media_type, MediaType::Image); + assert_eq!(asset.size, 1024); + assert!(asset.is_image()); + assert!(!asset.is_video()); + } + + #[test] + fn test_media_manager_with_pages() { + let bundle_path = std::path::Path::new("test.pages"); + if !bundle_path.exists() { + return; // Skip test if file doesn't exist + } + + let manager = MediaManager::new(bundle_path); + if let Ok(manager) = manager { + let stats = manager.stats(); + println!("Media stats: {}", stats.summary()); + + // Check if we found any media + if stats.total_count > 0 { + println!("Found {} media assets", stats.total_count); + for (filename, asset) in manager.assets() { + println!( + " - {}: {} ({})", + filename, + asset.media_type.name(), + format_bytes(asset.size) + ); + } + } + } + } +} diff --git a/src/iwa/numbers/cell.rs b/crates/litchi-iwa/src/numbers/cell.rs similarity index 100% rename from src/iwa/numbers/cell.rs rename to crates/litchi-iwa/src/numbers/cell.rs diff --git a/crates/litchi-iwa/src/numbers/document.rs b/crates/litchi-iwa/src/numbers/document.rs new file mode 100644 index 0000000..0f9fc08 --- /dev/null +++ b/crates/litchi-iwa/src/numbers/document.rs @@ -0,0 +1,322 @@ +//! Numbers Document Implementation +//! +//! Provides high-level API for working with Apple Numbers spreadsheets. 
+ +use std::path::Path; + +use super::sheet::NumbersSheet; +use super::table::NumbersTable; +use crate::Result; +use crate::bundle::Bundle; +use crate::object_index::ObjectIndex; +use crate::registry::Application; +use crate::text::TextExtractor; + +/// High-level interface for Numbers documents +pub struct NumbersDocument { + /// Underlying bundle + bundle: Bundle, + /// Object index for cross-referencing + object_index: ObjectIndex, +} + +impl NumbersDocument { + /// Open a Numbers document from a path + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::numbers::NumbersDocument; + /// + /// let doc = NumbersDocument::open("spreadsheet.numbers")?; + /// println!("Loaded Numbers document"); + /// # Ok::<(), Box>(()) + /// ``` + pub fn open>(path: P) -> Result { + let bundle = Bundle::open(path)?; + let object_index = ObjectIndex::from_bundle(&bundle)?; + + Ok(Self { + bundle, + object_index, + }) + } + + /// Open a Numbers document from raw bytes + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::numbers::NumbersDocument; + /// use std::fs; + /// + /// let data = fs::read("spreadsheet.numbers")?; + /// let doc = NumbersDocument::from_bytes(&data)?; + /// # Ok::<(), Box>(()) + /// ``` + pub fn from_bytes(bytes: &[u8]) -> Result { + let bundle = Bundle::from_bytes(bytes)?; + let object_index = ObjectIndex::from_bundle(&bundle)?; + + Ok(Self { + bundle, + object_index, + }) + } + + /// Create a Numbers document from raw bytes (ZIP archive data). + /// + /// This is used for single-pass parsing where the ZIP archive has already + /// been validated during format detection. It avoids double-parsing. 
+ pub fn from_archive_bytes(bytes: &[u8]) -> Result { + Self::from_bytes(bytes) + } + + /// Extract all text content from the document + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::numbers::NumbersDocument; + /// + /// let doc = NumbersDocument::open("spreadsheet.numbers")?; + /// let text = doc.text()?; + /// println!("{}", text); + /// # Ok::<(), Box>(()) + /// ``` + pub fn text(&self) -> Result { + let mut extractor = TextExtractor::new(); + extractor.extract_from_bundle(&self.bundle)?; + Ok(extractor.get_text()) + } + + /// Extract sheets from the document + /// + /// Numbers documents consist of multiple sheets, each containing tables. + /// This method parses the document structure and returns all sheets. + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::numbers::NumbersDocument; + /// + /// let doc = NumbersDocument::open("spreadsheet.numbers")?; + /// let sheets = doc.sheets()?; + /// + /// for sheet in sheets { + /// println!("Sheet: {}", sheet.name); + /// for table in &sheet.tables { + /// println!(" Table: {} ({}x{})", + /// table.name, table.row_count, table.column_count); + /// } + /// } + /// # Ok::<(), Box>(()) + /// ``` + pub fn sheets(&self) -> Result> { + let mut sheets = Vec::new(); + + // Find sheet archives (message type 2 is TN.SheetArchive, type 1003 in our decoder) + let sheet_objects = self.bundle.find_objects_by_type(1003); + + if sheet_objects.is_empty() { + // Try alternate sheet message type (TN.SheetArchive from JSON) + let alt_sheet_objects = self.bundle.find_objects_by_type(2); + + for (index, (_archive_name, object)) in alt_sheet_objects.iter().enumerate() { + let sheet = self.parse_sheet(index, object)?; + if !sheet.is_empty() || !sheet.name.is_empty() { + sheets.push(sheet); + } + } + } else { + for (index, (_archive_name, object)) in sheet_objects.iter().enumerate() { + let sheet = self.parse_sheet(index, object)?; + if !sheet.is_empty() || !sheet.name.is_empty() { + 
sheets.push(sheet); + } + } + } + + // If no sheets found, try to extract tables directly + if sheets.is_empty() { + let tables = self.extract_all_tables()?; + if !tables.is_empty() { + let mut default_sheet = NumbersSheet::new("Sheet 1".to_string(), 0); + for table in tables { + default_sheet.add_table(table); + } + sheets.push(default_sheet); + } + } + + Ok(sheets) + } + + /// Parse a single sheet from an object + fn parse_sheet( + &self, + index: usize, + object: &crate::archive::ArchiveObject, + ) -> Result { + use prost::Message; + + // Extract sheet name from decoded messages + let text_parts = object.extract_text(); + let sheet_name = text_parts + .first() + .cloned() + .unwrap_or_else(|| format!("Sheet {}", index + 1)); + + let mut sheet = NumbersSheet::new(sheet_name, index); + + // Parse the SheetArchive protobuf message to get table references + if let Some(raw_message) = object.messages.first() + && let Ok(sheet_archive) = crate::protobuf::tn::SheetArchive::decode(&*raw_message.data) + { + // Extract table references from drawable_infos + // Tables in Numbers are stored as drawables + for drawable_ref in &sheet_archive.drawable_infos { + if let Ok(table) = self.extract_table_from_drawable(drawable_ref.identifier) { + sheet.add_table(table); + } + } + } + + // Fallback: Extract all tables from the document if sheet has none + if sheet.table_count() == 0 { + let tables = self.extract_all_tables()?; + for table in tables { + sheet.add_table(table); + } + } + + Ok(sheet) + } + + /// Extract all tables from the document + fn extract_all_tables(&self) -> Result> { + use super::table_extractor::TableDataExtractor; + + let extractor = TableDataExtractor::new(&self.bundle, &self.object_index); + extractor.extract_all_tables() + } + + /// Extract a table from a drawable reference + fn extract_table_from_drawable(&self, drawable_id: u64) -> Result { + use prost::Message; + + if let Some(resolved) = self + .object_index + .resolve_object(&self.bundle, drawable_id)? 
+ { + // Look for TableInfoArchive which wraps the table model + for msg in &resolved.messages { + if let Ok(table_info) = crate::protobuf::tst::TableInfoArchive::decode(&*msg.data) { + // The table_model field contains a reference to the TableModelArchive + let table_model_id = table_info.table_model.identifier; + return self.extract_table_from_model(table_model_id); + } + } + } + + Err(crate::Error::ParseError( + "Could not extract table from drawable".to_string(), + )) + } + + /// Extract a table from a TableModelArchive reference + fn extract_table_from_model(&self, table_model_id: u64) -> Result { + use super::table_extractor::TableDataExtractor; + + let extractor = TableDataExtractor::new(&self.bundle, &self.object_index); + + if let Some(resolved) = self + .object_index + .resolve_object(&self.bundle, table_model_id)? + && let Some(table) = extractor.extract_table_from_object(&resolved)? + { + return Ok(table); + } + + Err(crate::Error::ParseError( + "Could not extract table from model".to_string(), + )) + } + + /// Get the underlying bundle + pub fn bundle(&self) -> &Bundle { + &self.bundle + } + + /// Get the object index + pub fn object_index(&self) -> &ObjectIndex { + &self.object_index + } + + /// Get document statistics + pub fn stats(&self) -> NumbersDocumentStats { + let total_objects = self.object_index.all_object_ids().len(); + let sheets_result = self.sheets(); + let sheet_count = sheets_result.as_ref().map(|s| s.len()).unwrap_or(0); + let table_count = sheets_result + .as_ref() + .map(|sheets| sheets.iter().map(|s| s.table_count()).sum()) + .unwrap_or(0); + + NumbersDocumentStats { + total_objects, + sheet_count, + table_count, + application: Application::Numbers, + } + } +} + +/// Statistics about a Numbers document +#[derive(Debug, Clone)] +pub struct NumbersDocumentStats { + /// Total number of objects + pub total_objects: usize, + /// Number of sheets + pub sheet_count: usize, + /// Total number of tables across all sheets + pub table_count: 
usize, + /// Application type (always Numbers) + pub application: Application, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_numbers_document_open() { + let doc_path = std::path::Path::new("test.numbers"); + if !doc_path.exists() { + // Skip test if test file doesn't exist + return; + } + + let doc_result = NumbersDocument::open(doc_path); + assert!( + doc_result.is_ok(), + "Failed to open Numbers document: {:?}", + doc_result.err() + ); + + let doc = doc_result.unwrap(); + assert!(!doc.object_index.all_object_ids().is_empty()); + } + + #[test] + fn test_numbers_text_extraction() { + let doc_path = std::path::Path::new("test.numbers"); + if !doc_path.exists() { + return; + } + + let doc = NumbersDocument::open(doc_path).unwrap(); + let text_result = doc.text(); + assert!(text_result.is_ok()); + } +} diff --git a/crates/litchi-iwa/src/numbers/mod.rs b/crates/litchi-iwa/src/numbers/mod.rs new file mode 100644 index 0000000..fda23a4 --- /dev/null +++ b/crates/litchi-iwa/src/numbers/mod.rs @@ -0,0 +1,42 @@ +//! Numbers Spreadsheet Support +//! +//! This module provides comprehensive support for parsing Apple Numbers spreadsheets, +//! including table extraction, cell data parsing, and formula support. +//! +//! ## Features +//! +//! - Sheet extraction +//! - Table parsing with cell data +//! - Formula extraction +//! - CSV export +//! - Cell formatting information +//! +//! ## Example +//! +//! ```rust,no_run +//! use litchi_iwa::numbers::NumbersDocument; +//! +//! let doc = NumbersDocument::open("spreadsheet.numbers")?; +//! let sheets = doc.sheets()?; +//! +//! for sheet in sheets { +//! println!("Sheet: {}", sheet.name); +//! for table in &sheet.tables { +//! println!(" Table: {}", table.name); +//! println!("{}", table.to_csv()); +//! } +//! } +//! # Ok::<(), Box>(()) +//! 
``` + +pub mod cell; +pub mod document; +pub mod sheet; +pub mod table; +pub mod table_extractor; + +pub use cell::{CellType, CellValue}; +pub use document::NumbersDocument; +pub use sheet::NumbersSheet; +pub use table::NumbersTable; +pub use table_extractor::TableDataExtractor; diff --git a/crates/litchi-iwa/src/numbers/sheet.rs b/crates/litchi-iwa/src/numbers/sheet.rs new file mode 100644 index 0000000..7b44188 --- /dev/null +++ b/crates/litchi-iwa/src/numbers/sheet.rs @@ -0,0 +1,123 @@ +//! Numbers Sheet Structure +//! +//! Sheets in Numbers documents contain multiple tables and other content. + +use super::table::NumbersTable; + +/// Represents a sheet in a Numbers document +#[derive(Debug, Clone)] +pub struct NumbersSheet { + /// Sheet name + pub name: String, + /// Sheet index (0-based) + pub index: usize, + /// Tables in this sheet + pub tables: Vec, +} + +impl NumbersSheet { + /// Create a new sheet + pub fn new(name: String, index: usize) -> Self { + Self { + name, + index, + tables: Vec::new(), + } + } + + /// Add a table to the sheet + pub fn add_table(&mut self, table: NumbersTable) { + self.tables.push(table); + } + + /// Get a table by name + pub fn get_table(&self, name: &str) -> Option<&NumbersTable> { + self.tables.iter().find(|t| t.name == name) + } + + /// Get a mutable reference to a table by name + pub fn get_table_mut(&mut self, name: &str) -> Option<&mut NumbersTable> { + self.tables.iter_mut().find(|t| t.name == name) + } + + /// Get all table names + pub fn table_names(&self) -> Vec { + self.tables.iter().map(|t| t.name.clone()).collect() + } + + /// Check if sheet is empty + pub fn is_empty(&self) -> bool { + self.tables.is_empty() + } + + /// Get total number of tables + pub fn table_count(&self) -> usize { + self.tables.len() + } + + /// Get total number of cells across all tables + pub fn total_cell_count(&self) -> usize { + self.tables + .iter() + .map(|t| t.row_count * t.column_count) + .sum() + } +} + +#[cfg(test)] +mod tests { + use 
super::*; + use crate::numbers::cell::CellValue; + + #[test] + fn test_sheet_creation() { + let sheet = NumbersSheet::new("Sheet1".to_string(), 0); + assert_eq!(sheet.name, "Sheet1"); + assert_eq!(sheet.index, 0); + assert!(sheet.is_empty()); + assert_eq!(sheet.table_count(), 0); + } + + #[test] + fn test_sheet_add_table() { + let mut sheet = NumbersSheet::new("Sheet1".to_string(), 0); + + let mut table = NumbersTable::new("Table1".to_string()); + table.set_cell(0, 0, CellValue::Number(1.0)); + + sheet.add_table(table); + + assert_eq!(sheet.table_count(), 1); + assert!(!sheet.is_empty()); + } + + #[test] + fn test_sheet_get_table() { + let mut sheet = NumbersSheet::new("Sheet1".to_string(), 0); + + let table1 = NumbersTable::new("Table1".to_string()); + let table2 = NumbersTable::new("Table2".to_string()); + + sheet.add_table(table1); + sheet.add_table(table2); + + assert!(sheet.get_table("Table1").is_some()); + assert!(sheet.get_table("Table2").is_some()); + assert!(sheet.get_table("Table3").is_none()); + } + + #[test] + fn test_sheet_table_names() { + let mut sheet = NumbersSheet::new("Sheet1".to_string(), 0); + + sheet.add_table(NumbersTable::new("Table1".to_string())); + sheet.add_table(NumbersTable::new("Table2".to_string())); + sheet.add_table(NumbersTable::new("Table3".to_string())); + + let names = sheet.table_names(); + assert_eq!(names.len(), 3); + assert!(names.contains(&"Table1".to_string())); + assert!(names.contains(&"Table2".to_string())); + assert!(names.contains(&"Table3".to_string())); + } +} diff --git a/src/iwa/numbers/table.rs b/crates/litchi-iwa/src/numbers/table.rs similarity index 100% rename from src/iwa/numbers/table.rs rename to crates/litchi-iwa/src/numbers/table.rs diff --git a/src/iwa/numbers/table_extractor.rs b/crates/litchi-iwa/src/numbers/table_extractor.rs similarity index 90% rename from src/iwa/numbers/table_extractor.rs rename to crates/litchi-iwa/src/numbers/table_extractor.rs index c81e56c..4fec889 100644 --- 
a/src/iwa/numbers/table_extractor.rs +++ b/crates/litchi-iwa/src/numbers/table_extractor.rs @@ -14,9 +14,9 @@ //! ## Example //! //! ```rust,ignore -//! use litchi::iwa::numbers::table_extractor::TableDataExtractor; -//! use litchi::iwa::bundle::Bundle; -//! use litchi::iwa::object_index::ObjectIndex; +//! use litchi_iwa::numbers::table_extractor::TableDataExtractor; +//! use litchi_iwa::bundle::Bundle; +//! use litchi_iwa::object_index::ObjectIndex; //! //! let bundle = Bundle::open("document.numbers")?; //! let index = ObjectIndex::from_bundle(&bundle)?; @@ -31,10 +31,10 @@ use super::cell::CellValue; use super::table::NumbersTable; -use crate::iwa::bundle::Bundle; -use crate::iwa::object_index::{ObjectIndex, ResolvedObject}; -use crate::iwa::protobuf::{tsce, tst}; -use crate::iwa::{Error, Result}; +use crate::bundle::Bundle; +use crate::object_index::{ObjectIndex, ResolvedObject}; +use crate::protobuf::{tsce, tst}; +use crate::{Error, Result}; use prost::Message; use std::collections::HashMap; @@ -70,7 +70,7 @@ impl<'a> TableDataExtractor<'a> { /// Extract tables from object index entries fn extract_tables_from_entries( &self, - entries: Vec<&crate::iwa::object_index::ObjectIndexEntry>, + entries: Vec<&crate::object_index::ObjectIndexEntry>, ) -> Result> { let mut tables = Vec::new(); @@ -376,7 +376,7 @@ impl<'a> TableDataExtractor<'a> { /// O(n) where n is the number of AST nodes. Uses a stack-based algorithm /// for efficient conversion. 
fn extract_formula_string(&self, formula: &tsce::FormulaArchive) -> Result { - use crate::iwa::protobuf::tsce::ast_node_array_archive::AstNodeType; + use crate::protobuf::tsce::ast_node_array_archive::AstNodeType; let ast_array = &formula.ast_node_array; @@ -395,40 +395,30 @@ impl<'a> TableDataExtractor<'a> { match ast_node_type { // Arithmetic operators (binary) - AstNodeType::AdditionNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}+{})", left, right)); - } + AstNodeType::AdditionNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}+{})", left, right)); }, - AstNodeType::SubtractionNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}-{})", left, right)); - } + AstNodeType::SubtractionNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}-{})", left, right)); }, - AstNodeType::MultiplicationNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}*{})", left, right)); - } + AstNodeType::MultiplicationNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}*{})", left, right)); }, - AstNodeType::DivisionNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}/{})", left, right)); - } + AstNodeType::DivisionNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}/{})", left, right)); }, - AstNodeType::PowerNode => { - if 
expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}^{})", left, right)); - } + AstNodeType::PowerNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}^{})", left, right)); }, // Note: Comparison operators are handled differently in Numbers AST @@ -522,12 +512,10 @@ impl<'a> TableDataExtractor<'a> { }, // Concatenation - AstNodeType::ConcatenationNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}&{})", left, right)); - } + AstNodeType::ConcatenationNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}&{})", left, right)); }, // Other node types - handle gracefully @@ -624,8 +612,7 @@ impl<'a> TableDataExtractor<'a> { for msg in &resolved.messages { if msg.type_ >= 2001 && msg.type_ <= 2022 - && let Ok(storage) = - crate::iwa::protobuf::tswp::StorageArchive::decode(&*msg.data) + && let Ok(storage) = crate::protobuf::tswp::StorageArchive::decode(&*msg.data) && !storage.text.is_empty() { return Ok(Some(storage.text.join("\n"))); diff --git a/src/iwa/object_index.rs b/crates/litchi-iwa/src/object_index.rs similarity index 93% rename from src/iwa/object_index.rs rename to crates/litchi-iwa/src/object_index.rs index aaf72e7..b23e8ef 100644 --- a/src/iwa/object_index.rs +++ b/crates/litchi-iwa/src/object_index.rs @@ -6,10 +6,10 @@ use std::collections::HashMap; -use crate::iwa::archive::{Archive, ArchiveObject, RawMessage}; -use crate::iwa::bundle::Bundle; -use crate::iwa::ref_graph::ReferenceGraph; -use crate::iwa::{Error, Result}; +use crate::archive::{Archive, ArchiveObject, RawMessage}; +use crate::bundle::Bundle; +use crate::ref_graph::ReferenceGraph; +use crate::{Error, Result}; /// Represents an entry 
in the object index #[derive(Debug, Clone)] @@ -139,7 +139,7 @@ impl ObjectIndex { 6000 | 6001 => { // TST.TableModelArchive contains multiple style and data references if let Ok(table) = - crate::iwa::protobuf::tst::TableModelArchive::decode(&*raw_msg.data) + crate::protobuf::tst::TableModelArchive::decode(&*raw_msg.data) { // Extract style references self.extract_reference(object_id, &table.table_style); @@ -193,7 +193,7 @@ impl ObjectIndex { 2001..=2022 => { // TSWP.StorageArchive contains text content and may reference styles if let Ok(storage) = - crate::iwa::protobuf::tswp::StorageArchive::decode(&*raw_msg.data) + crate::protobuf::tswp::StorageArchive::decode(&*raw_msg.data) { // Extract stylesheet reference if present if let Some(ref style_sheet) = storage.style_sheet { @@ -208,9 +208,7 @@ impl ObjectIndex { // KN (Keynote) types 5 | 6 => { // KN.SlideArchive contains references to drawables, builds, and transitions - if let Ok(slide) = - crate::iwa::protobuf::kn::SlideArchive::decode(&*raw_msg.data) - { + if let Ok(slide) = crate::protobuf::kn::SlideArchive::decode(&*raw_msg.data) { // Extract style reference self.extract_reference(object_id, &slide.style); @@ -251,8 +249,7 @@ impl ObjectIndex { 2 => { // KN.ShowArchive (conflicts with TSP.MessageInfo, handle by context) // Try to decode as ShowArchive for Keynote documents - if let Ok(show) = crate::iwa::protobuf::kn::ShowArchive::decode(&*raw_msg.data) - { + if let Ok(show) = crate::protobuf::kn::ShowArchive::decode(&*raw_msg.data) { // Extract theme and stylesheet references self.extract_reference(object_id, &show.theme); self.extract_reference(object_id, &show.stylesheet); @@ -275,9 +272,7 @@ impl ObjectIndex { // TN (Numbers) types 3 => { // TN.SheetArchive / TN.FormBasedSheetArchive - if let Ok(sheet) = - crate::iwa::protobuf::tn::SheetArchive::decode(&*raw_msg.data) - { + if let Ok(sheet) = crate::protobuf::tn::SheetArchive::decode(&*raw_msg.data) { // Extract drawable info references for 
drawable_ref in &sheet.drawable_infos { self.extract_reference(object_id, drawable_ref); @@ -299,7 +294,7 @@ impl ObjectIndex { 3002 => { // TSD.DrawableArchive - base type for all drawables if let Ok(drawable) = - crate::iwa::protobuf::tsd::DrawableArchive::decode(&*raw_msg.data) + crate::protobuf::tsd::DrawableArchive::decode(&*raw_msg.data) { // Extract parent reference (drawable hierarchy) if let Some(ref parent) = drawable.parent { @@ -312,7 +307,7 @@ impl ObjectIndex { 3003 => { // TSD.ContainerArchive - container for grouped objects if let Ok(container) = - crate::iwa::protobuf::tsd::ContainerArchive::decode(&*raw_msg.data) + crate::protobuf::tsd::ContainerArchive::decode(&*raw_msg.data) { // Extract parent reference if let Some(ref parent) = container.parent { @@ -326,9 +321,7 @@ impl ObjectIndex { }, 3004 => { // TSD.ShapeArchive - shapes (rectangles, circles, polygons, etc.) - if let Ok(shape) = - crate::iwa::protobuf::tsd::ShapeArchive::decode(&*raw_msg.data) - { + if let Ok(shape) = crate::protobuf::tsd::ShapeArchive::decode(&*raw_msg.data) { // ShapeArchive embeds DrawableArchive in 'super' field (required) // Extract parent from the super DrawableArchive if let Some(ref parent) = shape.super_.parent { @@ -344,9 +337,7 @@ impl ObjectIndex { }, 3005 => { // TSD.ImageArchive - images - if let Ok(image) = - crate::iwa::protobuf::tsd::ImageArchive::decode(&*raw_msg.data) - { + if let Ok(image) = crate::protobuf::tsd::ImageArchive::decode(&*raw_msg.data) { // Extract parent from super DrawableArchive (required field) if let Some(ref parent) = image.super_.parent { self.extract_reference(object_id, parent); @@ -361,8 +352,7 @@ impl ObjectIndex { }, 3006 => { // TSD.MaskArchive - image masks - if let Ok(mask) = crate::iwa::protobuf::tsd::MaskArchive::decode(&*raw_msg.data) - { + if let Ok(mask) = crate::protobuf::tsd::MaskArchive::decode(&*raw_msg.data) { // Extract parent from super DrawableArchive (required field) if let Some(ref parent) = 
mask.super_.parent { self.extract_reference(object_id, parent); @@ -372,9 +362,7 @@ impl ObjectIndex { }, 3007 => { // TSD.MovieArchive - video objects - if let Ok(movie) = - crate::iwa::protobuf::tsd::MovieArchive::decode(&*raw_msg.data) - { + if let Ok(movie) = crate::protobuf::tsd::MovieArchive::decode(&*raw_msg.data) { // Extract parent from super DrawableArchive (required field) if let Some(ref parent) = movie.super_.parent { self.extract_reference(object_id, parent); @@ -388,9 +376,7 @@ impl ObjectIndex { }, 3008 => { // TSD.GroupArchive - grouped shapes/objects - if let Ok(group) = - crate::iwa::protobuf::tsd::GroupArchive::decode(&*raw_msg.data) - { + if let Ok(group) = crate::protobuf::tsd::GroupArchive::decode(&*raw_msg.data) { // Extract parent from super DrawableArchive (required field) if let Some(ref parent) = group.super_.parent { self.extract_reference(object_id, parent); @@ -404,7 +390,7 @@ impl ObjectIndex { 3009 => { // TSD.ConnectionLineArchive - connector lines between shapes if let Ok(conn_line) = - crate::iwa::protobuf::tsd::ConnectionLineArchive::decode(&*raw_msg.data) + crate::protobuf::tsd::ConnectionLineArchive::decode(&*raw_msg.data) { // Extract parent and style from super ShapeArchive (required field) // ConnectionLineArchive.super_ is ShapeArchive @@ -433,9 +419,7 @@ impl ObjectIndex { // This is a pre-unified format chart, structure may vary // Attempt basic reference extraction but may fail gracefully if let Ok(chart_info) = - crate::iwa::protobuf::tsch::pre_uff::ChartInfoArchive::decode( - &*raw_msg.data, - ) + crate::protobuf::tsch::pre_uff::ChartInfoArchive::decode(&*raw_msg.data) { // Extract chart style reference if present if let Some(ref style) = chart_info.style { @@ -448,7 +432,7 @@ impl ObjectIndex { 5004 => { // TSCH.ChartMediatorArchive - mediator between chart and data if let Ok(mediator) = - crate::iwa::protobuf::tsch::ChartMediatorArchive::decode(&*raw_msg.data) + 
crate::protobuf::tsch::ChartMediatorArchive::decode(&*raw_msg.data) { // Extract info reference (points to the chart drawable) if let Some(ref info) = mediator.info { @@ -461,7 +445,7 @@ impl ObjectIndex { 5020 => { // TSCH.ChartStylePreset - preset styles for charts if let Ok(preset) = - crate::iwa::protobuf::tsch::ChartStylePreset::decode(&*raw_msg.data) + crate::protobuf::tsch::ChartStylePreset::decode(&*raw_msg.data) { // Extract chart style reference if let Some(ref chart_style) = preset.chart_style { @@ -479,7 +463,7 @@ impl ObjectIndex { 5021 => { // TSCH.ChartDrawableArchive - main chart drawable if let Ok(chart_drawable) = - crate::iwa::protobuf::tsch::ChartDrawableArchive::decode(&*raw_msg.data) + crate::protobuf::tsch::ChartDrawableArchive::decode(&*raw_msg.data) { // Extract parent from super DrawableArchive if let Some(ref drawable) = chart_drawable.super_ @@ -497,9 +481,7 @@ impl ObjectIndex { // TP (Pages) types 10000 => { // TP.DocumentArchive - if let Ok(doc) = - crate::iwa::protobuf::tp::DocumentArchive::decode(&*raw_msg.data) - { + if let Ok(doc) = crate::protobuf::tp::DocumentArchive::decode(&*raw_msg.data) { // Extract theme reference if let Some(ref theme) = doc.theme { self.extract_reference(object_id, theme); @@ -545,11 +527,7 @@ impl ObjectIndex { /// /// O(1) average case for HashMap insertion. Uses efficient deduplication /// to avoid storing duplicate references. 
- fn extract_reference( - &mut self, - source_id: u64, - reference: &crate::iwa::protobuf::tsp::Reference, - ) { + fn extract_reference(&mut self, source_id: u64, reference: &crate::protobuf::tsp::Reference) { let target_id = reference.identifier; // Ignore null/zero references (0 typically means "no reference") @@ -886,7 +864,7 @@ pub struct ResolvedObject { /// Object identifier pub id: u64, /// Archive information - pub archive_info: crate::iwa::archive::ArchiveInfo, + pub archive_info: crate::archive::ArchiveInfo, /// Raw message data pub messages: Vec, } diff --git a/crates/litchi-iwa/src/pages/document.rs b/crates/litchi-iwa/src/pages/document.rs new file mode 100644 index 0000000..13039cd --- /dev/null +++ b/crates/litchi-iwa/src/pages/document.rs @@ -0,0 +1,292 @@ +//! Pages Document Implementation +//! +//! Provides high-level API for working with Apple Pages documents. + +use std::path::Path; + +use super::section::{PagesSection, PagesSectionType}; +use crate::Result; +use crate::bundle::Bundle; +use crate::object_index::ObjectIndex; +use crate::registry::Application; +use crate::text::TextExtractor; + +/// High-level interface for Pages documents +pub struct PagesDocument { + /// Underlying bundle + bundle: Bundle, + /// Object index for cross-referencing + object_index: ObjectIndex, +} + +impl PagesDocument { + /// Open a Pages document from a path + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::pages::PagesDocument; + /// + /// let doc = PagesDocument::open("document.pages")?; + /// println!("Loaded Pages document"); + /// # Ok::<(), Box>(()) + /// ``` + pub fn open>(path: P) -> Result { + let bundle = Bundle::open(path)?; + + // Verify this is a Pages document + Self::verify_application(&bundle)?; + + let object_index = ObjectIndex::from_bundle(&bundle)?; + + Ok(Self { + bundle, + object_index, + }) + } + + /// Open a Pages document from raw bytes + /// + /// # Examples + /// + /// ```rust,no_run + /// use 
litchi_iwa::pages::PagesDocument; + /// use std::fs; + /// + /// let data = fs::read("document.pages")?; + /// let doc = PagesDocument::from_bytes(&data)?; + /// # Ok::<(), Box>(()) + /// ``` + pub fn from_bytes(bytes: &[u8]) -> Result { + let bundle = Bundle::from_bytes(bytes)?; + + // Verify this is a Pages document + Self::verify_application(&bundle)?; + + let object_index = ObjectIndex::from_bundle(&bundle)?; + + Ok(Self { + bundle, + object_index, + }) + } + + /// Create a Pages document from raw bytes (ZIP archive data). + /// + /// This is used for single-pass parsing where the ZIP archive has already + /// been validated during format detection. It avoids double-parsing. + pub fn from_archive_bytes(bytes: &[u8]) -> Result { + Self::from_bytes(bytes) + } + + /// Verify that the bundle is a Pages document + fn verify_application(bundle: &Bundle) -> Result<()> { + // Check for Pages-specific message types (TP.* types in range 10000-10999) + // Message type 10000 is TP.DocumentArchive + let has_pages_types = bundle.archives().values().any(|archive| { + archive.objects.iter().any(|obj| { + obj.messages + .iter() + .any(|msg| msg.type_ == 10000 || (10000..11000).contains(&msg.type_)) + }) + }); + + if !has_pages_types { + // Be lenient - if we can't definitively identify it as another type, allow it + // This helps with documents that might not have explicit Pages markers + } + + Ok(()) + } + + /// Extract all text content from the document + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::pages::PagesDocument; + /// + /// let doc = PagesDocument::open("document.pages")?; + /// let text = doc.text()?; + /// println!("{}", text); + /// # Ok::<(), Box>(()) + /// ``` + pub fn text(&self) -> Result { + let mut extractor = TextExtractor::new(); + extractor.extract_from_bundle(&self.bundle)?; + Ok(extractor.get_text()) + } + + /// Extract sections from the document + /// + /// Pages documents are organized into sections. 
This method parses the + /// document structure and returns all sections with their content. + /// + /// # Examples + /// + /// ```rust,no_run + /// use litchi_iwa::pages::PagesDocument; + /// + /// let doc = PagesDocument::open("document.pages")?; + /// let sections = doc.sections()?; + /// + /// for section in sections { + /// println!("Section {}: {}", section.index, section.section_type.name()); + /// println!("{}", section.plain_text()); + /// } + /// # Ok::<(), Box>(()) + /// ``` + pub fn sections(&self) -> Result> { + let mut sections = Vec::new(); + + // Find section archives (message type 10011 is TP.SectionArchive) + let section_objects = self.bundle.find_objects_by_type(10011); + + if section_objects.is_empty() { + // If no explicit sections found, create a single body section + // with all text content + let mut section = PagesSection::new(0, PagesSectionType::Body); + + // Extract text from all TSWP storage objects + let mut extractor = TextExtractor::new(); + extractor.extract_from_bundle(&self.bundle)?; + + for storage in extractor.storages() { + if !storage.is_empty() { + section.text_storages.push(storage.clone()); + section.paragraphs.push(storage.plain_text().to_string()); + } + } + + if !section.is_empty() { + sections.push(section); + } + } else { + // Parse explicit sections + for (index, (_archive_name, object)) in section_objects.iter().enumerate() { + let section = self.parse_section(index, object)?; + if !section.is_empty() { + sections.push(section); + } + } + } + + Ok(sections) + } + + /// Parse a single section from an object + fn parse_section( + &self, + index: usize, + object: &crate::archive::ArchiveObject, + ) -> Result { + let mut section = PagesSection::new(index, PagesSectionType::Body); + + // Extract text content from the section object + let text_parts = object.extract_text(); + section.paragraphs = text_parts; + + // Parse the SectionArchive protobuf message + // TP.SectionArchive contains references to: + // - Body storage 
(main text content) + // - Header/footer storages + // - Section properties (margins, columns, etc.) + + if let Some(_raw_message) = object.messages.first() { + // The SectionArchive structure is complex with many references + // For a production implementation, we would: + // 1. Parse the SectionArchive protobuf message + // 2. Resolve references to text storage objects + // 3. Extract section-specific properties (margins, headers, footers) + // 4. Build the complete section structure + // + // Note: The SectionArchive fields use names like: + // - obsolete_headers, obsolete_footers (legacy) + // - current implementations use different field names + // This would require careful mapping from the proto definitions + } + + // Extract text storages + let extractor = TextExtractor::new(); + if let Ok(storage) = extractor.extract_from_object(object) + && !storage.is_empty() + { + section.text_storages.push(storage); + } + + Ok(section) + } + + /// Get the underlying bundle + pub fn bundle(&self) -> &Bundle { + &self.bundle + } + + /// Get the object index + pub fn object_index(&self) -> &ObjectIndex { + &self.object_index + } + + /// Get document statistics + pub fn stats(&self) -> PagesDocumentStats { + let total_objects = self.object_index.all_object_ids().len(); + let sections_result = self.sections(); + let section_count = sections_result.as_ref().map(|s| s.len()).unwrap_or(0); + + PagesDocumentStats { + total_objects, + section_count, + application: Application::Pages, + } + } +} + +/// Statistics about a Pages document +#[derive(Debug, Clone)] +pub struct PagesDocumentStats { + /// Total number of objects + pub total_objects: usize, + /// Number of sections + pub section_count: usize, + /// Application type (always Pages) + pub application: Application, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pages_document_open() { + let doc_path = std::path::Path::new("test.pages"); + if !doc_path.exists() { + // Skip test if test file doesn't 
exist + return; + } + + let doc_result = PagesDocument::open(doc_path); + assert!( + doc_result.is_ok(), + "Failed to open Pages document: {:?}", + doc_result.err() + ); + + let doc = doc_result.unwrap(); + assert!(!doc.object_index.all_object_ids().is_empty()); + } + + #[test] + fn test_pages_text_extraction() { + let doc_path = std::path::Path::new("test.pages"); + if !doc_path.exists() { + return; + } + + let doc = PagesDocument::open(doc_path).unwrap(); + let text_result = doc.text(); + assert!(text_result.is_ok()); + + // Text might be empty for some documents, but extraction should succeed + let _text = text_result.unwrap(); + } +} diff --git a/crates/litchi-iwa/src/pages/mod.rs b/crates/litchi-iwa/src/pages/mod.rs new file mode 100644 index 0000000..d07717e --- /dev/null +++ b/crates/litchi-iwa/src/pages/mod.rs @@ -0,0 +1,36 @@ +//! Pages Document Support +//! +//! This module provides comprehensive support for parsing Apple Pages documents, +//! including text extraction, section management, and document structure analysis. +//! +//! ## Features +//! +//! - Document metadata extraction +//! - Section and paragraph parsing +//! - Text style information +//! - Floating drawables (images, shapes) +//! - Header and footer extraction +//! +//! ## Example +//! +//! ```rust,no_run +//! use litchi_iwa::pages::PagesDocument; +//! +//! let doc = PagesDocument::open("document.pages")?; +//! let text = doc.text()?; +//! let sections = doc.sections()?; +//! +//! for section in sections { +//! println!("Section: {:?}", section.heading); +//! for para in §ion.paragraphs { +//! println!(" {}", para); +//! } +//! } +//! # Ok::<(), Box>(()) +//! 
``` + +pub mod document; +pub mod section; + +pub use document::PagesDocument; +pub use section::{PagesSection, PagesSectionType}; diff --git a/crates/litchi-iwa/src/pages/section.rs b/crates/litchi-iwa/src/pages/section.rs new file mode 100644 index 0000000..0b9aa28 --- /dev/null +++ b/crates/litchi-iwa/src/pages/section.rs @@ -0,0 +1,137 @@ +//! Pages Document Section Structure +//! +//! Pages documents are organized into sections, each with its own layout and content. + +use crate::text::TextStorage; + +/// Represents a section in a Pages document +#[derive(Debug, Clone)] +pub struct PagesSection { + /// Section index (0-based) + pub index: usize, + /// Section type + pub section_type: PagesSectionType, + /// Section heading/title + pub heading: Option, + /// Paragraphs in this section + pub paragraphs: Vec, + /// Text storages in this section + pub text_storages: Vec, + /// Page count in this section + pub page_count: Option, +} + +impl PagesSection { + /// Create a new section + pub fn new(index: usize, section_type: PagesSectionType) -> Self { + Self { + index, + section_type, + heading: None, + paragraphs: Vec::new(), + text_storages: Vec::new(), + page_count: None, + } + } + + /// Get all text from the section (heading + paragraphs) + pub fn all_text(&self) -> Vec { + let mut all = Vec::new(); + if let Some(ref heading) = self.heading { + all.push(heading.clone()); + } + all.extend(self.paragraphs.clone()); + + // Also include text from storages + for storage in &self.text_storages { + let text = storage.plain_text(); + if !text.is_empty() { + all.push(text.to_string()); + } + } + + all + } + + /// Get plain text content as a single string + pub fn plain_text(&self) -> String { + self.all_text().join("\n") + } + + /// Check if section is empty + pub fn is_empty(&self) -> bool { + self.heading.is_none() && self.paragraphs.is_empty() && self.text_storages.is_empty() + } +} + +/// Types of sections in a Pages document +#[derive(Debug, Clone, Copy, PartialEq, 
Eq)] +pub enum PagesSectionType { + /// Main body content + Body, + /// Header section + Header, + /// Footer section + Footer, + /// Floating/anchored section + Floating, +} + +impl PagesSectionType { + /// Get a human-readable name for the section type + pub fn name(&self) -> &'static str { + match self { + Self::Body => "Body", + Self::Header => "Header", + Self::Footer => "Footer", + Self::Floating => "Floating", + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_pages_section_creation() { + let mut section = PagesSection::new(0, PagesSectionType::Body); + assert_eq!(section.index, 0); + assert_eq!(section.section_type, PagesSectionType::Body); + assert!(section.is_empty()); + + section.heading = Some("Introduction".to_string()); + section.paragraphs.push("First paragraph".to_string()); + + assert!(!section.is_empty()); + let text = section.plain_text(); + assert!(text.contains("Introduction")); + assert!(text.contains("First paragraph")); + } + + #[test] + fn test_section_type_names() { + assert_eq!(PagesSectionType::Body.name(), "Body"); + assert_eq!(PagesSectionType::Header.name(), "Header"); + assert_eq!(PagesSectionType::Footer.name(), "Footer"); + assert_eq!(PagesSectionType::Floating.name(), "Floating"); + } + + #[test] + fn test_all_text() { + let mut section = PagesSection::new(0, PagesSectionType::Body); + section.heading = Some("Title".to_string()); + section.paragraphs.push("Para 1".to_string()); + section.paragraphs.push("Para 2".to_string()); + section + .text_storages + .push(TextStorage::from_text("Storage text".to_string())); + + let all_text = section.all_text(); + assert_eq!(all_text.len(), 4); + assert_eq!(all_text[0], "Title"); + assert_eq!(all_text[1], "Para 1"); + assert_eq!(all_text[2], "Para 2"); + assert_eq!(all_text[3], "Storage text"); + } +} diff --git a/src/iwa/protobuf.rs b/crates/litchi-iwa/src/protobuf.rs similarity index 99% rename from src/iwa/protobuf.rs rename to crates/litchi-iwa/src/protobuf.rs 
index 10a6f05..bcf83c5 100644 --- a/src/iwa/protobuf.rs +++ b/crates/litchi-iwa/src/protobuf.rs @@ -3,7 +3,7 @@ //! This module provides support for decoding Protocol Buffers messages //! used in iWork IWA (iWork Archive) files using the prost crate. -use crate::iwa::{Error, Result}; +use crate::{Error, Result}; use phf::phf_map; use prost::Message; diff --git a/src/iwa/protos/KNArchives.proto b/crates/litchi-iwa/src/protos/KNArchives.proto similarity index 100% rename from src/iwa/protos/KNArchives.proto rename to crates/litchi-iwa/src/protos/KNArchives.proto diff --git a/src/iwa/protos/KNCommandArchives.proto b/crates/litchi-iwa/src/protos/KNCommandArchives.proto similarity index 100% rename from src/iwa/protos/KNCommandArchives.proto rename to crates/litchi-iwa/src/protos/KNCommandArchives.proto diff --git a/src/iwa/protos/TNArchives.proto b/crates/litchi-iwa/src/protos/TNArchives.proto similarity index 100% rename from src/iwa/protos/TNArchives.proto rename to crates/litchi-iwa/src/protos/TNArchives.proto diff --git a/src/iwa/protos/TNCommandArchives.proto b/crates/litchi-iwa/src/protos/TNCommandArchives.proto similarity index 100% rename from src/iwa/protos/TNCommandArchives.proto rename to crates/litchi-iwa/src/protos/TNCommandArchives.proto diff --git a/src/iwa/protos/TPArchives.proto b/crates/litchi-iwa/src/protos/TPArchives.proto similarity index 100% rename from src/iwa/protos/TPArchives.proto rename to crates/litchi-iwa/src/protos/TPArchives.proto diff --git a/src/iwa/protos/TPCommandArchives.proto b/crates/litchi-iwa/src/protos/TPCommandArchives.proto similarity index 100% rename from src/iwa/protos/TPCommandArchives.proto rename to crates/litchi-iwa/src/protos/TPCommandArchives.proto diff --git a/src/iwa/protos/TSAArchives.proto b/crates/litchi-iwa/src/protos/TSAArchives.proto similarity index 100% rename from src/iwa/protos/TSAArchives.proto rename to crates/litchi-iwa/src/protos/TSAArchives.proto diff --git a/src/iwa/protos/TSCEArchives.proto 
b/crates/litchi-iwa/src/protos/TSCEArchives.proto similarity index 100% rename from src/iwa/protos/TSCEArchives.proto rename to crates/litchi-iwa/src/protos/TSCEArchives.proto diff --git a/src/iwa/protos/TSCH3DArchives.proto b/crates/litchi-iwa/src/protos/TSCH3DArchives.proto similarity index 100% rename from src/iwa/protos/TSCH3DArchives.proto rename to crates/litchi-iwa/src/protos/TSCH3DArchives.proto diff --git a/src/iwa/protos/TSCHArchives.Common.proto b/crates/litchi-iwa/src/protos/TSCHArchives.Common.proto similarity index 100% rename from src/iwa/protos/TSCHArchives.Common.proto rename to crates/litchi-iwa/src/protos/TSCHArchives.Common.proto diff --git a/src/iwa/protos/TSCHArchives.GEN.proto b/crates/litchi-iwa/src/protos/TSCHArchives.GEN.proto similarity index 100% rename from src/iwa/protos/TSCHArchives.GEN.proto rename to crates/litchi-iwa/src/protos/TSCHArchives.GEN.proto diff --git a/src/iwa/protos/TSCHArchives.proto b/crates/litchi-iwa/src/protos/TSCHArchives.proto similarity index 100% rename from src/iwa/protos/TSCHArchives.proto rename to crates/litchi-iwa/src/protos/TSCHArchives.proto diff --git a/src/iwa/protos/TSCHCommandArchives.proto b/crates/litchi-iwa/src/protos/TSCHCommandArchives.proto similarity index 100% rename from src/iwa/protos/TSCHCommandArchives.proto rename to crates/litchi-iwa/src/protos/TSCHCommandArchives.proto diff --git a/src/iwa/protos/TSCHPreUFFArchives.proto b/crates/litchi-iwa/src/protos/TSCHPreUFFArchives.proto similarity index 100% rename from src/iwa/protos/TSCHPreUFFArchives.proto rename to crates/litchi-iwa/src/protos/TSCHPreUFFArchives.proto diff --git a/src/iwa/protos/TSDArchives.proto b/crates/litchi-iwa/src/protos/TSDArchives.proto similarity index 100% rename from src/iwa/protos/TSDArchives.proto rename to crates/litchi-iwa/src/protos/TSDArchives.proto diff --git a/src/iwa/protos/TSDCommandArchives.proto b/crates/litchi-iwa/src/protos/TSDCommandArchives.proto similarity index 100% rename from 
src/iwa/protos/TSDCommandArchives.proto rename to crates/litchi-iwa/src/protos/TSDCommandArchives.proto diff --git a/src/iwa/protos/TSKArchives.proto b/crates/litchi-iwa/src/protos/TSKArchives.proto similarity index 100% rename from src/iwa/protos/TSKArchives.proto rename to crates/litchi-iwa/src/protos/TSKArchives.proto diff --git a/src/iwa/protos/TSPArchiveMessages.proto b/crates/litchi-iwa/src/protos/TSPArchiveMessages.proto similarity index 100% rename from src/iwa/protos/TSPArchiveMessages.proto rename to crates/litchi-iwa/src/protos/TSPArchiveMessages.proto diff --git a/src/iwa/protos/TSPDatabaseMessages.proto b/crates/litchi-iwa/src/protos/TSPDatabaseMessages.proto similarity index 100% rename from src/iwa/protos/TSPDatabaseMessages.proto rename to crates/litchi-iwa/src/protos/TSPDatabaseMessages.proto diff --git a/src/iwa/protos/TSPMessages.proto b/crates/litchi-iwa/src/protos/TSPMessages.proto similarity index 100% rename from src/iwa/protos/TSPMessages.proto rename to crates/litchi-iwa/src/protos/TSPMessages.proto diff --git a/src/iwa/protos/TSSArchives.proto b/crates/litchi-iwa/src/protos/TSSArchives.proto similarity index 100% rename from src/iwa/protos/TSSArchives.proto rename to crates/litchi-iwa/src/protos/TSSArchives.proto diff --git a/src/iwa/protos/TSTArchives.proto b/crates/litchi-iwa/src/protos/TSTArchives.proto similarity index 100% rename from src/iwa/protos/TSTArchives.proto rename to crates/litchi-iwa/src/protos/TSTArchives.proto diff --git a/src/iwa/protos/TSTCommandArchives.proto b/crates/litchi-iwa/src/protos/TSTCommandArchives.proto similarity index 100% rename from src/iwa/protos/TSTCommandArchives.proto rename to crates/litchi-iwa/src/protos/TSTCommandArchives.proto diff --git a/src/iwa/protos/TSTStylePropertyArchiving.proto b/crates/litchi-iwa/src/protos/TSTStylePropertyArchiving.proto similarity index 100% rename from src/iwa/protos/TSTStylePropertyArchiving.proto rename to crates/litchi-iwa/src/protos/TSTStylePropertyArchiving.proto 
diff --git a/src/iwa/protos/TSWPArchives.proto b/crates/litchi-iwa/src/protos/TSWPArchives.proto similarity index 100% rename from src/iwa/protos/TSWPArchives.proto rename to crates/litchi-iwa/src/protos/TSWPArchives.proto diff --git a/src/iwa/protos/TSWPCommandArchives.proto b/crates/litchi-iwa/src/protos/TSWPCommandArchives.proto similarity index 100% rename from src/iwa/protos/TSWPCommandArchives.proto rename to crates/litchi-iwa/src/protos/TSWPCommandArchives.proto diff --git a/src/iwa/ref_graph.rs b/crates/litchi-iwa/src/ref_graph.rs similarity index 99% rename from src/iwa/ref_graph.rs rename to crates/litchi-iwa/src/ref_graph.rs index 0c3eae7..bcddb11 100644 --- a/src/iwa/ref_graph.rs +++ b/crates/litchi-iwa/src/ref_graph.rs @@ -15,7 +15,7 @@ //! # Example //! //! ```rust,ignore -//! use litchi::iwa::ref_graph::ReferenceGraph; +//! use litchi_iwa::ref_graph::ReferenceGraph; //! //! let mut graph = ReferenceGraph::new(); //! diff --git a/src/iwa/registry.rs b/crates/litchi-iwa/src/registry.rs similarity index 100% rename from src/iwa/registry.rs rename to crates/litchi-iwa/src/registry.rs diff --git a/src/iwa/shapes/mod.rs b/crates/litchi-iwa/src/shapes/mod.rs similarity index 100% rename from src/iwa/shapes/mod.rs rename to crates/litchi-iwa/src/shapes/mod.rs diff --git a/src/iwa/shapes/text_extractor.rs b/crates/litchi-iwa/src/shapes/text_extractor.rs similarity index 96% rename from src/iwa/shapes/text_extractor.rs rename to crates/litchi-iwa/src/shapes/text_extractor.rs index ac2d50b..454aa0a 100644 --- a/src/iwa/shapes/text_extractor.rs +++ b/crates/litchi-iwa/src/shapes/text_extractor.rs @@ -15,10 +15,10 @@ //! referenced from the shape. The shape itself contains geometry and styling, //! while the actual text is stored separately. 
-use crate::iwa::Result; -use crate::iwa::bundle::Bundle; -use crate::iwa::object_index::{ObjectIndex, ResolvedObject}; -use crate::iwa::protobuf::tsd; +use crate::Result; +use crate::bundle::Bundle; +use crate::object_index::{ObjectIndex, ResolvedObject}; +use crate::protobuf::tsd; use prost::Message; /// Extractor for text content from shapes @@ -168,7 +168,7 @@ impl<'a> ShapeTextExtractor<'a> { // TSWP storage types range from 2001-2022 if msg.type_ >= 2001 && msg.type_ <= 2022 - && let Ok(storage) = crate::iwa::protobuf::tswp::StorageArchive::decode(&*msg.data) + && let Ok(storage) = crate::protobuf::tswp::StorageArchive::decode(&*msg.data) && !storage.text.is_empty() { return Ok(Some(storage.text.join("\n"))); diff --git a/src/iwa/snappy.rs b/crates/litchi-iwa/src/snappy.rs similarity index 99% rename from src/iwa/snappy.rs rename to crates/litchi-iwa/src/snappy.rs index b75be48..937eb11 100644 --- a/src/iwa/snappy.rs +++ b/crates/litchi-iwa/src/snappy.rs @@ -8,7 +8,7 @@ use snap::raw::Decoder; use std::io::{self, Cursor, Read}; -use crate::iwa::Error; +use crate::Error; /// Custom Snappy stream decompressor for iWork IWA files #[derive(Debug)] diff --git a/src/iwa/structured.rs b/crates/litchi-iwa/src/structured.rs similarity index 96% rename from src/iwa/structured.rs rename to crates/litchi-iwa/src/structured.rs index b6de692..b43fc12 100644 --- a/src/iwa/structured.rs +++ b/crates/litchi-iwa/src/structured.rs @@ -7,12 +7,12 @@ use std::collections::HashMap; -use crate::iwa::Result; -use crate::iwa::bundle::Bundle; -use crate::iwa::charts::metadata_extractor::ChartMetadataExtractor; -use crate::iwa::numbers::table_extractor::TableDataExtractor; -use crate::iwa::object_index::ObjectIndex; -use crate::iwa::shapes::text_extractor::ShapeTextExtractor; +use crate::Result; +use crate::bundle::Bundle; +use crate::charts::metadata_extractor::ChartMetadataExtractor; +use crate::numbers::table_extractor::TableDataExtractor; +use crate::object_index::ObjectIndex; +use 
crate::shapes::text_extractor::ShapeTextExtractor; /// Represents a table extracted from a Numbers document #[derive(Debug, Clone)] @@ -205,8 +205,8 @@ pub fn extract_tables(bundle: &Bundle, object_index: &ObjectIndex) -> Result CellValue { - use crate::iwa::numbers::CellValue as NC; +fn convert_numbers_cell_to_structured(cell: crate::numbers::CellValue) -> CellValue { + use crate::numbers::CellValue as NC; match cell { NC::Empty => CellValue::Empty, @@ -285,9 +285,7 @@ pub fn extract_sections(bundle: &Bundle, object_index: &ObjectIndex) -> Result Result pub fn extract_chart_metadata( bundle: &Bundle, object_index: &ObjectIndex, -) -> Result> { +) -> Result> { let extractor = ChartMetadataExtractor::new(bundle, object_index); extractor.extract_all_charts() } diff --git a/crates/litchi-iwa/src/text/extractor.rs b/crates/litchi-iwa/src/text/extractor.rs new file mode 100644 index 0000000..5582d70 --- /dev/null +++ b/crates/litchi-iwa/src/text/extractor.rs @@ -0,0 +1,117 @@ +//! High-Level Text Extraction API +//! +//! Provides utilities for extracting text from iWork document objects. 
+ +use super::storage::{TextStorage, parse_storage_archive}; +use crate::Result; +use crate::archive::ArchiveObject; +use crate::bundle::Bundle; + +/// Text extractor for iWork documents +pub struct TextExtractor { + /// Extracted text storages + storages: Vec, +} + +impl TextExtractor { + /// Create a new text extractor + pub fn new() -> Self { + Self { + storages: Vec::new(), + } + } + + /// Extract text from a bundle + pub fn extract_from_bundle(&mut self, bundle: &Bundle) -> Result<()> { + // Find all TSWP storage objects (message types 200-205, 2001-2022) + let storage_types = [ + 200, 201, 202, 203, 204, 205, 2001, 2002, 2003, 2004, 2005, 2011, 2012, 2022, + ]; + + for type_id in storage_types { + let objects = bundle.find_objects_by_type(type_id); + for (_archive_name, object) in objects { + if let Ok(storage) = self.extract_from_object(object) + && !storage.is_empty() + { + self.storages.push(storage); + } + } + } + + Ok(()) + } + + /// Extract text from a single archive object + pub fn extract_from_object(&self, object: &ArchiveObject) -> Result { + // Extract text from decoded messages + let text_lines = object.extract_text(); + + if text_lines.is_empty() { + return Ok(TextStorage::new()); + } + + parse_storage_archive(&text_lines) + } + + /// Get all extracted text as a single string + pub fn get_text(&self) -> String { + self.storages + .iter() + .map(|s| s.plain_text()) + .collect::>() + .join("\n") + } + + /// Get all text storages + pub fn storages(&self) -> &[TextStorage] { + &self.storages + } + + /// Get number of text storages found + pub fn storage_count(&self) -> usize { + self.storages.len() + } + + /// Clear all extracted text + pub fn clear(&mut self) { + self.storages.clear(); + } +} + +impl Default for TextExtractor { + fn default() -> Self { + Self::new() + } +} + +/// Quick text extraction function for convenience +pub fn extract_text_from_bundle(bundle: &Bundle) -> Result { + let mut extractor = TextExtractor::new(); + 
extractor.extract_from_bundle(bundle)?; + Ok(extractor.get_text()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_text_extractor_creation() { + let extractor = TextExtractor::new(); + assert_eq!(extractor.storage_count(), 0); + assert_eq!(extractor.get_text(), ""); + } + + #[test] + fn test_text_extractor_clear() { + let mut extractor = TextExtractor::new(); + extractor + .storages + .push(TextStorage::from_text("Test".to_string())); + assert_eq!(extractor.storage_count(), 1); + + extractor.clear(); + assert_eq!(extractor.storage_count(), 0); + } +} diff --git a/src/iwa/text/mod.rs b/crates/litchi-iwa/src/text/mod.rs similarity index 100% rename from src/iwa/text/mod.rs rename to crates/litchi-iwa/src/text/mod.rs diff --git a/src/iwa/text/storage.rs b/crates/litchi-iwa/src/text/storage.rs similarity index 99% rename from src/iwa/text/storage.rs rename to crates/litchi-iwa/src/text/storage.rs index 2906a25..2630745 100644 --- a/src/iwa/text/storage.rs +++ b/crates/litchi-iwa/src/text/storage.rs @@ -3,7 +3,7 @@ //! iWork documents store text in TSWP (Text Word Processing) storage objects //! that contain rich text with styling information. 
-use crate::iwa::Result; +use crate::Result; /// Represents a contiguous block of text storage #[derive(Debug, Clone)] diff --git a/src/iwa/text/style.rs b/crates/litchi-iwa/src/text/style.rs similarity index 100% rename from src/iwa/text/style.rs rename to crates/litchi-iwa/src/text/style.rs diff --git a/src/iwa/varint.rs b/crates/litchi-iwa/src/varint.rs similarity index 100% rename from src/iwa/varint.rs rename to crates/litchi-iwa/src/varint.rs diff --git a/src/iwa/zip_utils.rs b/crates/litchi-iwa/src/zip_utils.rs similarity index 96% rename from src/iwa/zip_utils.rs rename to crates/litchi-iwa/src/zip_utils.rs index a3995b4..07bcf3f 100644 --- a/src/iwa/zip_utils.rs +++ b/crates/litchi-iwa/src/zip_utils.rs @@ -10,9 +10,9 @@ use std::io::Cursor; use soapberry_zip::office::ArchiveReader; -use crate::iwa::archive::Archive; -use crate::iwa::snappy::SnappyStream; -use crate::iwa::{Error, Result}; +use crate::archive::Archive; +use crate::snappy::SnappyStream; +use crate::{Error, Result}; /// Parse all IWA files from a ZIP archive and return parsed Archives. 
/// @@ -31,7 +31,7 @@ use crate::iwa::{Error, Result}; /// /// ```rust,no_run /// use soapberry_zip::office::ArchiveReader; -/// use litchi::iwa::zip_utils::parse_iwa_files_from_archive; +/// use litchi_iwa::zip_utils::parse_iwa_files_from_archive; /// /// let data = std::fs::read("document.pages")?; /// let archive = ArchiveReader::new(&data)?; @@ -79,7 +79,7 @@ pub fn parse_iwa_files_from_archive( /// /// ```rust,no_run /// use soapberry_zip::office::ArchiveReader; -/// use litchi::iwa::zip_utils::extract_message_types_from_archive; +/// use litchi_iwa::zip_utils::extract_message_types_from_archive; /// /// let data = std::fs::read("document.pages")?; /// let archive = ArchiveReader::new(&data)?; diff --git a/crates/litchi-markdown/Cargo.toml b/crates/litchi-markdown/Cargo.toml new file mode 100644 index 0000000..69eb907 --- /dev/null +++ b/crates/litchi-markdown/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "litchi-markdown" +description = "Format-agnostic Markdown emission helpers for the Litchi office-formats library." +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +litchi-core = { workspace = true } +phf = { workspace = true } diff --git a/crates/litchi-markdown/README.md b/crates/litchi-markdown/README.md new file mode 100644 index 0000000..4852494 --- /dev/null +++ b/crates/litchi-markdown/README.md @@ -0,0 +1,36 @@ +# litchi-markdown + +Format-agnostic Markdown emission helpers for the Litchi office-formats library. + +## Overview + +`litchi-markdown` provides the `ToMarkdown` trait and configuration types used by Litchi's higher-level format crates (and the `litchi` umbrella crate) to render Office documents and presentations as Markdown. It deliberately has no knowledge of any concrete document format; per-format `impl ToMarkdown for ...` blocks live alongside their respective format crates. 
+ +## Usage + +```toml +[dependencies] +litchi-markdown = "0.0.1" +``` + +```rust +use litchi_markdown::{MarkdownOptions, TableStyle, ToMarkdown}; + +fn render<T: ToMarkdown>(value: &T) -> litchi_core::Result<String> { + let opts = MarkdownOptions { + table_style: TableStyle::MinimalHtml, + ..MarkdownOptions::default() + }; + value.to_markdown_with_options(&opts) +} +``` + +## Features + +- `ToMarkdown` trait for converting types to Markdown. +- `MarkdownOptions` plus `FormulaStyle`, `ScriptStyle`, `StrikethroughStyle`, `TableStyle` enums for tuning the output. +- Unicode helpers for rendering super- and subscript characters. + +## License + +Licensed under the Apache License, Version 2.0. Part of the [Litchi](https://github.com/DevExzh/litchi) workspace. diff --git a/crates/litchi-markdown/examples/simple_doc.rs b/crates/litchi-markdown/examples/simple_doc.rs new file mode 100644 index 0000000..8dd5a6f --- /dev/null +++ b/crates/litchi-markdown/examples/simple_doc.rs @@ -0,0 +1,98 @@ +//! Render a simple in-memory "document" to Markdown using `ToMarkdown`. +//! +//! `litchi-markdown` is format-agnostic: it ships the [`ToMarkdown`] trait and +//! configuration types, but no concrete `impl ToMarkdown for ...` blocks (those +//! live alongside the format crates). This example therefore defines its own +//! tiny `MyDoc` struct, implements `ToMarkdown` for it, and exercises the +//! default [`MarkdownOptions`]. +//! +//! # Run +//! +//! ```bash +//! cargo run -p litchi-markdown --example simple_doc +//! ``` +use std::fmt::Write as _; + +use litchi_core::Result; +use litchi_markdown::{MarkdownOptions, ToMarkdown}; + +/// A toy document with a title, free-form paragraphs, and a single 2D table. +struct MyDoc { + title: String, + paragraphs: Vec, + /// Row-major table. The first row is rendered as the table header. + table: Vec>, +} + +impl ToMarkdown for MyDoc { + fn to_markdown_with_options(&self, _options: &MarkdownOptions) -> Result { + // Pre-allocate a reasonable upper bound to avoid reallocations. 
+ let mut out = String::with_capacity(256); + + // H1 title. + writeln!(out, "# {}", self.title).unwrap(); + writeln!(out).unwrap(); + + // Paragraphs separated by blank lines. + for para in &self.paragraphs { + writeln!(out, "{para}").unwrap(); + writeln!(out).unwrap(); + } + + // Markdown table (assumes first row is the header). + if let Some((header, body)) = self.table.split_first() { + // Header row. + out.push('|'); + for cell in header { + write!(out, " {cell} |").unwrap(); + } + out.push('\n'); + + // Separator row. + out.push('|'); + for _ in header { + out.push_str("---|"); + } + out.push('\n'); + + // Body rows. + for row in body { + out.push('|'); + for cell in row { + write!(out, " {cell} |").unwrap(); + } + out.push('\n'); + } + } + + Ok(out) + } +} + +fn main() -> Result<()> { + let doc = MyDoc { + title: "Litchi Markdown Demo".to_owned(), + paragraphs: vec![ + "This document was generated by a tiny `ToMarkdown` impl living \ + entirely inside the example." + .to_owned(), + "Real format crates (litchi-docx, litchi-odf, ...) provide their \ + own implementations on top of the same trait." + .to_owned(), + ], + table: vec![ + vec!["Format".to_owned(), "Crate".to_owned()], + vec!["DOCX".to_owned(), "litchi-ooxml".to_owned()], + vec!["ODT".to_owned(), "litchi-odf".to_owned()], + vec!["RTF".to_owned(), "litchi-rtf".to_owned()], + ], + }; + + // `to_markdown` uses `MarkdownOptions::default()` under the hood. + let rendered = doc.to_markdown()?; + + println!("--- Rendered Markdown ---"); + println!("{rendered}"); + + Ok(()) +} diff --git a/crates/litchi-markdown/examples/style_options.rs b/crates/litchi-markdown/examples/style_options.rs new file mode 100644 index 0000000..280c85b --- /dev/null +++ b/crates/litchi-markdown/examples/style_options.rs @@ -0,0 +1,172 @@ +//! Compare `MarkdownOptions` style variants on a single input. +//! +//! Renders a tiny `MathSnippet` (a subscript fragment + a superscript fragment + +//! 
an inline formula) through every relevant variant of [`TableStyle`], +//! [`FormulaStyle`], and [`ScriptStyle`] so you can eyeball the differences. +//! +//! Note on naming: there is no `TableStyle::Pipe` or `ScriptStyle::Markdown`; +//! the enum variants provided by `litchi-markdown` are +//! `TableStyle::{Markdown, MinimalHtml, StyledHtml}` and +//! `ScriptStyle::{Html, Unicode}`, which are the ones used below. +//! +//! # Run +//! +//! ```bash +//! cargo run -p litchi-markdown --example style_options +//! ``` +use std::fmt::Write as _; + +use litchi_core::Result; +use litchi_markdown::{ + FormulaStyle, MarkdownOptions, ScriptStyle, StrikethroughStyle, TableStyle, ToMarkdown, + unicode::{convert_to_subscript, convert_to_superscript}, +}; + +/// A tiny "math snippet" with a subscript label, a superscript exponent, and an +/// inline formula. The struct itself stores raw text; rendering decisions are +/// driven entirely by [`MarkdownOptions`]. +struct MathSnippet { + /// Raw text to render in subscript position, e.g. `"i+1"`. + subscript: String, + /// Raw text to render in superscript position, e.g. `"n2"`. + superscript: String, + /// LaTeX source for an inline formula, without delimiters, e.g. `"a^2+b^2=c^2"`. + formula: String, + /// One-row, one-cell table used purely to show `TableStyle` differences. + cell: String, +} + +impl ToMarkdown for MathSnippet { + fn to_markdown_with_options(&self, options: &MarkdownOptions) -> Result { + let mut out = String::with_capacity(256); + + // --- Script rendering ------------------------------------------------- + // ScriptStyle::Html -> wrap in `<sub>`/`<sup>`; preserves all characters. + // ScriptStyle::Unicode -> map char-by-char via `unicode` helpers, falling + // back to original chars where no Unicode equivalent exists. 
+ match options.script_style { + ScriptStyle::Html => { + writeln!(out, "x{}", self.subscript).unwrap(); + writeln!(out, "x{}", self.superscript).unwrap(); + }, + ScriptStyle::Unicode => { + writeln!(out, "x{}", convert_to_subscript(&self.subscript)).unwrap(); + writeln!(out, "x{}", convert_to_superscript(&self.superscript)).unwrap(); + }, + } + + // --- Formula rendering ------------------------------------------------ + match options.formula_style { + FormulaStyle::LaTeX => writeln!(out, "Inline: \\({}\\)", self.formula).unwrap(), + FormulaStyle::Dollar => writeln!(out, "Inline: ${}$", self.formula).unwrap(), + } + + // --- Strikethrough sample (so the option isn't silent) --------------- + match options.strikethrough_style { + StrikethroughStyle::Markdown => writeln!(out, "~~old~~").unwrap(), + StrikethroughStyle::Html => writeln!(out, "old").unwrap(), + } + + // --- Single-cell table ---------------------------------------------- + match options.table_style { + TableStyle::Markdown => { + writeln!(out, "| Header |").unwrap(); + writeln!(out, "|--------|").unwrap(); + writeln!(out, "| {} |", self.cell).unwrap(); + }, + TableStyle::MinimalHtml => { + writeln!( + out, + "
Header
{}
", + self.cell + ) + .unwrap(); + }, + TableStyle::StyledHtml => { + let pad = " ".repeat(options.html_table_indent); + writeln!(out, "").unwrap(); + writeln!(out, "{pad}").unwrap(); + writeln!(out, "{pad}{pad}").unwrap(); + writeln!(out, "{pad}").unwrap(); + writeln!(out, "{pad}").unwrap(); + writeln!(out, "{pad}{pad}", self.cell).unwrap(); + writeln!(out, "{pad}").unwrap(); + writeln!(out, "
Header
{}
").unwrap(); + }, + } + + Ok(out) + } +} + +fn render(label: &str, snippet: &MathSnippet, options: &MarkdownOptions) -> Result<()> { + println!("===== {label} ====="); + println!("{}", snippet.to_markdown_with_options(options)?); + Ok(()) +} + +fn main() -> Result<()> { + let snippet = MathSnippet { + subscript: "i+1".to_owned(), + superscript: "n2".to_owned(), + formula: "a^2+b^2=c^2".to_owned(), + cell: "value".to_owned(), + }; + + // TableStyle variants (Markdown, MinimalHtml, StyledHtml). + render( + "TableStyle::Markdown (default)", + &snippet, + &MarkdownOptions::new().with_table_style(TableStyle::Markdown), + )?; + render( + "TableStyle::MinimalHtml", + &snippet, + &MarkdownOptions::new().with_table_style(TableStyle::MinimalHtml), + )?; + render( + "TableStyle::StyledHtml (indent=2)", + &snippet, + &MarkdownOptions::new() + .with_table_style(TableStyle::StyledHtml) + .with_html_table_indent(2), + )?; + + // FormulaStyle variants. + render( + "FormulaStyle::LaTeX (default)", + &snippet, + &MarkdownOptions::new().with_formula_style(FormulaStyle::LaTeX), + )?; + render( + "FormulaStyle::Dollar", + &snippet, + &MarkdownOptions::new().with_formula_style(FormulaStyle::Dollar), + )?; + + // ScriptStyle variants -- this exercises the `unicode` helpers. + render( + "ScriptStyle::Html (default)", + &snippet, + &MarkdownOptions::new().with_script_style(ScriptStyle::Html), + )?; + render( + "ScriptStyle::Unicode", + &snippet, + &MarkdownOptions::new().with_script_style(ScriptStyle::Unicode), + )?; + + // StrikethroughStyle variants, for completeness. 
+ render( + "StrikethroughStyle::Markdown (default)", + &snippet, + &MarkdownOptions::new().with_strikethrough_style(StrikethroughStyle::Markdown), + )?; + render( + "StrikethroughStyle::Html", + &snippet, + &MarkdownOptions::new().with_strikethrough_style(StrikethroughStyle::Html), + )?; + + Ok(()) +} diff --git a/src/markdown/config.rs b/crates/litchi-markdown/src/config.rs similarity index 94% rename from src/markdown/config.rs rename to crates/litchi-markdown/src/config.rs index 848b235..8decda6 100644 --- a/src/markdown/config.rs +++ b/crates/litchi-markdown/src/config.rs @@ -10,7 +10,7 @@ /// # Examples /// /// ```rust -/// use litchi::markdown::{MarkdownOptions, TableStyle}; +/// use litchi_markdown::{MarkdownOptions, TableStyle}; /// /// // Create with defaults /// let options = MarkdownOptions::default(); @@ -73,7 +73,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::MarkdownOptions; + /// use litchi_markdown::MarkdownOptions; /// /// let options = MarkdownOptions::new(); /// ``` @@ -90,7 +90,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::MarkdownOptions; + /// use litchi_markdown::MarkdownOptions; /// /// let options = MarkdownOptions::new().with_styles(true); /// ``` @@ -108,7 +108,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::MarkdownOptions; + /// use litchi_markdown::MarkdownOptions; /// /// let options = MarkdownOptions::new().with_metadata(true); /// ``` @@ -123,7 +123,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::{MarkdownOptions, TableStyle}; + /// use litchi_markdown::{MarkdownOptions, TableStyle}; /// /// let options = MarkdownOptions::new() /// .with_table_style(TableStyle::MinimalHtml); @@ -141,7 +141,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::MarkdownOptions; + /// use litchi_markdown::MarkdownOptions; /// /// let options = 
MarkdownOptions::new().with_html_table_indent(4); /// ``` @@ -156,7 +156,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::{MarkdownOptions, FormulaStyle}; + /// use litchi_markdown::{MarkdownOptions, FormulaStyle}; /// /// let options = MarkdownOptions::new() /// .with_formula_style(FormulaStyle::Dollar); @@ -174,7 +174,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::MarkdownOptions; + /// use litchi_markdown::MarkdownOptions; /// /// let options = MarkdownOptions::new().with_list_indent(4); /// ``` @@ -189,7 +189,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::{MarkdownOptions, ScriptStyle}; + /// use litchi_markdown::{MarkdownOptions, ScriptStyle}; /// /// let options = MarkdownOptions::new() /// .with_script_style(ScriptStyle::Unicode); @@ -205,7 +205,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::{MarkdownOptions, StrikethroughStyle}; + /// use litchi_markdown::{MarkdownOptions, StrikethroughStyle}; /// /// let options = MarkdownOptions::new() /// .with_strikethrough_style(StrikethroughStyle::Html); @@ -225,7 +225,7 @@ impl MarkdownOptions { /// # Examples /// /// ```rust - /// use litchi::markdown::MarkdownOptions; + /// use litchi_markdown::MarkdownOptions; /// /// // Enable parallel processing (default) /// let options = MarkdownOptions::new() diff --git a/crates/litchi-markdown/src/lib.rs b/crates/litchi-markdown/src/lib.rs new file mode 100644 index 0000000..347c1fd --- /dev/null +++ b/crates/litchi-markdown/src/lib.rs @@ -0,0 +1,24 @@ +//! Format-agnostic Markdown emission helpers for the Litchi office-formats library. +//! +//! This crate provides the building blocks used by Litchi's higher-level format +//! crates (and the `litchi` umbrella crate) to render Office documents and +//! presentations as Markdown: +//! +//! - The [`ToMarkdown`] trait for converting types to Markdown. +//! 
- [`MarkdownOptions`] and related enums for configuring the output. +//! - Unicode helpers for rendering super- and subscript characters. +//! +//! The crate intentionally has no knowledge of any concrete document format; +//! per-format `impl ToMarkdown for ...` blocks live alongside their respective +//! format crates. +//! +//! # Re-exports +//! +//! The most commonly used items are re-exported at the crate root for +//! convenience. +pub mod config; +pub mod traits; +pub mod unicode; + +pub use config::{FormulaStyle, MarkdownOptions, ScriptStyle, StrikethroughStyle, TableStyle}; +pub use traits::ToMarkdown; diff --git a/src/markdown/traits.rs b/crates/litchi-markdown/src/traits.rs similarity index 93% rename from src/markdown/traits.rs rename to crates/litchi-markdown/src/traits.rs index cc3ec75..06d9fec 100644 --- a/src/markdown/traits.rs +++ b/crates/litchi-markdown/src/traits.rs @@ -1,9 +1,9 @@ -use super::config::MarkdownOptions; +use crate::config::MarkdownOptions; /// Core trait for Markdown conversion. /// /// This module defines the `ToMarkdown` trait that enables types to be /// converted to Markdown format. -use crate::common::Result; +use litchi_core::Result; /// Core trait for types that can be converted to Markdown. 
/// @@ -12,7 +12,7 @@ use crate::common::Result; /// /// # Examples /// -/// ```rust,no_run +/// ```rust,ignore /// use litchi::{Document, markdown::ToMarkdown}; /// /// # fn main() -> Result<(), litchi::Error> { @@ -34,7 +34,7 @@ pub trait ToMarkdown { /// /// # Examples /// - /// ```rust,no_run + /// ```rust,ignore /// use litchi::{Document, markdown::ToMarkdown}; /// /// # fn main() -> Result<(), litchi::Error> { @@ -55,7 +55,7 @@ pub trait ToMarkdown { /// /// # Examples /// - /// ```rust,no_run + /// ```rust,ignore /// use litchi::{Document, markdown::{ToMarkdown, MarkdownOptions}}; /// /// # fn main() -> Result<(), litchi::Error> { diff --git a/crates/litchi-markdown/src/unicode.rs b/crates/litchi-markdown/src/unicode.rs new file mode 100644 index 0000000..6b8320a --- /dev/null +++ b/crates/litchi-markdown/src/unicode.rs @@ -0,0 +1,390 @@ +//! Unicode superscript and subscript character conversion. +//! +//! This module provides zero-cost compile-time lookup tables for converting +//! regular characters to their Unicode superscript and subscript equivalents. +//! Uses `phf` for efficient perfect hash function lookups. +//! +//! # Unicode Character Coverage +//! +//! ## Superscripts +//! - Digits: 0-9 → ⁰¹²³⁴⁵⁶⁷⁸⁹ +//! - Latin letters: ⁱⁿᵃᵇᶜᵈᵉᶠᵍʰʲᵏˡᵐⁿᵒᵖʳˢᵗᵘᵛʷˣʸᶻ +//! - Greek letters: ᵝᵞᵟᵠᵡ +//! - Symbols: ⁺⁻⁼⁽⁾ +//! +//! ## Subscripts +//! - Digits: 0-9 → ₀₁₂₃₄₅₆₇₈₉ +//! - Latin letters: ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓ +//! - Symbols: ₊₋₌₍₎ +//! +//! # Examples +//! +//! ```rust +//! use litchi_markdown::unicode::{to_superscript, to_subscript}; +//! +//! // Convert single character +//! assert_eq!(to_superscript('2'), Some('²')); +//! assert_eq!(to_subscript('0'), Some('₀')); +//! +//! // Convert string (note: 'x' also has superscript) +//! let superscript = "x2".chars().map(|c| to_superscript(c).unwrap_or(c)).collect::(); +//! assert_eq!(superscript, "ˣ²"); +//! ``` +use phf::phf_map; + +/// Compile-time lookup table for superscript characters. 
+/// +/// Maps regular characters to their Unicode superscript equivalents. +/// Uses perfect hash function for O(1) lookup with zero runtime cost. +static SUPERSCRIPT_MAP: phf::Map = phf_map! { + // Digits + '0' => '⁰', + '1' => '¹', + '2' => '²', + '3' => '³', + '4' => '⁴', + '5' => '⁵', + '6' => '⁶', + '7' => '⁷', + '8' => '⁸', + '9' => '⁹', + + // Latin lowercase letters + 'a' => 'ᵃ', + 'b' => 'ᵇ', + 'c' => 'ᶜ', + 'd' => 'ᵈ', + 'e' => 'ᵉ', + 'f' => 'ᶠ', + 'g' => 'ᵍ', + 'h' => 'ʰ', + 'i' => 'ⁱ', + 'j' => 'ʲ', + 'k' => 'ᵏ', + 'l' => 'ˡ', + 'm' => 'ᵐ', + 'n' => 'ⁿ', + 'o' => 'ᵒ', + 'p' => 'ᵖ', + 'r' => 'ʳ', + 's' => 'ˢ', + 't' => 'ᵗ', + 'u' => 'ᵘ', + 'v' => 'ᵛ', + 'w' => 'ʷ', + 'x' => 'ˣ', + 'y' => 'ʸ', + 'z' => 'ᶻ', + + // Latin uppercase letters (limited support in Unicode) + 'A' => 'ᴬ', + 'B' => 'ᴮ', + 'D' => 'ᴰ', + 'E' => 'ᴱ', + 'G' => 'ᴳ', + 'H' => 'ᴴ', + 'I' => 'ᴵ', + 'J' => 'ᴶ', + 'K' => 'ᴷ', + 'L' => 'ᴸ', + 'M' => 'ᴹ', + 'N' => 'ᴺ', + 'O' => 'ᴼ', + 'P' => 'ᴾ', + 'R' => 'ᴿ', + 'T' => 'ᵀ', + 'U' => 'ᵁ', + 'V' => 'ⱽ', + 'W' => 'ᵂ', + + // Greek letters + 'β' => 'ᵝ', + 'γ' => 'ᵞ', + 'δ' => 'ᵟ', + 'φ' => 'ᵠ', + 'χ' => 'ᵡ', + + // Symbols + '+' => '⁺', + '-' => '⁻', + '=' => '⁼', + '(' => '⁽', + ')' => '⁾', +}; + +/// Compile-time lookup table for subscript characters. +/// +/// Maps regular characters to their Unicode subscript equivalents. +/// Uses perfect hash function for O(1) lookup with zero runtime cost. +static SUBSCRIPT_MAP: phf::Map = phf_map! 
{ + // Digits + '0' => '₀', + '1' => '₁', + '2' => '₂', + '3' => '₃', + '4' => '₄', + '5' => '₅', + '6' => '₆', + '7' => '₇', + '8' => '₈', + '9' => '₉', + + // Latin lowercase letters (limited support) + 'a' => 'ₐ', + 'e' => 'ₑ', + 'h' => 'ₕ', + 'i' => 'ᵢ', + 'j' => 'ⱼ', + 'k' => 'ₖ', + 'l' => 'ₗ', + 'm' => 'ₘ', + 'n' => 'ₙ', + 'o' => 'ₒ', + 'p' => 'ₚ', + 'r' => 'ᵣ', + 's' => 'ₛ', + 't' => 'ₜ', + 'u' => 'ᵤ', + 'v' => 'ᵥ', + 'x' => 'ₓ', + + // Greek letters + 'β' => 'ᵦ', + 'γ' => 'ᵧ', + 'ρ' => 'ᵨ', + 'φ' => 'ᵩ', + 'χ' => 'ᵪ', + + // Symbols + '+' => '₊', + '-' => '₋', + '=' => '₌', + '(' => '₍', + ')' => '₎', +}; + +/// Convert a character to its Unicode superscript equivalent. +/// +/// Returns `Some(char)` if a superscript equivalent exists, `None` otherwise. +/// +/// # Examples +/// +/// ```rust +/// use litchi_markdown::unicode::to_superscript; +/// +/// assert_eq!(to_superscript('2'), Some('²')); +/// assert_eq!(to_superscript('n'), Some('ⁿ')); +/// assert_eq!(to_superscript('+'), Some('⁺')); +/// assert_eq!(to_superscript('q'), None); // No Unicode superscript for 'q' +/// ``` +/// +/// # Performance +/// +/// This function uses a compile-time perfect hash function for O(1) lookup +/// with zero runtime cost. The lookup table is embedded directly in the binary. +#[inline] +pub fn to_superscript(c: char) -> Option { + SUPERSCRIPT_MAP.get(&c).copied() +} + +/// Convert a character to its Unicode subscript equivalent. +/// +/// Returns `Some(char)` if a subscript equivalent exists, `None` otherwise. +/// +/// # Examples +/// +/// ```rust +/// use litchi_markdown::unicode::to_subscript; +/// +/// assert_eq!(to_subscript('0'), Some('₀')); +/// assert_eq!(to_subscript('i'), Some('ᵢ')); +/// assert_eq!(to_subscript('+'), Some('₊')); +/// assert_eq!(to_subscript('b'), None); // No Unicode subscript for 'b' +/// ``` +/// +/// # Performance +/// +/// This function uses a compile-time perfect hash function for O(1) lookup +/// with zero runtime cost. 
The lookup table is embedded directly in the binary. +#[inline] +pub fn to_subscript(c: char) -> Option { + SUBSCRIPT_MAP.get(&c).copied() +} + +/// Convert a string to superscript, falling back to original characters for unsupported ones. +/// +/// This function attempts to convert each character in the input string to its +/// superscript equivalent. Characters without a superscript mapping remain unchanged. +/// +/// # Examples +/// +/// ```rust +/// use litchi_markdown::unicode::convert_to_superscript; +/// +/// // 'x' has superscript, so it converts too +/// assert_eq!(convert_to_superscript("x2"), "ˣ²"); +/// assert_eq!(convert_to_superscript("n+1"), "ⁿ⁺¹"); +/// assert_eq!(convert_to_superscript("2nd"), "²ⁿᵈ"); +/// // Some uppercase letters have superscripts +/// assert_eq!(convert_to_superscript("H2O"), "ᴴ²ᴼ"); +/// // Characters without superscript remain unchanged (e.g., 'C') +/// assert_eq!(convert_to_superscript("CO2"), "Cᴼ²"); +/// ``` +/// +/// # Performance +/// +/// This function pre-allocates twice the input's byte length as an estimate, +/// reducing reallocations. Character conversion uses zero-cost lookups. +#[inline] +pub fn convert_to_superscript(text: &str) -> String { + // Reserve twice the input's byte length: superscript chars are multi-byte in UTF-8, so this is an estimate, not an exact size + let mut result = String::with_capacity(text.len() * 2); + + for c in text.chars() { + result.push(to_superscript(c).unwrap_or(c)); + } + + result +} + +/// Convert a string to subscript, falling back to original characters for unsupported ones. +/// +/// This function attempts to convert each character in the input string to its +/// subscript equivalent. Characters without a subscript mapping remain unchanged. 
+/// +/// # Examples +/// +/// ```rust +/// use litchi_markdown::unicode::convert_to_subscript; +/// +/// // Uppercase 'H' and 'O' have no subscript (only lowercase letters have subscripts) +/// assert_eq!(convert_to_subscript("H2O"), "H₂O"); +/// // Lowercase letters with subscripts convert +/// assert_eq!(convert_to_subscript("h2o"), "ₕ₂ₒ"); +/// // 'x' has subscript, so it converts too +/// assert_eq!(convert_to_subscript("x0"), "ₓ₀"); +/// assert_eq!(convert_to_subscript("a+b"), "ₐ₊b"); // 'b' has no subscript +/// ``` +/// +/// # Performance +/// +/// This function pre-allocates twice the input's byte length as an estimate, +/// reducing reallocations. Character conversion uses zero-cost lookups. +#[inline] +pub fn convert_to_subscript(text: &str) -> String { + // Reserve twice the input's byte length: subscript chars are multi-byte in UTF-8, so this is an estimate, not an exact size + let mut result = String::with_capacity(text.len() * 2); + + for c in text.chars() { + result.push(to_subscript(c).unwrap_or(c)); + } + + result +} + +/// Check if all characters in a string can be converted to superscript. +/// +/// Returns `true` if all characters have Unicode superscript equivalents (vacuously `true` for an empty string). +/// +/// # Examples +/// +/// ```rust +/// use litchi_markdown::unicode::can_convert_to_superscript; +/// +/// assert!(can_convert_to_superscript("123")); +/// assert!(can_convert_to_superscript("n+1")); +/// assert!(!can_convert_to_superscript("query")); // 'q' has no superscript +/// ``` +#[inline] +pub fn can_convert_to_superscript(text: &str) -> bool { + text.chars().all(|c| to_superscript(c).is_some()) +} + +/// Check if all characters in a string can be converted to subscript. +/// +/// Returns `true` if all characters have Unicode subscript equivalents (vacuously `true` for an empty string). 
+/// +/// # Examples +/// +/// ```rust +/// use litchi_markdown::unicode::can_convert_to_subscript; +/// +/// assert!(can_convert_to_subscript("123")); +/// assert!(can_convert_to_subscript("i+1")); +/// assert!(!can_convert_to_subscript("abc")); // 'b' and 'c' have no subscript +/// ``` +#[inline] +pub fn can_convert_to_subscript(text: &str) -> bool { + text.chars().all(|c| to_subscript(c).is_some()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_superscript_digits() { + assert_eq!(to_superscript('0'), Some('⁰')); + assert_eq!(to_superscript('1'), Some('¹')); + assert_eq!(to_superscript('2'), Some('²')); + assert_eq!(to_superscript('9'), Some('⁹')); + } + + #[test] + fn test_subscript_digits() { + assert_eq!(to_subscript('0'), Some('₀')); + assert_eq!(to_subscript('1'), Some('₁')); + assert_eq!(to_subscript('2'), Some('₂')); + assert_eq!(to_subscript('9'), Some('₉')); + } + + #[test] + fn test_superscript_letters() { + assert_eq!(to_superscript('n'), Some('ⁿ')); + assert_eq!(to_superscript('i'), Some('ⁱ')); + assert_eq!(to_superscript('x'), Some('ˣ')); + assert_eq!(to_superscript('q'), None); // No superscript for 'q' + } + + #[test] + fn test_subscript_letters() { + assert_eq!(to_subscript('i'), Some('ᵢ')); + assert_eq!(to_subscript('n'), Some('ₙ')); + assert_eq!(to_subscript('b'), None); // No subscript for 'b' + } + + #[test] + fn test_convert_to_superscript() { + // 'x' has a superscript equivalent 'ˣ', so it gets converted + assert_eq!(convert_to_superscript("x2"), "ˣ²"); + assert_eq!(convert_to_superscript("n+1"), "ⁿ⁺¹"); + assert_eq!(convert_to_superscript("2nd"), "²ⁿᵈ"); + // Uppercase H and O have superscript forms, so they convert + assert_eq!(convert_to_superscript("H2O"), "ᴴ²ᴼ"); + assert_eq!(convert_to_superscript("y=mx+b"), "ʸ⁼ᵐˣ⁺ᵇ"); // all have superscripts + // Characters without superscript remain unchanged + assert_eq!(convert_to_superscript("C6H12O6"), "C⁶ᴴ¹²ᴼ⁶"); // 'C' has no superscript + } + + #[test] + fn 
test_convert_to_subscript() { + // Uppercase 'H' and 'O' have no subscript, only '2' converts + assert_eq!(convert_to_subscript("H2O"), "H₂O"); + // Lowercase 'h', '2', and 'o' all have subscripts + assert_eq!(convert_to_subscript("h2o"), "ₕ₂ₒ"); + // 'x' has a subscript equivalent 'ₓ', so it gets converted + assert_eq!(convert_to_subscript("x0"), "ₓ₀"); + // Characters without subscript remain unchanged + assert_eq!(convert_to_subscript("abc"), "ₐbc"); // only 'a' has subscript + } + + #[test] + fn test_can_convert_checks() { + assert!(can_convert_to_superscript("123")); + assert!(can_convert_to_superscript("n+1")); + assert!(!can_convert_to_superscript("query")); + + assert!(can_convert_to_subscript("123")); + assert!(!can_convert_to_subscript("abc")); + } +} diff --git a/crates/litchi-odf/Cargo.toml b/crates/litchi-odf/Cargo.toml new file mode 100644 index 0000000..0637bcb --- /dev/null +++ b/crates/litchi-odf/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "litchi-odf" +description = "OpenDocument Format (ODF) parser for the Litchi office-formats library." 
+version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +atoi_simd = { workspace = true } +chrono = { workspace = true } +fast-float2 = { workspace = true } +litchi-core = { workspace = true, features = ["odf"] } +memchr = { workspace = true } +phf = { workspace = true } +quick-xml = { workspace = true } +smallvec = { workspace = true } +soapberry-zip = { workspace = true } +xml-minifier = { workspace = true } + +[dev-dependencies] +tempfile = { workspace = true } +zip = { version = "8", default-features = false, features = ["deflate"] } diff --git a/crates/litchi-odf/README.md b/crates/litchi-odf/README.md new file mode 100644 index 0000000..23af0e8 --- /dev/null +++ b/crates/litchi-odf/README.md @@ -0,0 +1,39 @@ +# litchi-odf + +OpenDocument Format (ODF) reader and writer for `.odt`, `.ods`, and `.odp` files. + +## Overview + +`litchi-odf` parses and produces OpenDocument files conforming to ISO/IEC 26300 (ODF 1.2): text documents (`.odt`), spreadsheets (`.ods`), and presentations (`.odp`). It includes a ZIP-based package layer, XML element model for content / styles / metadata, and high-level types for each document family. Builders and `Mutable*` views are provided for creating and editing files in addition to read-only access. 
+ +## Usage + +```toml +[dependencies] +litchi-odf = "0.0.1" +``` + +```rust +use litchi_odf::{Document, Spreadsheet, Presentation}; + +let mut doc = Document::open("document.odt")?; +let text = doc.text()?; + +let mut sheet = Spreadsheet::open("data.ods")?; +let csv = sheet.to_csv()?; + +let mut pres = Presentation::open("slides.odp")?; +let slides = pres.slide_count()?; +# Ok::<(), litchi_core::Error>(()) +``` + +## Features + +- Read and write ODF text documents (`.odt`) with paragraphs, lists, tables, and styles +- Read and write ODF spreadsheets (`.ods`) with typed cell values and formulas +- Read and write ODF presentations (`.odp`) with slides and shapes +- Metadata extraction (title, author, statistics) for all three formats + +## License + +Licensed under the Apache License, Version 2.0. Part of the [Litchi](https://github.com/DevExzh/litchi) workspace. diff --git a/crates/litchi-odf/examples/read_odp.rs b/crates/litchi-odf/examples/read_odp.rs new file mode 100644 index 0000000..2c592f0 --- /dev/null +++ b/crates/litchi-odf/examples/read_odp.rs @@ -0,0 +1,65 @@ +//! Read an OpenDocument Presentation (`.odp`) file, print the slide count, +//! and dump per-slide title and text content. +//! +//! If a path argument is supplied, that file is opened. Otherwise the example +//! creates a small ODP in a tempfile via [`PresentationBuilder`] and reads it +//! back. +//! +//! Run with: +//! ```bash +//! cargo run -p litchi-odf --example read_odp +//! cargo run -p litchi-odf --example read_odp -- path/to/file.odp +//! 
``` + +use std::path::PathBuf; + +use litchi_odf::{Presentation, PresentationBuilder}; +use tempfile::NamedTempFile; + +fn main() -> Result<(), Box> { + let (path, _tempfile_guard): (PathBuf, Option) = match std::env::args().nth(1) { + Some(arg) => (PathBuf::from(arg), None), + None => { + println!("No path provided; creating a fresh ODP via PresentationBuilder..."); + let tmp = NamedTempFile::with_suffix(".odp")?; + let mut builder = PresentationBuilder::new(); + builder.add_slide_with_title( + "litchi-odf example", + "This presentation was created by the read_odp example.", + )?; + builder.add_slide_with_title( + "Slide Two", + "Demonstrates a build-then-read round trip\n\ + with multiple lines of body text.", + )?; + builder.add_slide_with_title("Final Slide", "Thanks for trying litchi-odf!")?; + let path = tmp.path().to_path_buf(); + builder.save(&path)?; + (path, Some(tmp)) + }, + }; + + println!("Opening: {}", path.display()); + let pres = Presentation::open(&path)?; + + let slide_count = pres.slide_count()?; + println!("Slide count: {}", slide_count); + + let slides = pres.slides()?; + for slide in &slides { + let title = slide.title()?.unwrap_or(""); + let body = slide.text()?; + println!("\n--- Slide {} ---", slide.index() + 1); + println!(" title: {}", title); + if !body.is_empty() { + println!(" text: {}", body.replace('\n', "\n ")); + } + let shapes = slide.shapes()?; + if !shapes.is_empty() { + println!(" shapes: {}", shapes.len()); + } + } + + // tempfile (if any) is dropped here. + Ok(()) +} diff --git a/crates/litchi-odf/examples/read_odt.rs b/crates/litchi-odf/examples/read_odt.rs new file mode 100644 index 0000000..1227890 --- /dev/null +++ b/crates/litchi-odf/examples/read_odt.rs @@ -0,0 +1,57 @@ +//! Read an OpenDocument Text (`.odt`) file and print its text content. +//! +//! If a path argument is supplied, that file is opened. Otherwise the example +//! creates a small ODT in a tempfile via [`DocumentBuilder`] and reads it back. +//! +//! 
Run with: +//! ```bash +//! cargo run -p litchi-odf --example read_odt +//! cargo run -p litchi-odf --example read_odt -- path/to/file.odt +//! ``` + +use std::path::PathBuf; + +use litchi_odf::{Document, DocumentBuilder}; +use tempfile::NamedTempFile; + +fn main() -> Result<(), Box> { + // Either use a user-supplied path, or build a fresh ODT in a tempfile. + let (path, _tempfile_guard): (PathBuf, Option) = match std::env::args().nth(1) { + Some(arg) => (PathBuf::from(arg), None), + None => { + println!("No path provided; creating a fresh ODT via DocumentBuilder..."); + let tmp = NamedTempFile::with_suffix(".odt")?; + let mut builder = DocumentBuilder::new(); + builder.add_heading("litchi-odf example", 1)?; + builder.add_paragraph("This document was created by the read_odt example.")?; + builder.add_paragraph("It demonstrates a simple build-then-read round trip.")?; + builder.add_bulleted_list(vec!["First bullet", "Second bullet", "Third bullet"])?; + builder.add_heading("Conclusion", 2)?; + builder.add_paragraph("Reading round-trips text content successfully.")?; + // `save` consumes the builder, so use the tempfile path explicitly. + let path = tmp.path().to_path_buf(); + builder.save(&path)?; + (path, Some(tmp)) + }, + }; + + println!("Opening: {}", path.display()); + let doc = Document::open(&path)?; + + // Full text extraction. + let text = doc.text()?; + println!("\n--- Full text ({} chars) ---", text.chars().count()); + println!("{}", text); + + // Per-paragraph view. + let paragraphs = doc.paragraphs()?; + println!("\n--- Paragraphs: {} ---", paragraphs.len()); + for (i, para) in paragraphs.iter().take(10).enumerate() { + let body = para.text().unwrap_or_default(); + let style = para.style_name().unwrap_or(""); + println!(" [{}] style={:?} text={:?}", i + 1, style, body); + } + + // Tempfile (if any) is dropped here, deleting the file automatically. 
+ Ok(()) +} diff --git a/crates/litchi-odf/examples/write_ods.rs b/crates/litchi-odf/examples/write_ods.rs new file mode 100644 index 0000000..53e0879 --- /dev/null +++ b/crates/litchi-odf/examples/write_ods.rs @@ -0,0 +1,71 @@ +//! Build a small `.ods` spreadsheet, save it to a tempfile, then reopen it +//! and print the CSV representation. +//! +//! Run with: +//! ```bash +//! cargo run -p litchi-odf --example write_ods +//! ``` + +use litchi_odf::{CellValue, Spreadsheet, SpreadsheetBuilder}; +use tempfile::NamedTempFile; + +fn main() -> Result<(), Box> { + // Create a tempfile that will be cleaned up when this binding drops. + let tmp = NamedTempFile::with_suffix(".ods")?; + let path = tmp.path().to_path_buf(); + + // ---- Build phase ---- + println!("Building ODS at: {}", path.display()); + let mut builder = SpreadsheetBuilder::new(); + builder.add_sheet("Demo")?; + + // Header row. + builder.add_row_with_values(&["Item", "Quantity", "Price", "Total"])?; + + // Data rows. Row indices below are 0-based, so row 1 is the first data row + // (spreadsheet row 2 in A1 notation). + builder.add_row_with_cell_values(&[ + CellValue::Text("Apples".to_string()), + CellValue::Number(10.0), + CellValue::Number(0.5), + CellValue::Empty, + ])?; + builder.add_row_with_cell_values(&[ + CellValue::Text("Bread".to_string()), + CellValue::Number(2.0), + CellValue::Number(2.25), + CellValue::Empty, + ])?; + builder.add_row_with_cell_values(&[ + CellValue::Text("Cheese".to_string()), + CellValue::Number(1.0), + CellValue::Number(4.99), + CellValue::Empty, + ])?; + + // Per-row Total formulas. + builder.set_cell_formula(1, 3, "of:=B2*C2")?; + builder.set_cell_formula(2, 3, "of:=B3*C3")?; + builder.set_cell_formula(3, 3, "of:=B4*C4")?; + + // Grand-total row using SUM. 
+ builder.add_row_with_values(&["Grand Total", "", "", ""])?; + builder.set_cell_formula(4, 3, "of:=SUM(D2:D4)")?; + + builder.save(&path)?; + println!( + "Saved spreadsheet ({} bytes).", + std::fs::metadata(&path)?.len() + ); + + // ---- Read-back phase ---- + println!("\nReopening for verification..."); + let mut sheet = Spreadsheet::open(&path)?; + println!("Sheet count: {}", sheet.sheet_count()?); + + let csv = sheet.to_csv()?; + println!("\n--- CSV output ---\n{}", csv); + + // tmp drops here -> file is deleted. + Ok(()) +} diff --git a/crates/litchi-odf/fuzz/.gitignore b/crates/litchi-odf/fuzz/.gitignore new file mode 100644 index 0000000..1a45eee --- /dev/null +++ b/crates/litchi-odf/fuzz/.gitignore @@ -0,0 +1,4 @@ +target +corpus +artifacts +coverage diff --git a/crates/litchi-odf/fuzz/Cargo.toml b/crates/litchi-odf/fuzz/Cargo.toml new file mode 100644 index 0000000..aaed7fc --- /dev/null +++ b/crates/litchi-odf/fuzz/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "litchi-odf-fuzz" +version = "0.0.0" +edition = "2024" +publish = false +authors = ["Ryker Zhu "] +license = "Apache-2.0" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +litchi-odf = { path = ".." 
} + +[[bin]] +name = "parse_odt" +path = "fuzz_targets/parse_odt.rs" +test = false +doc = false +bench = false + +[profile.release] +debug = 1 +codegen-units = 1 +lto = "thin" + +[workspace] diff --git a/crates/litchi-odf/fuzz/fuzz_targets/parse_odt.rs b/crates/litchi-odf/fuzz/fuzz_targets/parse_odt.rs new file mode 100644 index 0000000..ec51cb0 --- /dev/null +++ b/crates/litchi-odf/fuzz/fuzz_targets/parse_odt.rs @@ -0,0 +1,9 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + if let Ok(doc) = litchi_odf::Document::from_bytes(data.to_vec()) { + let _ = doc.text(); + } +}); diff --git a/src/odf/constants.rs b/crates/litchi-odf/src/constants.rs similarity index 97% rename from src/odf/constants.rs rename to crates/litchi-odf/src/constants.rs index 1250fd1..8cb74a4 100644 --- a/src/odf/constants.rs +++ b/crates/litchi-odf/src/constants.rs @@ -182,7 +182,7 @@ pub const ODF_CLASSES: [&str; 12] = [ /// # Examples /// /// ``` -/// use litchi::odf::constants::get_mime_type_from_extension; +/// use litchi_odf::constants::get_mime_type_from_extension; /// /// let mime = get_mime_type_from_extension("odt"); /// assert_eq!(mime, Some("application/vnd.oasis.opendocument.text")); @@ -205,7 +205,7 @@ pub fn get_mime_type_from_extension(extension: &str) -> Option<&'static str> { /// # Examples /// /// ``` -/// use litchi::odf::constants::get_extension_from_mime_type; +/// use litchi_odf::constants::get_extension_from_mime_type; /// /// let ext = get_extension_from_mime_type("application/vnd.oasis.opendocument.text"); /// assert_eq!(ext, Some("odt")); @@ -228,7 +228,7 @@ pub fn get_extension_from_mime_type(mime_type: &str) -> Option<&'static str> { /// # Examples /// /// ``` -/// use litchi::odf::constants::is_odf_extension; +/// use litchi_odf::constants::is_odf_extension; /// /// assert!(is_odf_extension("odt")); /// assert!(is_odf_extension("ods")); @@ -252,7 +252,7 @@ pub fn is_odf_extension(extension: &str) -> bool { /// # Examples /// /// ``` 
-/// use litchi::odf::constants::is_odf_mime_type; +/// use litchi_odf::constants::is_odf_mime_type; /// /// assert!(is_odf_mime_type("application/vnd.oasis.opendocument.text")); /// assert!(!is_odf_mime_type("text/plain")); diff --git a/src/odf/coordinates.rs b/crates/litchi-odf/src/coordinates.rs similarity index 92% rename from src/odf/coordinates.rs rename to crates/litchi-odf/src/coordinates.rs index 34591d8..5401f29 100644 --- a/src/odf/coordinates.rs +++ b/crates/litchi-odf/src/coordinates.rs @@ -16,7 +16,7 @@ //! //! - odfdo: `3rdparty/odfdo/src/odfdo/utils/coordinates.py` -use crate::Result; +use litchi_core::Result; use std::fmt; use std::str::FromStr; @@ -33,7 +33,7 @@ use std::str::FromStr; /// # Examples /// /// ``` -/// use litchi::odf::coordinates::alpha_to_digit; +/// use litchi_odf::coordinates::alpha_to_digit; /// /// assert_eq!(alpha_to_digit("A").unwrap(), 0); /// assert_eq!(alpha_to_digit("Z").unwrap(), 25); @@ -42,7 +42,7 @@ use std::str::FromStr; /// ``` pub fn alpha_to_digit(alpha: &str) -> Result { if alpha.is_empty() || !alpha.chars().all(|c| c.is_ascii_alphabetic()) { - return Err(crate::Error::Other(format!( + return Err(litchi_core::Error::Other(format!( "Column value '{}' is malformed, must contain only letters", alpha ))); @@ -70,7 +70,7 @@ pub fn alpha_to_digit(alpha: &str) -> Result { /// # Examples /// /// ``` -/// use litchi::odf::coordinates::digit_to_alpha; +/// use litchi_odf::coordinates::digit_to_alpha; /// /// assert_eq!(digit_to_alpha(0), "A"); /// assert_eq!(digit_to_alpha(25), "Z"); @@ -95,7 +95,7 @@ pub fn digit_to_alpha(mut digit: usize) -> String { /// # Examples /// /// ``` -/// use litchi::odf::coordinates::CellCoord; +/// use litchi_odf::coordinates::CellCoord; /// /// let coord = CellCoord::new(0, 0); // A1 /// assert_eq!(coord.to_string(), "A1"); @@ -142,7 +142,7 @@ impl CellCoord { /// # Examples /// /// ``` - /// use litchi::odf::coordinates::CellCoord; + /// use litchi_odf::coordinates::CellCoord; /// /// let 
coord = CellCoord::new(0, 0); /// assert_eq!(coord.to_a1(), "A1"); @@ -156,14 +156,14 @@ impl CellCoord { } impl FromStr for CellCoord { - type Err = crate::Error; + type Err = litchi_core::Error; /// Parse cell coordinate from A1 notation /// /// # Examples /// /// ``` - /// use litchi::odf::coordinates::CellCoord; + /// use litchi_odf::coordinates::CellCoord; /// /// let coord: CellCoord = "A1".parse().unwrap(); /// assert_eq!(coord.column(), 0); @@ -192,7 +192,7 @@ impl FromStr for CellCoord { } if alpha.is_empty() { - return Err(crate::Error::Other(format!( + return Err(litchi_core::Error::Other(format!( "No column letter found in '{}'", s ))); @@ -201,7 +201,7 @@ impl FromStr for CellCoord { // Extract numeric part let numeric = &s[rest_start..]; if numeric.is_empty() { - return Err(crate::Error::Other(format!( + return Err(litchi_core::Error::Other(format!( "No row number found in '{}'", s ))); @@ -209,11 +209,13 @@ impl FromStr for CellCoord { let column = alpha_to_digit(&alpha)?; let row: usize = numeric.parse().map_err(|_| { - crate::Error::Other(format!("Failed to parse row number from '{}'", numeric)) + litchi_core::Error::Other(format!("Failed to parse row number from '{}'", numeric)) })?; if row == 0 { - return Err(crate::Error::Other("Row number must be >= 1".to_string())); + return Err(litchi_core::Error::Other( + "Row number must be >= 1".to_string(), + )); } Ok(Self::new(column, row - 1)) @@ -231,7 +233,7 @@ impl fmt::Display for CellCoord { /// # Examples /// /// ``` -/// use litchi::odf::coordinates::{CellCoord, CellRange}; +/// use litchi_odf::coordinates::{CellCoord, CellRange}; /// /// let range = CellRange::new( /// CellCoord::new(0, 0), @@ -280,7 +282,7 @@ impl CellRange { /// # Examples /// /// ``` - /// use litchi::odf::coordinates::CellRange; + /// use litchi_odf::coordinates::CellRange; /// /// let range: CellRange = "A1:C3".parse().unwrap(); /// assert_eq!(range.width(), 3); @@ -299,7 +301,7 @@ impl CellRange { /// # Examples /// /// 
``` - /// use litchi::odf::coordinates::CellRange; + /// use litchi_odf::coordinates::CellRange; /// /// let range: CellRange = "A1:C3".parse().unwrap(); /// assert_eq!(range.height(), 3); @@ -315,14 +317,14 @@ impl CellRange { } impl FromStr for CellRange { - type Err = crate::Error; + type Err = litchi_core::Error; /// Parse cell range from A1:B3 notation /// /// # Examples /// /// ``` - /// use litchi::odf::coordinates::CellRange; + /// use litchi_odf::coordinates::CellRange; /// /// let range: CellRange = "A1:B3".parse().unwrap(); /// assert_eq!(range.start().column(), 0); @@ -334,7 +336,7 @@ impl FromStr for CellRange { let parts: Vec<&str> = s.split(':').collect(); if parts.len() != 2 { - return Err(crate::Error::Other(format!( + return Err(litchi_core::Error::Other(format!( "Invalid range format '{}', expected 'A1:B3'", s ))); diff --git a/src/odf/core/manifest.rs b/crates/litchi-odf/src/core/manifest.rs similarity index 99% rename from src/odf/core/manifest.rs rename to crates/litchi-odf/src/core/manifest.rs index 2a737eb..16df191 100644 --- a/src/odf/core/manifest.rs +++ b/crates/litchi-odf/src/core/manifest.rs @@ -3,7 +3,7 @@ //! The manifest.xml file contains metadata about all files in the ODF package, //! including their MIME types, sizes, and encryption status. -use crate::common::{Error, Result}; +use litchi_core::{Error, Result}; use soapberry_zip::office::ArchiveReader; use std::collections::HashMap; diff --git a/crates/litchi-odf/src/core/metadata.rs b/crates/litchi-odf/src/core/metadata.rs new file mode 100644 index 0000000..58fe46e --- /dev/null +++ b/crates/litchi-odf/src/core/metadata.rs @@ -0,0 +1,611 @@ +//! ODF metadata parsing functionality. +//! +//! This module provides comprehensive parsing of ODF metadata from meta.xml, +//! including document properties, statistics, and user information. 
+ +use chrono::{DateTime, Utc}; +use litchi_core::{Error, Metadata, Result}; +use quick_xml::events::Event; +use std::collections::HashMap; + +/// Comprehensive ODF metadata +#[derive(Debug, Clone, Default)] +pub struct OdfMetadata { + /// Document title + pub title: Option, + /// Document description + pub description: Option, + /// Document subject + pub subject: Option, + /// Document keywords + pub keywords: Vec, + /// Document creator/author + pub creator: Option, + /// Document language + pub language: Option, + /// Creation date + pub creation_date: Option, + /// Last modification date + pub modification_date: Option, + /// Generator application + pub generator: Option, + /// Document statistics + pub statistics: DocumentStatistics, + /// Custom properties + pub custom_properties: HashMap, +} + +/// Document statistics from metadata +#[derive(Debug, Clone, Default)] +pub struct DocumentStatistics { + /// Number of pages + pub page_count: Option, + /// Number of paragraphs + pub paragraph_count: Option, + /// Number of words + pub word_count: Option, + /// Number of characters + pub character_count: Option, + /// Number of tables + pub table_count: Option, + /// Number of images + pub image_count: Option, + /// Number of objects + pub object_count: Option, +} + +impl OdfMetadata { + /// Parse metadata from meta.xml content + pub fn from_xml(xml_content: &str) -> Result { + use quick_xml::Reader; + use quick_xml::events::Event; + + let mut reader = Reader::from_str(xml_content); + let mut buf = Vec::new(); + let mut metadata = OdfMetadata::default(); + let mut current_element = Vec::new(); + + loop { + match reader.read_event_into(&mut buf) { + Ok(Event::Start(ref e)) => { + let name = e.name(); + let name_str = String::from_utf8(name.as_ref().to_vec()).unwrap_or_default(); + + current_element.push(name_str); + + match name.as_ref() { + b"dc:title" => { + metadata.title = Self::extract_text_content(&mut reader, &mut buf)?; + }, + b"dc:description" => { + 
metadata.description = + Self::extract_text_content(&mut reader, &mut buf)?; + }, + b"dc:subject" => { + metadata.subject = Self::extract_text_content(&mut reader, &mut buf)?; + }, + b"meta:keyword" => { + if let Some(keyword) = + Self::extract_text_content(&mut reader, &mut buf)? + { + metadata.keywords.push(keyword); + } + }, + b"dc:creator" => { + metadata.creator = Self::extract_text_content(&mut reader, &mut buf)?; + }, + b"dc:language" => { + metadata.language = Self::extract_text_content(&mut reader, &mut buf)?; + }, + b"meta:creation-date" => { + metadata.creation_date = + Self::extract_text_content(&mut reader, &mut buf)?; + }, + b"dc:date" => { + metadata.modification_date = + Self::extract_text_content(&mut reader, &mut buf)?; + }, + b"meta:generator" => { + metadata.generator = Self::extract_text_content(&mut reader, &mut buf)?; + }, + b"meta:document-statistic" => { + metadata.statistics = Self::parse_document_statistics(e)?; + }, + b"meta:user-defined" => { + let mut temp_buf = Vec::new(); + if let Some((key, value)) = + Self::parse_user_defined_property(e, &mut reader, &mut temp_buf)? 
+ { + metadata.custom_properties.insert(key, value); + } + }, + _ => {}, + } + }, + Ok(Event::End(ref e)) => { + if let Some(last) = current_element.last() + && last.as_bytes() == e.name().as_ref() + { + current_element.pop(); + } + }, + Ok(Event::Eof) => break, + Err(e) => { + return Err(Error::InvalidFormat(format!( + "XML parsing error in metadata: {}", + e + ))); + }, + _ => {}, + } + buf.clear(); + } + + Ok(metadata) + } + + /// Extract text content from current element + fn extract_text_content( + reader: &mut quick_xml::Reader<&[u8]>, + buf: &mut Vec, + ) -> Result> { + let mut content = String::new(); + let mut depth = 0; + + loop { + match reader.read_event_into(buf) { + Ok(Event::Start(_)) => { + depth += 1; + }, + Ok(Event::Text(ref t)) if depth == 0 => { + content.push_str(&String::from_utf8(t.to_vec()).unwrap_or_default()); + }, + Ok(Event::End(_)) => { + if depth == 0 { + break; + } + depth -= 1; + }, + Ok(Event::Eof) => break, + _ => {}, + } + } + + let trimmed = content.trim(); + if trimmed.is_empty() { + Ok(None) + } else { + Ok(Some(trimmed.to_string())) + } + } + + /// Parse document statistics + fn parse_document_statistics(e: &quick_xml::events::BytesStart) -> Result { + let mut stats = DocumentStatistics::default(); + + for attr_result in e.attributes() { + let attr = attr_result.map_err(|_| { + Error::InvalidFormat("Invalid attribute in document statistics".to_string()) + })?; + let value_str = String::from_utf8(attr.value.to_vec()).map_err(|_| { + Error::InvalidFormat("Invalid UTF-8 in document statistics".to_string()) + })?; + + if let Ok(value) = value_str.parse::() { + match attr.key.as_ref() { + b"meta:page-count" => stats.page_count = Some(value), + b"meta:paragraph-count" => stats.paragraph_count = Some(value), + b"meta:word-count" => stats.word_count = Some(value), + b"meta:character-count" => stats.character_count = Some(value), + b"meta:table-count" => stats.table_count = Some(value), + b"meta:image-count" => stats.image_count = 
Some(value), + b"meta:object-count" => stats.object_count = Some(value), + _ => {}, + } + } + } + + Ok(stats) + } + + /// Parse user-defined property + fn parse_user_defined_property( + e: &quick_xml::events::BytesStart, + reader: &mut quick_xml::Reader<&[u8]>, + buf: &mut Vec, + ) -> Result> { + let mut name = None; + + // Get property name from attributes + for attr_result in e.attributes() { + let attr = attr_result.map_err(|_| { + Error::InvalidFormat("Invalid attribute in user-defined property".to_string()) + })?; + if attr.key.as_ref() == b"meta:name" { + name = Some(String::from_utf8(attr.value.to_vec()).map_err(|_| { + Error::InvalidFormat("Invalid UTF-8 in property name".to_string()) + })?); + break; + } + } + + if let Some(name) = name { + if let Some(value) = Self::extract_text_content(reader, buf)? { + Ok(Some((name, value))) + } else { + Ok(None) + } + } else { + Ok(None) + } + } +} + +impl OdfMetadata { + /// Parse a date string into DateTime + fn parse_date(date_str: Option) -> Option> { + date_str.and_then(|s| { + // Try different date formats that ODF might use + // ISO 8601 format: 2023-10-15T14:30:00Z or 2023-10-15T14:30:00.000Z + if let Ok(dt) = DateTime::parse_from_rfc3339(&s) { + Some(dt.into()) + } else if let Ok(dt) = DateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S%.fZ") { + Some(dt.into()) + } else if let Ok(dt) = DateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%SZ") { + Some(dt.into()) + } else { + // Try simpler date format + DateTime::parse_from_str(&s, "%Y-%m-%d") + .ok() + .map(|dt| dt.into()) + } + }) + } +} + +impl From for Metadata { + fn from(odf_meta: OdfMetadata) -> Self { + Metadata { + title: odf_meta.title, + author: odf_meta.creator, + subject: odf_meta.subject, + keywords: if odf_meta.keywords.is_empty() { + None + } else { + Some(odf_meta.keywords.join(", ")) + }, + description: odf_meta.description, + created: OdfMetadata::parse_date(odf_meta.creation_date), + modified: OdfMetadata::parse_date(odf_meta.modification_date), + 
page_count: odf_meta.statistics.page_count, + word_count: odf_meta.statistics.word_count, + character_count: odf_meta.statistics.character_count, + application: odf_meta.generator, + ..Default::default() + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_odf_metadata_default() { + let meta = OdfMetadata::default(); + assert!(meta.title.is_none()); + assert!(meta.description.is_none()); + assert!(meta.subject.is_none()); + assert!(meta.keywords.is_empty()); + assert!(meta.creator.is_none()); + assert!(meta.language.is_none()); + assert!(meta.creation_date.is_none()); + assert!(meta.modification_date.is_none()); + assert!(meta.generator.is_none()); + assert!(meta.custom_properties.is_empty()); + } + + #[test] + fn test_odf_metadata_from_xml_empty() { + let xml = r#" + + + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert!(meta.title.is_none()); + assert!(meta.creator.is_none()); + } + + #[test] + fn test_odf_metadata_from_xml_title() { + let xml = r#" + + + Test Document + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert_eq!(meta.title, Some("Test Document".to_string())); + } + + #[test] + fn test_odf_metadata_from_xml_creator() { + let xml = r#" + + + John Doe + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert_eq!(meta.creator, Some("John Doe".to_string())); + } + + #[test] + fn test_odf_metadata_from_xml_description() { + let xml = r#" + + + This is a test document + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert_eq!( + meta.description, + Some("This is a test document".to_string()) + ); + } + + #[test] + fn test_odf_metadata_from_xml_subject() { + let xml = r#" + + + Testing + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert_eq!(meta.subject, Some("Testing".to_string())); + } + + #[test] + fn test_odf_metadata_from_xml_keywords() { + let xml = r#" + + + rust + odf + testing + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + 
assert_eq!(meta.keywords, vec!["rust", "odf", "testing"]); + } + + #[test] + fn test_odf_metadata_from_xml_language() { + let xml = r#" + + + en-US + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert_eq!(meta.language, Some("en-US".to_string())); + } + + #[test] + fn test_odf_metadata_from_xml_dates() { + let xml = r#" + + + 2024-01-15T10:30:00Z + 2024-03-20T14:45:00Z + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert_eq!(meta.creation_date, Some("2024-01-15T10:30:00Z".to_string())); + assert_eq!( + meta.modification_date, + Some("2024-03-20T14:45:00Z".to_string()) + ); + } + + #[test] + fn test_odf_metadata_from_xml_generator() { + let xml = r#" + + + LibreOffice/7.0 + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert_eq!(meta.generator, Some("LibreOffice/7.0".to_string())); + } + + #[test] + fn test_odf_metadata_from_xml_statistics() { + // Note: The parser handles empty document-statistic elements + // Statistics are parsed from attributes on the Start event + let xml = r#" + + + + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + // The statistics parsing happens on Start event with attributes + assert_eq!(meta.statistics.page_count, Some(5)); + assert_eq!(meta.statistics.paragraph_count, Some(42)); + assert_eq!(meta.statistics.word_count, Some(350)); + assert_eq!(meta.statistics.character_count, Some(2100)); + assert_eq!(meta.statistics.table_count, Some(3)); + assert_eq!(meta.statistics.image_count, Some(2)); + assert_eq!(meta.statistics.object_count, Some(1)); + } + + #[test] + fn test_odf_metadata_from_xml_user_defined() { + let xml = r#" + + + Engineering + Alpha + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert_eq!( + meta.custom_properties.get("Department"), + Some(&"Engineering".to_string()) + ); + assert_eq!( + meta.custom_properties.get("Project"), + Some(&"Alpha".to_string()) + ); + } + + #[test] + fn test_odf_metadata_from_xml_full() { + let xml = r#" + + + Full Test 
Document + A comprehensive test + Testing + Test Author + en + 2024-01-01T00:00:00Z + 2024-06-01T00:00:00Z + Test Generator + test + + +"#; + + let meta = OdfMetadata::from_xml(xml).unwrap(); + assert_eq!(meta.title, Some("Full Test Document".to_string())); + assert_eq!(meta.description, Some("A comprehensive test".to_string())); + assert_eq!(meta.subject, Some("Testing".to_string())); + assert_eq!(meta.creator, Some("Test Author".to_string())); + assert_eq!(meta.language, Some("en".to_string())); + assert_eq!(meta.creation_date, Some("2024-01-01T00:00:00Z".to_string())); + assert_eq!( + meta.modification_date, + Some("2024-06-01T00:00:00Z".to_string()) + ); + assert_eq!(meta.generator, Some("Test Generator".to_string())); + assert_eq!(meta.keywords, vec!["test"]); + assert_eq!(meta.statistics.page_count, Some(10)); + } + + #[test] + fn test_document_statistics_default() { + let stats = DocumentStatistics::default(); + assert!(stats.page_count.is_none()); + assert!(stats.paragraph_count.is_none()); + assert!(stats.word_count.is_none()); + assert!(stats.character_count.is_none()); + assert!(stats.table_count.is_none()); + assert!(stats.image_count.is_none()); + assert!(stats.object_count.is_none()); + } + + #[test] + fn test_parse_date_iso8601() { + let date = OdfMetadata::parse_date(Some("2024-03-15T14:30:00Z".to_string())); + assert!(date.is_some()); + } + + #[test] + fn test_parse_date_rfc3339() { + let date = OdfMetadata::parse_date(Some("2024-03-15T00:00:00+00:00".to_string())); + assert!(date.is_some()); + } + + #[test] + fn test_parse_date_none() { + let date = OdfMetadata::parse_date(None); + assert!(date.is_none()); + } + + #[test] + fn test_parse_date_invalid() { + let date = OdfMetadata::parse_date(Some("not-a-date".to_string())); + assert!(date.is_none()); + } + + #[test] + fn test_into_metadata_empty() { + let odf = OdfMetadata::default(); + let meta: Metadata = odf.into(); + assert!(meta.title.is_none()); + assert!(meta.author.is_none()); + 
assert!(meta.keywords.is_none()); + } + + #[test] + fn test_into_metadata_with_data() { + let odf = OdfMetadata { + title: Some("Title".to_string()), + creator: Some("Author".to_string()), + subject: Some("Subject".to_string()), + keywords: vec!["a".to_string(), "b".to_string()], + description: Some("Desc".to_string()), + creation_date: Some("2024-01-01T00:00:00Z".to_string()), + modification_date: Some("2024-06-01T00:00:00Z".to_string()), + generator: Some("App".to_string()), + statistics: DocumentStatistics { + page_count: Some(5), + word_count: Some(100), + character_count: Some(500), + ..Default::default() + }, + ..Default::default() + }; + + let meta: Metadata = odf.into(); + assert_eq!(meta.title, Some("Title".to_string())); + assert_eq!(meta.author, Some("Author".to_string())); + assert_eq!(meta.subject, Some("Subject".to_string())); + assert_eq!(meta.keywords, Some("a, b".to_string())); + assert_eq!(meta.description, Some("Desc".to_string())); + assert_eq!(meta.page_count, Some(5)); + assert_eq!(meta.word_count, Some(100)); + assert_eq!(meta.character_count, Some(500)); + assert_eq!(meta.application, Some("App".to_string())); + assert!(meta.created.is_some()); + assert!(meta.modified.is_some()); + } + + #[test] + fn test_into_metadata_no_keywords() { + let odf = OdfMetadata { + keywords: vec![], + ..Default::default() + }; + + let meta: Metadata = odf.into(); + assert!(meta.keywords.is_none()); + } +} diff --git a/src/odf/core/mod.rs b/crates/litchi-odf/src/core/mod.rs similarity index 100% rename from src/odf/core/mod.rs rename to crates/litchi-odf/src/core/mod.rs diff --git a/crates/litchi-odf/src/core/package.rs b/crates/litchi-odf/src/core/package.rs new file mode 100644 index 0000000..618ab6e --- /dev/null +++ b/crates/litchi-odf/src/core/package.rs @@ -0,0 +1,502 @@ +//! ODF package (ZIP archive) handling functionality. +//! +//! This module provides utilities for working with ODF files as ZIP archives, +//! 
including reading files, checking existence, and basic package operations. +//! +//! Uses soapberry-zip for high-performance zero-copy ZIP parsing. + +use litchi_core::{Error, Result}; +use soapberry_zip::office::ArchiveReader; +use std::io::Read; + +/// An ODF package (ZIP file containing XML documents) +/// +/// Uses soapberry-zip for efficient lazy decompression. +pub struct Package<'data> { + archive: ArchiveReader<'data>, + #[allow(dead_code)] + manifest: super::manifest::Manifest, + mimetype: String, +} + +/// Owned version of Package that owns the data buffer. +pub struct OwnedPackage { + data: Vec, +} + +#[allow(dead_code)] +impl OwnedPackage { + /// Open an ODF package from a reader + pub fn from_reader(mut reader: R) -> Result { + let mut data = Vec::new(); + reader.read_to_end(&mut data)?; + + // Validate the archive can be parsed + let _ = ArchiveReader::new(&data) + .map_err(|_| Error::InvalidFormat("Invalid ZIP archive".to_string()))?; + + Ok(Self { data }) + } + + /// Create an ODF package from bytes + pub fn from_bytes(data: Vec) -> Result { + // Validate the archive can be parsed + let _ = ArchiveReader::new(&data) + .map_err(|_| Error::InvalidFormat("Invalid ZIP archive".to_string()))?; + + Ok(Self { data }) + } + + /// Get a borrowed Package for accessing archive contents + pub fn package(&self) -> Result> { + Package::new(&self.data) + } + + /// Get the underlying data + pub fn into_inner(self) -> Vec { + self.data + } + + /// Get a reference to the underlying data + pub fn as_bytes(&self) -> &[u8] { + &self.data + } + + // Convenience methods that delegate to Package + + /// Get the MIME type from the mimetype file + pub fn mimetype(&self) -> Result { + let package = self.package()?; + Ok(package.mimetype().to_string()) + } + + /// Get a file from the package by path + pub fn get_file(&self, path: &str) -> Result> { + let package = self.package()?; + package.get_file(path) + } + + /// Check if a file exists in the package + pub fn 
has_file(&self, path: &str) -> Result { + let package = self.package()?; + Ok(package.has_file(path)) + } + + /// List all files in the package + pub fn files(&self) -> Result> { + let package = self.package()?; + package.files() + } + + /// Get all embedded media files from the package. + pub fn media_files(&self) -> Result> { + let package = self.package()?; + package.media_files() + } +} + +impl<'data> Package<'data> { + /// Create a new Package from a byte slice + pub fn new(data: &'data [u8]) -> Result { + let archive = ArchiveReader::new(data) + .map_err(|_| Error::InvalidFormat("Invalid ZIP archive".to_string()))?; + + // Read MIME type from mimetype file + let mimetype = archive + .read_string("mimetype") + .map_err(|_| Error::InvalidFormat("No mimetype file found in ODF package".to_string()))? + .trim() + .to_string(); + + // Parse the manifest + let manifest = super::manifest::Manifest::from_archive_reader(&archive)?; + + Ok(Self { + archive, + manifest, + mimetype, + }) + } + + /// Get the MIME type from the mimetype file + pub fn mimetype(&self) -> &str { + &self.mimetype + } + + /// Get a file from the package by path + pub fn get_file(&self, path: &str) -> Result> { + self.archive + .read(path) + .map_err(|_| Error::InvalidFormat(format!("File not found: {}", path))) + } + + /// Check if a file exists in the package + pub fn has_file(&self, path: &str) -> bool { + self.archive.contains(path) + } + + /// Get the manifest + #[allow(dead_code)] + pub fn manifest(&self) -> &super::manifest::Manifest { + &self.manifest + } + + /// List all files in the package + pub fn files(&self) -> Result> { + Ok(self.archive.file_names().map(String::from).collect()) + } + + /// Get all embedded media files (images, etc.) from the package. + /// + /// This returns paths to all files in the Pictures/ directory and other media directories. 
+ pub fn media_files(&self) -> Result> { + let all_files = self.files()?; + Ok(all_files + .into_iter() + .filter(|path| { + path.starts_with("Pictures/") + || path.starts_with("media/") + || path.starts_with("Object/") + || path.ends_with(".png") + || path.ends_with(".jpg") + || path.ends_with(".jpeg") + || path.ends_with(".gif") + || path.ends_with(".svg") + }) + .collect()) + } + + /// Check if the package contains any media files. + #[allow(dead_code)] // Reserved for future use + pub fn has_media(&self) -> bool { + self.media_files().map(|m| !m.is_empty()).unwrap_or(false) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + + // Helper function to create a minimal ODF package (ZIP with mimetype and manifest) + fn create_test_odf_package(mimetype: &str) -> Vec { + use std::io::Write; + + let mut zip_buffer = Vec::new(); + { + let mut zip = zip::ZipWriter::new(Cursor::new(&mut zip_buffer)); + let options = zip::write::SimpleFileOptions::default() + .compression_method(zip::CompressionMethod::Stored); + + // Write mimetype file (must be first and uncompressed for ODF) + zip.start_file("mimetype", options).unwrap(); + zip.write_all(mimetype.as_bytes()).unwrap(); + + // Write manifest.xml + let manifest_xml = r#" + + + + + +"#; + zip.start_file("META-INF/manifest.xml", options).unwrap(); + zip.write_all(manifest_xml.as_bytes()).unwrap(); + + // Write content.xml + zip.start_file("content.xml", options).unwrap(); + zip.write_all(b"").unwrap(); + + // Write styles.xml + zip.start_file("styles.xml", options).unwrap(); + zip.write_all(b"").unwrap(); + + // Write a picture + zip.start_file("Pictures/image.png", options).unwrap(); + zip.write_all(b"PNG\x89\x50\x4e\x47\x0d\x0a\x1a\x0a") + .unwrap(); + + zip.finish().unwrap(); + } + zip_buffer + } + + fn create_test_ods_package() -> Vec { + create_test_odf_package("application/vnd.oasis.opendocument.spreadsheet") + } + + fn create_test_odp_package() -> Vec { + 
create_test_odf_package("application/vnd.oasis.opendocument.presentation") + } + + #[test] + fn test_owned_package_from_bytes() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = OwnedPackage::from_bytes(data); + assert!(package.is_ok()); + } + + #[test] + fn test_owned_package_from_reader() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let cursor = Cursor::new(data); + let package = OwnedPackage::from_reader(cursor); + assert!(package.is_ok()); + } + + #[test] + fn test_owned_package_invalid_data() { + let invalid_data = b"not a zip file".to_vec(); + let result = OwnedPackage::from_bytes(invalid_data); + assert!(result.is_err()); + } + + #[test] + fn test_owned_package_into_inner() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = OwnedPackage::from_bytes(data.clone()).unwrap(); + let inner = package.into_inner(); + assert!(!inner.is_empty()); + } + + #[test] + fn test_owned_package_as_bytes() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = OwnedPackage::from_bytes(data.clone()).unwrap(); + let bytes = package.as_bytes(); + assert!(!bytes.is_empty()); + } + + #[test] + fn test_owned_package_mimetype() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = OwnedPackage::from_bytes(data).unwrap(); + assert_eq!( + package.mimetype().unwrap(), + "application/vnd.oasis.opendocument.text" + ); + } + + #[test] + fn test_owned_package_mimetype_ods() { + let data = create_test_ods_package(); + let package = OwnedPackage::from_bytes(data).unwrap(); + assert_eq!( + package.mimetype().unwrap(), + "application/vnd.oasis.opendocument.spreadsheet" + ); + } + + #[test] + fn test_owned_package_mimetype_odp() { + let data = create_test_odp_package(); + let package = OwnedPackage::from_bytes(data).unwrap(); + assert_eq!( + package.mimetype().unwrap(), + 
"application/vnd.oasis.opendocument.presentation" + ); + } + + #[test] + fn test_owned_package_get_file() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = OwnedPackage::from_bytes(data).unwrap(); + + let content = package.get_file("content.xml"); + assert!(content.is_ok()); + assert_eq!(content.unwrap(), b""); + } + + #[test] + fn test_owned_package_get_file_not_found() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = OwnedPackage::from_bytes(data).unwrap(); + + let result = package.get_file("nonexistent.xml"); + assert!(result.is_err()); + } + + #[test] + fn test_owned_package_has_file() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = OwnedPackage::from_bytes(data).unwrap(); + + assert!(package.has_file("content.xml").unwrap()); + assert!(package.has_file("styles.xml").unwrap()); + assert!(!package.has_file("nonexistent.xml").unwrap()); + } + + #[test] + fn test_owned_package_files() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = OwnedPackage::from_bytes(data).unwrap(); + + let files = package.files().unwrap(); + assert!(files.contains(&"mimetype".to_string())); + assert!(files.contains(&"content.xml".to_string())); + assert!(files.contains(&"styles.xml".to_string())); + assert!(files.contains(&"META-INF/manifest.xml".to_string())); + assert!(files.contains(&"Pictures/image.png".to_string())); + } + + #[test] + fn test_owned_package_media_files() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = OwnedPackage::from_bytes(data).unwrap(); + + let media_files = package.media_files().unwrap(); + assert!(media_files.contains(&"Pictures/image.png".to_string())); + } + + #[test] + fn test_package_new() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = Package::new(&data); + 
assert!(package.is_ok()); + } + + #[test] + fn test_package_new_invalid_data() { + let invalid_data = b"not a zip file"; + let result = Package::new(invalid_data); + assert!(result.is_err()); + } + + #[test] + fn test_package_mimetype() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = Package::new(&data).unwrap(); + assert_eq!( + package.mimetype(), + "application/vnd.oasis.opendocument.text" + ); + } + + #[test] + fn test_package_get_file() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = Package::new(&data).unwrap(); + + let content = package.get_file("content.xml").unwrap(); + assert_eq!(content, b""); + } + + #[test] + fn test_package_get_file_not_found() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = Package::new(&data).unwrap(); + + let result = package.get_file("nonexistent.xml"); + assert!(result.is_err()); + } + + #[test] + fn test_package_has_file() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = Package::new(&data).unwrap(); + + assert!(package.has_file("content.xml")); + assert!(!package.has_file("nonexistent.xml")); + } + + #[test] + fn test_package_files() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = Package::new(&data).unwrap(); + + let files = package.files().unwrap(); + assert!(!files.is_empty()); + assert!(files.contains(&"content.xml".to_string())); + } + + #[test] + fn test_package_media_files() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = Package::new(&data).unwrap(); + + let media_files = package.media_files().unwrap(); + assert!(media_files.contains(&"Pictures/image.png".to_string())); + } + + #[test] + fn test_package_has_media() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = 
Package::new(&data).unwrap(); + + assert!(package.has_media()); + } + + #[test] + fn test_package_manifest() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let package = Package::new(&data).unwrap(); + + let manifest = package.manifest(); + assert_eq!(manifest.mimetype, "application/vnd.oasis.opendocument.text"); + } + + #[test] + fn test_owned_package_package_method() { + let data = create_test_odf_package("application/vnd.oasis.opendocument.text"); + let owned = OwnedPackage::from_bytes(data).unwrap(); + + let package = owned.package(); + assert!(package.is_ok()); + assert_eq!( + package.unwrap().mimetype(), + "application/vnd.oasis.opendocument.text" + ); + } + + #[test] + fn test_package_media_files_various_formats() { + use std::io::Write; + + let mut zip_buffer = Vec::new(); + { + let mut zip = zip::ZipWriter::new(Cursor::new(&mut zip_buffer)); + let options = zip::write::SimpleFileOptions::default() + .compression_method(zip::CompressionMethod::Stored); + + // Write mimetype file + zip.start_file("mimetype", options).unwrap(); + zip.write_all(b"application/vnd.oasis.opendocument.text") + .unwrap(); + + // Write manifest.xml + let manifest_xml = r#" + + +"#; + zip.start_file("META-INF/manifest.xml", options).unwrap(); + zip.write_all(manifest_xml.as_bytes()).unwrap(); + + // Write various media files + zip.start_file("Pictures/photo.jpg", options).unwrap(); + zip.write_all(b"fake jpg data").unwrap(); + + zip.start_file("Pictures/chart.jpeg", options).unwrap(); + zip.write_all(b"fake jpeg data").unwrap(); + + zip.start_file("media/animation.gif", options).unwrap(); + zip.write_all(b"fake gif data").unwrap(); + + zip.start_file("Object/image.svg", options).unwrap(); + zip.write_all(b"").unwrap(); + + zip.start_file("media/diagram.png", options).unwrap(); + zip.write_all(b"fake png data").unwrap(); + + zip.finish().unwrap(); + } + + let package = Package::new(&zip_buffer).unwrap(); + let media_files = 
package.media_files().unwrap(); + + assert!(media_files.contains(&"Pictures/photo.jpg".to_string())); + assert!(media_files.contains(&"Pictures/chart.jpeg".to_string())); + assert!(media_files.contains(&"media/animation.gif".to_string())); + assert!(media_files.contains(&"Object/image.svg".to_string())); + assert!(media_files.contains(&"media/diagram.png".to_string())); + } +} diff --git a/crates/litchi-odf/src/core/writer.rs b/crates/litchi-odf/src/core/writer.rs new file mode 100644 index 0000000..f502da1 --- /dev/null +++ b/crates/litchi-odf/src/core/writer.rs @@ -0,0 +1,487 @@ +//! ODF package writing functionality. +//! +//! This module provides utilities for creating and writing ODF files as ZIP archives, +//! including generating manifests and proper file structure. +//! +//! Uses soapberry-zip for high-performance ZIP writing. + +use litchi_core::{Error, Result, xml::escape_xml}; +use soapberry_zip::office::StreamingArchiveWriter; +use std::collections::HashSet; + +/// Builder for creating ODF packages (ZIP archives) +/// +/// This struct helps create valid ODF files by managing the ZIP archive structure, +/// manifest, and required files. 
+/// +/// # Examples +/// +/// ```ignore +/// # use litchi_odf::core::PackageWriter; +/// # use litchi_core::Result; +/// # fn example() -> Result<()> { +/// let mut writer = PackageWriter::new(); +/// writer.set_mimetype("application/vnd.oasis.opendocument.text")?; +/// writer.add_file("content.xml", b"...")?; +/// writer.add_file("styles.xml", b"...")?; +/// writer.add_file("meta.xml", b"...")?; +/// +/// let bytes = writer.finish()?; +/// std::fs::write("document.odt", bytes)?; +/// # Ok(()) +/// # } +/// ``` +pub struct PackageWriter { + zip_writer: StreamingArchiveWriter>>, + mimetype: Option, + manifest_entries: Vec, + wrote_any_entry: bool, + wrote_mimetype: bool, +} + +/// Entry in the ODF manifest +#[derive(Debug, Clone)] +struct ManifestEntry { + full_path: String, + media_type: String, +} + +impl PackageWriter { + /// Create a new package writer that writes to memory + pub fn new() -> Self { + Self { + zip_writer: StreamingArchiveWriter::new(), + mimetype: None, + manifest_entries: Vec::new(), + wrote_any_entry: false, + wrote_mimetype: false, + } + } + + /// Set the MIME type for the document + /// + /// This sets both the mimetype file and the root manifest entry. 
+ /// + /// # Arguments + /// + /// * `mimetype` - MIME type string (e.g., "application/vnd.oasis.opendocument.text") + pub fn set_mimetype(&mut self, mimetype: &str) -> Result<()> { + if self.wrote_mimetype { + return Err(Error::InvalidFormat("MIME type already set".to_string())); + } + if self.wrote_any_entry { + return Err(Error::InvalidFormat( + "Cannot set MIME type after writing other files".to_string(), + )); + } + + self.mimetype = Some(mimetype.to_string()); + + self.zip_writer + .write_stored("mimetype", mimetype.as_bytes()) + .map_err(|e| Error::ZipError(e.to_string()))?; + self.wrote_any_entry = true; + self.wrote_mimetype = true; + + self.manifest_entries.push(ManifestEntry { + full_path: "/".to_string(), + media_type: mimetype.to_string(), + }); + + Ok(()) + } + + /// Add a file to the package + /// + /// # Arguments + /// + /// * `path` - Path within the ZIP archive (e.g., "content.xml", "Pictures/image1.png") + /// * `content` - File content as bytes + /// + /// # Note + /// + /// This method automatically adds the file to the manifest with an appropriate media type. 
+ pub fn add_file(&mut self, path: &str, content: &[u8]) -> Result<()> { + if path == "mimetype" { + return Err(Error::InvalidFormat( + "mimetype is written via set_mimetype()".to_string(), + )); + } + if !self.wrote_mimetype { + return Err(Error::InvalidFormat("MIME type not set".to_string())); + } + + // Determine media type based on file extension + let media_type = Self::guess_media_type(path); + + // Add to manifest + self.manifest_entries.push(ManifestEntry { + full_path: path.to_string(), + media_type: media_type.to_string(), + }); + + self.zip_writer + .write_deflated(path, content) + .map_err(|e| Error::ZipError(e.to_string()))?; + + self.wrote_any_entry = true; + + Ok(()) + } + + /// Add a file to the package with a specific media type + /// + /// # Arguments + /// + /// * `path` - Path within the ZIP archive + /// * `content` - File content as bytes + /// * `media_type` - MIME type for the manifest entry + #[allow(dead_code)] // Reserved for future use + pub fn add_file_with_media_type( + &mut self, + path: &str, + content: &[u8], + media_type: &str, + ) -> Result<()> { + if path == "mimetype" { + return Err(Error::InvalidFormat( + "mimetype is written via set_mimetype()".to_string(), + )); + } + if !self.wrote_mimetype { + return Err(Error::InvalidFormat("MIME type not set".to_string())); + } + + // Add to manifest + self.manifest_entries.push(ManifestEntry { + full_path: path.to_string(), + media_type: media_type.to_string(), + }); + + self.zip_writer + .write_deflated(path, content) + .map_err(|e| Error::ZipError(e.to_string()))?; + + self.wrote_any_entry = true; + + Ok(()) + } + + /// Generate the manifest.xml content + fn generate_manifest(&self) -> String { + let mut manifest = String::from( + r#""#, + ); + + // Add manifest entries + let mut seen_paths: HashSet<&str> = HashSet::with_capacity(self.manifest_entries.len()); + for entry in &self.manifest_entries { + if !seen_paths.insert(entry.full_path.as_str()) { + continue; + } + 
manifest.push_str(&format!( + r#""#, + escape_xml(&entry.full_path), + escape_xml(&entry.media_type) + )); + } + + manifest.push_str(""); + manifest + } + + /// Guess media type from file path + fn guess_media_type(path: &str) -> &'static str { + if path.ends_with(".xml") { + "text/xml" + } else if path.ends_with(".png") { + "image/png" + } else if path.ends_with(".jpg") || path.ends_with(".jpeg") { + "image/jpeg" + } else if path.ends_with(".gif") { + "image/gif" + } else if path.ends_with(".svg") { + "image/svg+xml" + } else if path.ends_with("/") { + "" // Directory entry + } else { + "application/octet-stream" + } + } + + /// Finish writing the package and return the bytes. + /// + /// This method writes the mimetype file, manifest, and finalizes the ZIP archive. + /// + /// # Errors + /// + /// Returns an error if: + /// - No MIME type has been set + /// - Writing to the ZIP archive fails + pub fn finish(mut self) -> Result> { + if !self.wrote_mimetype { + return Err(Error::InvalidFormat("MIME type not set".to_string())); + } + + // Add META-INF directory to manifest + self.manifest_entries.push(ManifestEntry { + full_path: "META-INF/".to_string(), + media_type: String::new(), + }); + + self.manifest_entries.push(ManifestEntry { + full_path: "META-INF/manifest.xml".to_string(), + media_type: "text/xml".to_string(), + }); + + // Generate and write manifest + let manifest_content = self.generate_manifest(); + self.zip_writer + .write_deflated("META-INF/manifest.xml", manifest_content.as_bytes()) + .map_err(|e| Error::ZipError(e.to_string()))?; + + // Finish ZIP archive and return bytes + self.zip_writer + .finish_to_bytes() + .map_err(|e| Error::ZipError(e.to_string())) + } + + /// Alias for `finish()` for API compatibility. 
+ pub fn finish_to_bytes(self) -> Result> { + self.finish() + } +} + +impl Default for PackageWriter { + fn default() -> Self { + Self::new() + } +} + +/// Helper to create standard ODF directory structure +pub struct OdfStructure; + +impl OdfStructure { + /// Generate a default content.xml skeleton + #[allow(dead_code)] // Reserved for future use + pub fn default_content_xml(office_type: &str) -> String { + format!( + r#"<{office_type}>"# + ) + } + + /// Generate a default styles.xml skeleton + pub fn default_styles_xml() -> String { + r#""#.to_string() + } + + /// Generate a default meta.xml skeleton + #[allow(dead_code)] // Reserved for future use + pub fn default_meta_xml() -> String { + let now = chrono::Utc::now().to_rfc3339(); + format!( + r#"Litchi/0.0.1{}{}"#, + now, now + ) + } + + /// Generate a default settings.xml skeleton + #[allow(dead_code)] // Will be used for future enhancements + pub fn default_settings_xml() -> String { + r#"0011"#.to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_package_writer_new() { + let writer = PackageWriter::new(); + assert!(!writer.wrote_mimetype); + assert!(!writer.wrote_any_entry); + assert!(writer.mimetype.is_none()); + } + + #[test] + fn test_package_writer_default() { + let writer: PackageWriter = Default::default(); + assert!(!writer.wrote_mimetype); + } + + #[test] + fn test_package_writer_set_mimetype() { + let mut writer = PackageWriter::new(); + assert!( + writer + .set_mimetype("application/vnd.oasis.opendocument.text") + .is_ok() + ); + assert!(writer.wrote_mimetype); + assert_eq!( + writer.mimetype, + Some("application/vnd.oasis.opendocument.text".to_string()) + ); + } + + #[test] + fn test_package_writer_set_mimetype_twice() { + let mut writer = PackageWriter::new(); + writer + .set_mimetype("application/vnd.oasis.opendocument.text") + .unwrap(); + assert!( + writer + .set_mimetype("application/vnd.oasis.opendocument.spreadsheet") + .is_err() + ); + } + + #[test] + fn 
test_package_writer_add_file_without_mimetype() { + let mut writer = PackageWriter::new(); + assert!(writer.add_file("content.xml", b"test").is_err()); + } + + #[test] + fn test_package_writer_add_mimetype_as_file() { + let mut writer = PackageWriter::new(); + writer + .set_mimetype("application/vnd.oasis.opendocument.text") + .unwrap(); + assert!(writer.add_file("mimetype", b"test").is_err()); + } + + #[test] + fn test_package_writer_finish_without_mimetype() { + let writer = PackageWriter::new(); + assert!(writer.finish().is_err()); + } + + #[test] + fn test_package_writer_finish_to_bytes() { + let mut writer = PackageWriter::new(); + writer + .set_mimetype("application/vnd.oasis.opendocument.text") + .unwrap(); + let result = writer.finish_to_bytes(); + assert!(result.is_ok()); + let bytes = result.unwrap(); + assert!(!bytes.is_empty()); + } + + #[test] + fn test_package_writer_add_file_with_media_type() { + let mut writer = PackageWriter::new(); + writer + .set_mimetype("application/vnd.oasis.opendocument.text") + .unwrap(); + assert!( + writer + .add_file_with_media_type("custom.dat", b"data", "application/octet-stream") + .is_ok() + ); + } + + #[test] + fn test_guess_media_type() { + assert_eq!(PackageWriter::guess_media_type("content.xml"), "text/xml"); + assert_eq!(PackageWriter::guess_media_type("image.png"), "image/png"); + assert_eq!(PackageWriter::guess_media_type("image.jpg"), "image/jpeg"); + assert_eq!(PackageWriter::guess_media_type("image.jpeg"), "image/jpeg"); + assert_eq!(PackageWriter::guess_media_type("image.gif"), "image/gif"); + assert_eq!( + PackageWriter::guess_media_type("image.svg"), + "image/svg+xml" + ); + assert_eq!(PackageWriter::guess_media_type("META-INF/"), ""); + assert_eq!( + PackageWriter::guess_media_type("data.bin"), + "application/octet-stream" + ); + } + + #[test] + fn test_generate_manifest() { + let mut writer = PackageWriter::new(); + writer + .set_mimetype("application/vnd.oasis.opendocument.text") + .unwrap(); + 
writer.add_file("content.xml", b"test").unwrap(); + + let manifest = writer.generate_manifest(); + assert!(manifest.contains("manifest:manifest")); + assert!(manifest.contains("content.xml")); + assert!(manifest.contains("text/xml")); + } + + #[test] + fn test_odf_structure_default_styles_xml() { + let styles = OdfStructure::default_styles_xml(); + assert!(styles.contains("office:document-styles")); + assert!(styles.contains("office:styles")); + } + + #[test] + fn test_odf_structure_default_meta_xml() { + let meta = OdfStructure::default_meta_xml(); + assert!(meta.contains("office:document-meta")); + assert!(meta.contains("Litchi")); + assert!(meta.contains("meta:creation-date")); + } + + #[test] + fn test_odf_structure_default_settings_xml() { + let settings = OdfStructure::default_settings_xml(); + assert!(settings.contains("office:document-settings")); + assert!(settings.contains("config:config-item")); + } + + #[test] + fn test_odf_structure_default_content_xml() { + let content = OdfStructure::default_content_xml("office:text"); + assert!(content.contains("office:document-content")); + assert!(content.contains("office:text")); + assert!(content.contains("office:body")); + } + + #[test] + fn test_manifest_entry_debug() { + let entry = ManifestEntry { + full_path: "content.xml".to_string(), + media_type: "text/xml".to_string(), + }; + let debug_str = format!("{:?}", entry); + assert!(debug_str.contains("content.xml")); + assert!(debug_str.contains("text/xml")); + } + + #[test] + fn test_package_writer_full_package() { + let mut writer = PackageWriter::new(); + + // Set mimetype + writer + .set_mimetype("application/vnd.oasis.opendocument.text") + .unwrap(); + + // Add files + writer + .add_file("content.xml", b"") + .unwrap(); + writer + .add_file("styles.xml", b"") + .unwrap(); + writer + .add_file("meta.xml", b"") + .unwrap(); + + // Finish + let bytes = writer.finish().unwrap(); + assert!(!bytes.is_empty()); + + // Verify it's a valid ZIP (starts with PK) + 
assert_eq!(&bytes[0..2], b"PK"); + } +} diff --git a/src/odf/core/xml.rs b/crates/litchi-odf/src/core/xml.rs similarity index 92% rename from src/odf/core/xml.rs rename to crates/litchi-odf/src/core/xml.rs index 31a88b5..a3c5aa7 100644 --- a/src/odf/core/xml.rs +++ b/crates/litchi-odf/src/core/xml.rs @@ -3,7 +3,7 @@ //! This module provides common XML parsing functionality used across //! different ODF document types. -use crate::common::{Error, Result}; +use litchi_core::{Error, Result}; /// XML content parser for ODF parts #[derive(Debug)] @@ -51,7 +51,7 @@ impl Content { /// Extract paragraphs from the document body #[allow(dead_code)] - pub fn extract_paragraphs(&self) -> Result> { + pub fn extract_paragraphs(&self) -> Result> { use quick_xml::Reader; use quick_xml::events::Event; @@ -81,11 +81,9 @@ impl Content { current_para_text.clear(); } }, - Ok(Event::Text(ref t)) => { - if in_paragraph { - let text_content = String::from_utf8(t.to_vec()).unwrap_or_default(); - current_para_text.push_str(&text_content); - } + Ok(Event::Text(ref t)) if in_paragraph => { + let text_content = String::from_utf8(t.to_vec()).unwrap_or_default(); + current_para_text.push_str(&text_content); }, Ok(Event::End(ref e)) => { // Copy the name bytes to avoid lifetime issues @@ -104,7 +102,7 @@ impl Content { in_paragraph = false; let trimmed_text = current_para_text.trim().to_string(); if !trimmed_text.is_empty() { - let mut para = crate::odf::elements::text::Paragraph::new(); + let mut para = crate::elements::text::Paragraph::new(); para.set_text(&trimmed_text); if is_current_heading { // For headings, we could set a style or attribute here @@ -164,11 +162,11 @@ impl Meta { } /// Extract basic metadata - pub fn extract_metadata(&self) -> crate::common::Metadata { + pub fn extract_metadata(&self) -> litchi_core::Metadata { // Parse ODF metadata from meta.xml content - match crate::odf::core::metadata::OdfMetadata::from_xml(self.xml.content()) { + match 
crate::core::metadata::OdfMetadata::from_xml(self.xml.content()) { Ok(odf_meta) => odf_meta.into(), - Err(_) => crate::common::Metadata::default(), // Fall back to default on parse error + Err(_) => litchi_core::Metadata::default(), // Fall back to default on parse error } } } diff --git a/src/odf/datatype.rs b/crates/litchi-odf/src/datatype.rs similarity index 90% rename from src/odf/datatype.rs rename to crates/litchi-odf/src/datatype.rs index 06b2d9f..3360d3e 100644 --- a/src/odf/datatype.rs +++ b/crates/litchi-odf/src/datatype.rs @@ -14,8 +14,8 @@ //! //! - odfdo: `3rdparty/odfdo/src/odfdo/datatype.py` -use crate::Result; use chrono::{DateTime, Duration, FixedOffset, NaiveDate, NaiveDateTime, Utc}; +use litchi_core::Result; // ============================================================================ // BOOLEAN CONVERSION @@ -41,7 +41,7 @@ impl Boolean { /// # Examples /// /// ``` - /// use litchi::odf::datatype::Boolean; + /// use litchi_odf::datatype::Boolean; /// /// assert_eq!(Boolean::decode("true").unwrap(), true); /// assert_eq!(Boolean::decode("false").unwrap(), false); @@ -51,7 +51,7 @@ impl Boolean { match data { "true" => Ok(true), "false" => Ok(false), - _ => Err(crate::Error::Other(format!( + _ => Err(litchi_core::Error::Other(format!( "boolean '{}' is invalid, expected 'true' or 'false'", data ))), @@ -71,7 +71,7 @@ impl Boolean { /// # Examples /// /// ``` - /// use litchi::odf::datatype::Boolean; + /// use litchi_odf::datatype::Boolean; /// /// assert_eq!(Boolean::encode(true), "true"); /// assert_eq!(Boolean::encode(false), "false"); @@ -106,15 +106,16 @@ impl Date { /// # Examples /// /// ``` - /// use litchi::odf::datatype::Date; + /// use litchi_odf::datatype::Date; /// use chrono::NaiveDate; /// /// let date = Date::decode("2024-01-31").unwrap(); /// assert_eq!(date, NaiveDate::from_ymd_opt(2024, 1, 31).unwrap()); /// ``` pub fn decode(data: &str) -> Result { - NaiveDate::parse_from_str(data, "%Y-%m-%d") - .map_err(|e| 
crate::Error::Other(format!("Failed to parse ODF date '{}': {}", data, e))) + NaiveDate::parse_from_str(data, "%Y-%m-%d").map_err(|e| { + litchi_core::Error::Other(format!("Failed to parse ODF date '{}': {}", data, e)) + }) } /// Encode chrono::NaiveDate to ODF date string @@ -130,7 +131,7 @@ impl Date { /// # Examples /// /// ``` - /// use litchi::odf::datatype::Date; + /// use litchi_odf::datatype::Date; /// use chrono::NaiveDate; /// /// let date = NaiveDate::from_ymd_opt(2024, 1, 31).unwrap(); @@ -168,7 +169,7 @@ impl DateTimeOdf { /// # Examples /// /// ``` - /// use litchi::odf::datatype::DateTimeOdf; + /// use litchi_odf::datatype::DateTimeOdf; /// /// let dt = DateTimeOdf::decode("2024-01-31T15:30:00").unwrap(); /// let dt_with_tz = DateTimeOdf::decode("2024-01-31T15:30:00+01:00").unwrap(); @@ -197,7 +198,7 @@ impl DateTimeOdf { return Ok(DateTime::::from_naive_utc_and_offset(naive_dt, Utc).fixed_offset()); } - Err(crate::Error::Other(format!( + Err(litchi_core::Error::Other(format!( "Failed to parse ODF datetime '{}'", data ))) @@ -216,7 +217,7 @@ impl DateTimeOdf { /// # Examples /// /// ``` - /// use litchi::odf::datatype::DateTimeOdf; + /// use litchi_odf::datatype::DateTimeOdf; /// use chrono::{DateTime, Utc, TimeZone}; /// /// let dt = Utc.with_ymd_and_hms(2024, 1, 31, 15, 30, 0).unwrap(); @@ -260,7 +261,7 @@ impl DurationOdf { /// # Examples /// /// ``` - /// use litchi::odf::datatype::DurationOdf; + /// use litchi_odf::datatype::DurationOdf; /// use chrono::Duration; /// /// let dur = DurationOdf::decode("PT1H30M").unwrap(); @@ -277,7 +278,7 @@ impl DurationOdf { }; if !data.starts_with('P') { - return Err(crate::Error::Other(format!( + return Err(litchi_core::Error::Other(format!( "Invalid duration format '{}', must start with 'P'", data ))); @@ -296,9 +297,9 @@ impl DurationOdf { match c { '0'..='9' => buffer.push(c), 'D' => { - days = buffer - .parse() - .map_err(|_| crate::Error::Other("Invalid days in duration".to_string()))?; + days = 
buffer.parse().map_err(|_| { + litchi_core::Error::Other("Invalid days in duration".to_string()) + })?; buffer.clear(); }, 'T' => { @@ -306,23 +307,23 @@ impl DurationOdf { }, 'H' => { if !in_time { - return Err(crate::Error::Other( + return Err(litchi_core::Error::Other( "Hours must come after 'T' in duration".to_string(), )); } hours = buffer.parse().map_err(|_| { - crate::Error::Other("Invalid hours in duration".to_string()) + litchi_core::Error::Other("Invalid hours in duration".to_string()) })?; buffer.clear(); }, 'M' => { if in_time { minutes = buffer.parse().map_err(|_| { - crate::Error::Other("Invalid minutes in duration".to_string()) + litchi_core::Error::Other("Invalid minutes in duration".to_string()) })?; } else { // Months not supported in chrono::Duration - return Err(crate::Error::Other( + return Err(litchi_core::Error::Other( "Months in duration not supported".to_string(), )); } @@ -330,17 +331,17 @@ impl DurationOdf { }, 'S' => { if !in_time { - return Err(crate::Error::Other( + return Err(litchi_core::Error::Other( "Seconds must come after 'T' in duration".to_string(), )); } seconds = buffer.parse().map_err(|_| { - crate::Error::Other("Invalid seconds in duration".to_string()) + litchi_core::Error::Other("Invalid seconds in duration".to_string()) })?; buffer.clear(); }, _ => { - return Err(crate::Error::Other(format!( + return Err(litchi_core::Error::Other(format!( "Invalid character '{}' in duration", c ))); @@ -365,7 +366,7 @@ impl DurationOdf { /// # Examples /// /// ``` - /// use litchi::odf::datatype::DurationOdf; + /// use litchi_odf::datatype::DurationOdf; /// use chrono::Duration; /// /// let dur = Duration::minutes(90); @@ -397,8 +398,8 @@ mod tests { #[test] fn test_boolean_decode() { - assert_eq!(Boolean::decode("true").unwrap(), true); - assert_eq!(Boolean::decode("false").unwrap(), false); + assert!(Boolean::decode("true").unwrap()); + assert!(!Boolean::decode("false").unwrap()); assert!(Boolean::decode("invalid").is_err()); 
assert!(Boolean::decode("TRUE").is_err()); assert!(Boolean::decode("1").is_err()); diff --git a/src/odf/elements/attr_parser.rs b/crates/litchi-odf/src/elements/attr_parser.rs similarity index 98% rename from src/odf/elements/attr_parser.rs rename to crates/litchi-odf/src/elements/attr_parser.rs index 804e331..2cde4d1 100644 --- a/src/odf/elements/attr_parser.rs +++ b/crates/litchi-odf/src/elements/attr_parser.rs @@ -21,9 +21,9 @@ //! - ODF 1.2 Specification: Attribute types and valid values //! - odfpy: `3rdparty/odfpy/odf/attrconverters.py` //! - odfdo: `3rdparty/odfdo/src/odfdo/element.py` -use crate::common::{Error, Result}; use atoi_simd::parse; use fast_float2::parse_partial; +use litchi_core::{Error, Result}; use phf::{Map, Set, phf_map, phf_set}; use std::borrow::Cow; @@ -215,7 +215,7 @@ static TEXT_ALIGNS: Set<&'static str> = phf_set! { /// # Examples /// /// ``` -/// # use litchi::odf::elements::attr_parser::parse_bool; +/// # use litchi_odf::elements::attr_parser::parse_bool; /// assert_eq!(parse_bool(b"true").unwrap(), true); /// assert_eq!(parse_bool(b"false").unwrap(), false); /// ``` @@ -244,7 +244,7 @@ pub fn parse_bool(value: &[u8]) -> Result { /// # Examples /// /// ``` -/// # use litchi::odf::elements::attr_parser::parse_int; +/// # use litchi_odf::elements::attr_parser::parse_int; /// assert_eq!(parse_int(b"42").unwrap(), 42); /// assert_eq!(parse_int(b"-123").unwrap(), -123); /// ``` @@ -292,7 +292,7 @@ pub fn parse_uint(value: &[u8]) -> Result { /// # Examples /// /// ``` -/// # use litchi::odf::elements::attr_parser::parse_float; +/// # use litchi_odf::elements::attr_parser::parse_float; /// assert!((parse_float(b"3.14").unwrap() - 3.14).abs() < 0.0001); /// assert!((parse_float(b"-2.5e10").unwrap() + 2.5e10).abs() < 1.0); /// ``` @@ -322,7 +322,7 @@ pub fn parse_float(value: &[u8]) -> Result { /// # Examples /// /// ``` -/// # use litchi::odf::elements::attr_parser::parse_length; +/// # use litchi_odf::elements::attr_parser::parse_length; 
/// let (val, unit) = parse_length(b"2.5cm").unwrap(); /// assert!((val - 2.5).abs() < 0.0001); /// assert_eq!(unit, "cm"); @@ -387,7 +387,7 @@ pub fn parse_percentage(value: &[u8]) -> Result { /// # Examples /// /// ``` -/// # use litchi::odf::elements::attr_parser::parse_color; +/// # use litchi_odf::elements::attr_parser::parse_color; /// let (r, g, b) = parse_color(b"#FF0000").unwrap(); /// assert_eq!((r, g, b), (255, 0, 0)); /// ``` @@ -553,8 +553,8 @@ mod tests { #[test] fn test_parse_bool() { - assert_eq!(parse_bool(b"true").unwrap(), true); - assert_eq!(parse_bool(b"false").unwrap(), false); + assert!(parse_bool(b"true").unwrap()); + assert!(!parse_bool(b"false").unwrap()); assert!(parse_bool(b"invalid").is_err()); } diff --git a/crates/litchi-odf/src/elements/bookmark.rs b/crates/litchi-odf/src/elements/bookmark.rs new file mode 100644 index 0000000..0926fa4 --- /dev/null +++ b/crates/litchi-odf/src/elements/bookmark.rs @@ -0,0 +1,234 @@ +//! Bookmark elements for ODF documents. +//! +//! Bookmarks mark specific locations in a document that can be referenced +//! by cross-references and hyperlinks. 
+ +use super::element::{Element, ElementBase}; +use litchi_core::{Error, Result}; + +/// Represents a bookmark in the document +#[derive(Debug, Clone)] +pub struct Bookmark { + element: Element, +} + +impl Bookmark { + /// Create a new bookmark + pub fn new(name: &str) -> Self { + let mut element = Element::new("text:bookmark"); + element.set_attribute("text:name", name); + Self { element } + } + + /// Create from element + pub fn from_element(element: Element) -> Result { + if element.tag_name() != "text:bookmark" { + return Err(Error::InvalidFormat( + "Element is not a bookmark".to_string(), + )); + } + Ok(Self { element }) + } + + /// Get the bookmark name + pub fn name(&self) -> Option<&str> { + self.element.get_attribute("text:name") + } + + /// Set the bookmark name + pub fn set_name(&mut self, name: &str) { + self.element.set_attribute("text:name", name); + } +} + +/// Represents a bookmark start marker +#[derive(Debug, Clone)] +#[allow(dead_code)] // Library API for document creation +pub struct BookmarkStart { + element: Element, +} + +#[allow(dead_code)] // Library API for document creation +impl BookmarkStart { + /// Create a new bookmark start marker + pub fn new(name: &str) -> Self { + let mut element = Element::new("text:bookmark-start"); + element.set_attribute("text:name", name); + Self { element } + } + + /// Create from element + pub fn from_element(element: Element) -> Result { + if element.tag_name() != "text:bookmark-start" { + return Err(Error::InvalidFormat( + "Element is not a bookmark start".to_string(), + )); + } + Ok(Self { element }) + } + + /// Get the bookmark name + pub fn name(&self) -> Option<&str> { + self.element.get_attribute("text:name") + } + + /// Set the bookmark name + pub fn set_name(&mut self, name: &str) { + self.element.set_attribute("text:name", name); + } +} + +/// Represents a bookmark end marker +#[derive(Debug, Clone)] +#[allow(dead_code)] // Library API for document creation +pub struct BookmarkEnd { + element: 
Element, +} + +#[allow(dead_code)] // Library API for document creation +impl BookmarkEnd { + /// Create a new bookmark end marker + pub fn new(name: &str) -> Self { + let mut element = Element::new("text:bookmark-end"); + element.set_attribute("text:name", name); + Self { element } + } + + /// Create from element + pub fn from_element(element: Element) -> Result { + if element.tag_name() != "text:bookmark-end" { + return Err(Error::InvalidFormat( + "Element is not a bookmark end".to_string(), + )); + } + Ok(Self { element }) + } + + /// Get the bookmark name + pub fn name(&self) -> Option<&str> { + self.element.get_attribute("text:name") + } + + /// Set the bookmark name + pub fn set_name(&mut self, name: &str) { + self.element.set_attribute("text:name", name); + } +} + +/// Represents a bookmark range (start and end) +#[derive(Debug, Clone)] +pub struct BookmarkRange { + /// Bookmark name + pub name: String, + /// Start position (paragraph index, character offset) + pub start: Option<(usize, usize)>, + /// End position (paragraph index, character offset) + pub end: Option<(usize, usize)>, +} + +impl BookmarkRange { + /// Create a new bookmark range + pub fn new(name: String) -> Self { + Self { + name, + start: None, + end: None, + } + } + + /// Check if the bookmark range is complete (has both start and end) + pub fn is_complete(&self) -> bool { + self.start.is_some() && self.end.is_some() + } +} + +/// Utilities for parsing bookmarks from documents +pub struct BookmarkParser; + +impl BookmarkParser { + /// Parse all bookmarks from XML content + pub fn parse_bookmarks(xml_content: &str) -> Result> { + let mut reader = quick_xml::Reader::from_str(xml_content); + let mut buf = Vec::new(); + let mut bookmarks = Vec::new(); + + loop { + match reader.read_event_into(&mut buf) { + Ok(quick_xml::events::Event::Empty(ref e)) => { + let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string(); + + if tag_name == "text:bookmark" { + let mut element = 
Element::new(&tag_name); + + // Parse attributes + for attr in e.attributes().flatten() { + let key = String::from_utf8_lossy(attr.key.as_ref()); + let value = String::from_utf8_lossy(&attr.value); + element.set_attribute(&key, &value); + } + + if let Ok(bookmark) = Bookmark::from_element(element) { + bookmarks.push(bookmark); + } + } + }, + Ok(quick_xml::events::Event::Eof) => break, + Err(_) => break, + _ => {}, + } + buf.clear(); + } + + Ok(bookmarks) + } + + /// Parse bookmark ranges (start/end pairs) from XML content + pub fn parse_bookmark_ranges(xml_content: &str) -> Result> { + let mut reader = quick_xml::Reader::from_str(xml_content); + let mut buf = Vec::new(); + let mut ranges: Vec = Vec::new(); + + loop { + match reader.read_event_into(&mut buf) { + Ok(quick_xml::events::Event::Empty(ref e)) => { + let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string(); + + match tag_name.as_str() { + "text:bookmark-start" => { + // Extract name + for attr in e.attributes().flatten() { + let key = String::from_utf8_lossy(attr.key.as_ref()); + if key == "text:name" { + let name = String::from_utf8_lossy(&attr.value).to_string(); + ranges.push(BookmarkRange::new(name)); + } + } + }, + "text:bookmark-end" => { + // Find matching start + for attr in e.attributes().flatten() { + let key = String::from_utf8_lossy(attr.key.as_ref()); + if key == "text:name" { + let name = String::from_utf8_lossy(&attr.value); + // Mark the range as complete + if let Some(range) = ranges.iter_mut().find(|r| r.name == name) + { + // In a full implementation, we would track positions + range.end = Some((0, 0)); + } + } + } + }, + _ => {}, + } + }, + Ok(quick_xml::events::Event::Eof) => break, + Err(_) => break, + _ => {}, + } + buf.clear(); + } + + Ok(ranges) + } +} diff --git a/src/odf/elements/draw.rs b/crates/litchi-odf/src/elements/draw.rs similarity index 99% rename from src/odf/elements/draw.rs rename to crates/litchi-odf/src/elements/draw.rs index 4d5f0bb..d449d5e 100644 --- 
a/src/odf/elements/draw.rs +++ b/crates/litchi-odf/src/elements/draw.rs @@ -4,7 +4,7 @@ //! images, and other graphical content. use super::element::{Element, ElementBase}; -use crate::common::Result; +use litchi_core::Result; /// A drawing page (slide) element #[derive(Debug, Clone)] diff --git a/src/odf/elements/element.rs b/crates/litchi-odf/src/elements/element.rs similarity index 99% rename from src/odf/elements/element.rs rename to crates/litchi-odf/src/elements/element.rs index 76f6ffd..5ecacab 100644 --- a/src/odf/elements/element.rs +++ b/crates/litchi-odf/src/elements/element.rs @@ -3,8 +3,8 @@ //! This module provides the fundamental Element class that all ODF elements //! inherit from, providing common functionality for XML manipulation. -use crate::common::{Error, Result}; -use crate::odf::elements::namespace::{NamespaceContext, QualifiedName}; +use crate::elements::namespace::{NamespaceContext, QualifiedName}; +use litchi_core::{Error, Result}; use quick_xml::events::Event; use std::collections::HashMap; diff --git a/crates/litchi-odf/src/elements/field.rs b/crates/litchi-odf/src/elements/field.rs new file mode 100644 index 0000000..542cf8a --- /dev/null +++ b/crates/litchi-odf/src/elements/field.rs @@ -0,0 +1,261 @@ +//! Field elements for ODF documents. +//! +//! Fields are dynamic content in ODF documents that can be updated automatically, +//! such as page numbers, dates, cross-references, etc. 
+ +use super::element::{Element, ElementBase}; +use litchi_core::{Error, Result}; + +/// Represents a text field in the document +#[derive(Debug, Clone)] +pub struct Field { + element: Element, +} + +impl Field { + /// Create a new field from an element + pub fn from_element(element: Element) -> Result { + let tag = element.tag_name(); + if !Self::is_field_tag(tag) { + return Err(Error::InvalidFormat(format!( + "Element {} is not a field", + tag + ))); + } + Ok(Self { element }) + } + + /// Check if a tag name represents a field + pub fn is_field_tag(tag: &str) -> bool { + matches!( + tag, + "text:page-number" + | "text:page-count" + | "text:date" + | "text:time" + | "text:file-name" + | "text:author-name" + | "text:author-initials" + | "text:title" + | "text:subject" + | "text:keywords" + | "text:description" + | "text:user-defined" + | "text:reference-ref" + | "text:sequence-ref" + | "text:bookmark-ref" + | "text:variable-set" + | "text:variable-get" + | "text:user-field-get" + | "text:expression" + ) + } + + /// Get the field type + pub fn field_type(&self) -> &str { + self.element.tag_name() + } + + /// Get the field value (text content) + pub fn value(&self) -> String { + self.element.get_text_recursive() + } + + /// Get the field display format + pub fn format(&self) -> Option<&str> { + self.element + .get_attribute("style:data-style-name") + .or_else(|| self.element.get_attribute("number:style")) + } + + /// Get the field name (for named fields like variables or user fields) + pub fn name(&self) -> Option<&str> { + self.element + .get_attribute("text:name") + .or_else(|| self.element.get_attribute("text:variable-name")) + } + + /// Get reference target (for reference fields) + pub fn reference_target(&self) -> Option<&str> { + self.element + .get_attribute("text:ref-name") + .or_else(|| self.element.get_attribute("text:reference-name")) + } +} + +/// Represents a page number field +#[derive(Debug, Clone)] +#[allow(dead_code)] // Library API for document 
creation +pub struct PageNumberField { + element: Element, +} + +#[allow(dead_code)] // Library API for document creation +impl PageNumberField { + /// Create a new page number field + pub fn new() -> Self { + Self { + element: Element::new("text:page-number"), + } + } + + /// Create from element + pub fn from_element(element: Element) -> Result { + if element.tag_name() != "text:page-number" { + return Err(Error::InvalidFormat( + "Element is not a page number field".to_string(), + )); + } + Ok(Self { element }) + } + + /// Get the current page number value + pub fn value(&self) -> String { + self.element.get_text_recursive() + } +} + +impl Default for PageNumberField { + fn default() -> Self { + Self::new() + } +} + +/// Represents a date field +#[derive(Debug, Clone)] +#[allow(dead_code)] // Library API for document creation +pub struct DateField { + element: Element, +} + +#[allow(dead_code)] // Library API for document creation +impl DateField { + /// Create a new date field + pub fn new() -> Self { + Self { + element: Element::new("text:date"), + } + } + + /// Create from element + pub fn from_element(element: Element) -> Result { + if element.tag_name() != "text:date" { + return Err(Error::InvalidFormat( + "Element is not a date field".to_string(), + )); + } + Ok(Self { element }) + } + + /// Get the date value + pub fn value(&self) -> String { + self.element.get_text_recursive() + } + + /// Get the fixed date (if any) + pub fn fixed_date(&self) -> Option<&str> { + self.element.get_attribute("text:date-value") + } + + /// Get whether this date is fixed + pub fn is_fixed(&self) -> bool { + self.element + .get_bool_attribute("text:fixed") + .unwrap_or(false) + } +} + +impl Default for DateField { + fn default() -> Self { + Self::new() + } +} + +/// Represents a reference field +#[derive(Debug, Clone)] +#[allow(dead_code)] // Library API for document creation +pub struct ReferenceField { + element: Element, +} + +#[allow(dead_code)] // Library API for document 
creation +impl ReferenceField { + /// Create a new reference field + pub fn new(ref_name: &str) -> Self { + let mut element = Element::new("text:reference-ref"); + element.set_attribute("text:ref-name", ref_name); + Self { element } + } + + /// Create from element + pub fn from_element(element: Element) -> Result { + let tag = element.tag_name(); + if !matches!( + tag, + "text:reference-ref" | "text:bookmark-ref" | "text:sequence-ref" + ) { + return Err(Error::InvalidFormat(format!( + "Element {} is not a reference field", + tag + ))); + } + Ok(Self { element }) + } + + /// Get the reference name + pub fn ref_name(&self) -> Option<&str> { + self.element.get_attribute("text:ref-name") + } + + /// Get the reference format + pub fn ref_format(&self) -> Option<&str> { + self.element.get_attribute("text:reference-format") + } + + /// Get the reference value + pub fn value(&self) -> String { + self.element.get_text_recursive() + } +} + +/// Utilities for parsing fields from documents +pub struct FieldParser; + +impl FieldParser { + /// Parse all fields from XML content + pub fn parse_fields(xml_content: &str) -> Result> { + let mut reader = quick_xml::Reader::from_str(xml_content); + let mut buf = Vec::new(); + let mut fields = Vec::new(); + + loop { + match reader.read_event_into(&mut buf) { + Ok(quick_xml::events::Event::Start(ref e)) + | Ok(quick_xml::events::Event::Empty(ref e)) => { + let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string(); + + if Field::is_field_tag(&tag_name) { + let mut element = Element::new(&tag_name); + + // Parse attributes + for attr in e.attributes().flatten() { + let key = String::from_utf8_lossy(attr.key.as_ref()); + let value = String::from_utf8_lossy(&attr.value); + element.set_attribute(&key, &value); + } + + if let Ok(field) = Field::from_element(element) { + fields.push(field); + } + } + }, + Ok(quick_xml::events::Event::Eof) => break, + Err(_) => break, + _ => {}, + } + buf.clear(); + } + + Ok(fields) + } +} diff --git 
a/src/odf/elements/meta.rs b/crates/litchi-odf/src/elements/meta.rs similarity index 100% rename from src/odf/elements/meta.rs rename to crates/litchi-odf/src/elements/meta.rs diff --git a/src/odf/elements/mod.rs b/crates/litchi-odf/src/elements/mod.rs similarity index 100% rename from src/odf/elements/mod.rs rename to crates/litchi-odf/src/elements/mod.rs diff --git a/src/odf/elements/namespace.rs b/crates/litchi-odf/src/elements/namespace.rs similarity index 100% rename from src/odf/elements/namespace.rs rename to crates/litchi-odf/src/elements/namespace.rs diff --git a/src/odf/elements/office.rs b/crates/litchi-odf/src/elements/office.rs similarity index 100% rename from src/odf/elements/office.rs rename to crates/litchi-odf/src/elements/office.rs diff --git a/crates/litchi-odf/src/elements/parser.rs b/crates/litchi-odf/src/elements/parser.rs new file mode 100644 index 0000000..c686813 --- /dev/null +++ b/crates/litchi-odf/src/elements/parser.rs @@ -0,0 +1,298 @@ +//! Generic ODF document parser. +//! +//! This module provides a generic parser for ODF document elements that works across +//! all ODF formats (ODT, ODS, ODP). It parses elements (paragraphs, tables, lists, etc.) +//! in the order they appear in the document, preserving the document structure. +//! +//! For format-specific parsing (e.g., ODT track changes, ODP animations), see the +//! format-specific parsers in `odt/parser.rs`, `ods/parser.rs`, etc. 
+ +use crate::elements::element::ElementBase; +use crate::elements::table::Table; +use crate::elements::text::{Heading, List, Paragraph}; +use litchi_core::Result; +use quick_xml::Reader; +use quick_xml::events::Event; + +/// Represents a document element in its original position +#[derive(Debug, Clone)] +pub enum DocumentOrderElement { + /// A paragraph or heading element + Paragraph(Paragraph), + /// A heading element (for separate access) + Heading(Heading), + /// A table element + Table(Table), + /// A list element (currently parsed but not exposed in unified API) + #[allow(dead_code)] // Parsed but not yet exposed in all APIs + List(List), +} + +/// Generic ODF document parser for parsing elements across all ODF formats. +/// +/// This parser provides functionality that is common to all ODF document types +/// (text documents, spreadsheets, presentations). It handles the core document +/// structure elements like paragraphs, tables, headings, and lists. +/// +/// For format-specific features, use the specialized parsers: +/// - `OdtParser` for ODT-specific features (track changes, comments, sections) +/// - `OdsParser` for ODS-specific features (cell formulas, named ranges) +/// - `OdpParser` for ODP-specific features (slide transitions, animations) +pub struct DocumentParser; + +impl DocumentParser { + /// Parse all document elements from XML content in document order. + /// + /// This function reads through the XML content once and extracts all major + /// document elements (paragraphs, headings, tables, lists) in the order they appear. + /// + /// # Arguments + /// + /// * `xml_content` - The XML content to parse (typically from content.xml) + /// + /// # Returns + /// + /// A vector of `DocumentOrderElement` in the order they appear in the document. 
+ /// + /// # Example + /// + /// ```no_run + /// use litchi_odf::elements::parser::DocumentParser; + /// + /// let xml = r#" + /// First paragraph + /// Cell + /// Second paragraph + /// "#; + /// + /// let elements = DocumentParser::parse_elements_in_order(xml).unwrap(); + /// assert_eq!(elements.len(), 3); + /// ``` + pub fn parse_elements_in_order(xml_content: &str) -> Result> { + let mut reader = Reader::from_str(xml_content); + let mut buf = Vec::new(); + let mut elements = Vec::new(); + + // Stack to track nested elements + let mut element_stack: Vec<(String, super::element::Element)> = Vec::new(); + // Depth tracking to avoid parsing nested elements when inside a parent element + let mut table_depth = 0; + let mut list_depth = 0; + + loop { + match reader.read_event_into(&mut buf) { + Ok(Event::Start(ref e)) => { + let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string(); + + match tag_name.as_str() { + "text:p" if table_depth == 0 && list_depth == 0 => { + // Start a paragraph outside of tables and lists + let mut element = super::element::Element::new(&tag_name); + + // Parse attributes + for attr in e.attributes().flatten() { + let key = String::from_utf8_lossy(attr.key.as_ref()); + let value = String::from_utf8_lossy(&attr.value); + element.set_attribute(&key, &value); + } + + element_stack.push((tag_name, element)); + }, + "text:h" if table_depth == 0 && list_depth == 0 => { + // Start a heading outside of tables and lists + let mut element = super::element::Element::new(&tag_name); + + // Parse attributes + for attr in e.attributes().flatten() { + let key = String::from_utf8_lossy(attr.key.as_ref()); + let value = String::from_utf8_lossy(&attr.value); + element.set_attribute(&key, &value); + } + + element_stack.push((tag_name, element)); + }, + "table:table" if table_depth == 0 => { + // Start a table + table_depth += 1; + let mut element = super::element::Element::new(&tag_name); + + // Parse attributes + for attr in 
e.attributes().flatten() { + let key = String::from_utf8_lossy(attr.key.as_ref()); + let value = String::from_utf8_lossy(&attr.value); + element.set_attribute(&key, &value); + } + + element_stack.push((tag_name, element)); + }, + "table:table" => { + // Nested table + table_depth += 1; + }, + "text:list" if list_depth == 0 && table_depth == 0 => { + // Start a list outside of tables + list_depth += 1; + let mut element = super::element::Element::new(&tag_name); + + // Parse attributes + for attr in e.attributes().flatten() { + let key = String::from_utf8_lossy(attr.key.as_ref()); + let value = String::from_utf8_lossy(&attr.value); + element.set_attribute(&key, &value); + } + + element_stack.push((tag_name, element)); + }, + "text:list" => { + // Nested list + list_depth += 1; + }, + // Handle nested elements within tracked elements + _ if !element_stack.is_empty() && table_depth <= 1 && list_depth <= 1 => { + let mut element = super::element::Element::new(&tag_name); + + // Parse attributes + for attr in e.attributes().flatten() { + let key = String::from_utf8_lossy(attr.key.as_ref()); + let value = String::from_utf8_lossy(&attr.value); + element.set_attribute(&key, &value); + } + + element_stack.push((tag_name, element)); + }, + _ => {}, + } + }, + Ok(Event::Text(ref t)) => { + // Add text content to the current element + if let Some((_, element)) = element_stack.last_mut() { + let text = String::from_utf8_lossy(t).to_string(); + let current_text = element.text().to_string(); + element.set_text(&format!("{}{}", current_text, text)); + } + }, + Ok(Event::End(ref e)) => { + let tag_name = String::from_utf8_lossy(e.name().as_ref()).to_string(); + + match tag_name.as_str() { + "text:p" if table_depth == 0 && list_depth == 0 => { + // Complete a top-level paragraph + if let Some((tag, element)) = element_stack.pop() + && tag == "text:p" + && let Ok(para) = Paragraph::from_element(element) + { + elements.push(DocumentOrderElement::Paragraph(para)); + } + }, + "text:h" 
if table_depth == 0 && list_depth == 0 => { + // Complete a top-level heading + if let Some((tag, element)) = element_stack.pop() + && tag == "text:h" + && let Ok(heading) = Heading::from_element(element) + { + elements.push(DocumentOrderElement::Heading(heading)); + } + }, + "table:table" if table_depth == 1 => { + // Complete a top-level table + table_depth -= 1; + if let Some((tag, element)) = element_stack.pop() + && tag == "table:table" + && let Ok(table) = Table::from_element(element) + { + elements.push(DocumentOrderElement::Table(table)); + } + }, + "table:table" => { + table_depth -= 1; + }, + "text:list" if list_depth == 1 && table_depth == 0 => { + // Complete a top-level list + list_depth -= 1; + if let Some((tag, element)) = element_stack.pop() + && tag == "text:list" + && let Ok(list) = List::from_element(element) + { + elements.push(DocumentOrderElement::List(list)); + } + }, + "text:list" => { + list_depth -= 1; + }, + _ if !element_stack.is_empty() => { + // Pop nested element and add to parent + if element_stack.len() > 1 { + let (_, child_element) = element_stack.pop().unwrap(); + if let Some((_, parent_element)) = element_stack.last_mut() { + parent_element.add_child(child_element); + } + } else { + // Single element on stack, check if it should be completed + if let Some((tag, _)) = element_stack.last() + && tag == &tag_name + { + element_stack.pop(); + } + } + }, + _ => { + // Ignore end tags when stack is empty or doesn't match + }, + } + }, + Ok(Event::Eof) => break, + Err(_) => break, + _ => {}, + } + buf.clear(); + } + + Ok(elements) + } + + /// Parse only paragraphs and headings in order. + /// + /// This is a convenience method that filters out only text elements. 
+ #[allow(dead_code)] // Library API for specialized parsing + pub fn parse_text_elements_in_order(xml_content: &str) -> Result> { + let elements = Self::parse_elements_in_order(xml_content)?; + let mut paragraphs = Vec::new(); + + for element in elements { + match element { + DocumentOrderElement::Paragraph(para) => paragraphs.push(para), + DocumentOrderElement::Heading(heading) => { + // Convert heading to paragraph for unified handling + if let Ok(text) = heading.text() { + let mut para = Paragraph::new(); + para.set_text(&text); + if let Some(style) = heading.style_name() { + para.set_style_name(style); + } + paragraphs.push(para); + } + }, + _ => {}, + } + } + + Ok(paragraphs) + } + + /// Parse only tables in order. + /// + /// This is a convenience method that filters out only table elements. + #[allow(dead_code)] // Library API for specialized parsing + pub fn parse_tables_in_order(xml_content: &str) -> Result> { + let elements = Self::parse_elements_in_order(xml_content)?; + let mut tables = Vec::new(); + + for element in elements { + if let DocumentOrderElement::Table(table) = element { + tables.push(table); + } + } + + Ok(tables) + } +} diff --git a/crates/litchi-odf/src/elements/style.rs b/crates/litchi-odf/src/elements/style.rs new file mode 100644 index 0000000..8279cd8 --- /dev/null +++ b/crates/litchi-odf/src/elements/style.rs @@ -0,0 +1,809 @@ +//! Style elements for ODF documents. +//! +//! This module provides comprehensive support for ODF style definitions, +//! including parsing, inheritance, and property resolution. 
+ +use super::element::{Element, ElementBase}; +use litchi_core::Result; +use std::borrow::Cow; +use std::collections::HashMap; + +/// Style family types +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StyleFamily { + /// Paragraph styles + Paragraph, + /// Text/character styles + Text, + /// Table styles + Table, + /// Table column styles + TableColumn, + /// Table row styles + TableRow, + /// Table cell styles + TableCell, + /// Page layout styles + PageLayout, + /// Master page styles + MasterPage, + /// Graphic styles + Graphic, +} + +impl std::str::FromStr for StyleFamily { + type Err = String; + fn from_str(s: &str) -> std::result::Result { + match s { + "paragraph" => Ok(Self::Paragraph), + "text" => Ok(Self::Text), + "table" => Ok(Self::Table), + "table-column" => Ok(Self::TableColumn), + "table-row" => Ok(Self::TableRow), + "table-cell" => Ok(Self::TableCell), + "page-layout" => Ok(Self::PageLayout), + "master-page" => Ok(Self::MasterPage), + "graphic" => Ok(Self::Graphic), + _ => Err(format!("Invalid style family: {}", s)), + } + } +} + +impl StyleFamily { + /// Convert to string + pub fn as_str(&self) -> &'static str { + match self { + Self::Paragraph => "paragraph", + Self::Text => "text", + Self::Table => "table", + Self::TableColumn => "table-column", + Self::TableRow => "table-row", + Self::TableCell => "table-cell", + Self::PageLayout => "page-layout", + Self::MasterPage => "master-page", + Self::Graphic => "graphic", + } + } +} + +/// Style properties container +#[derive(Debug, Clone, Default)] +pub struct StyleProperties<'a> { + /// Text properties + pub text: TextProperties<'a>, + /// Paragraph properties + pub paragraph: ParagraphProperties<'a>, + /// Table properties + pub table: TableProperties<'a>, + /// Graphic properties + pub graphic: GraphicProperties<'a>, +} + +/// Text/character style properties +#[derive(Debug, Clone, Default)] +pub struct TextProperties<'a> { + pub font_name: Option>, + pub font_size: Option>, + pub 
font_weight: Option>, + pub font_style: Option>, + pub color: Option>, + pub background_color: Option>, + pub underline: Option>, + pub strikethrough: Option>, + pub text_shadow: Option>, +} + +/// Paragraph style properties +#[derive(Debug, Clone, Default)] +pub struct ParagraphProperties<'a> { + pub margin_left: Option>, + pub margin_right: Option>, + pub margin_top: Option>, + pub margin_bottom: Option>, + pub text_align: Option>, + pub line_height: Option>, + pub background_color: Option>, + pub border: Option>, +} + +/// Table style properties +#[derive(Debug, Clone, Default)] +pub struct TableProperties<'a> { + pub width: Option>, + pub background_color: Option>, + pub border: Option>, + pub align: Option>, +} + +/// Graphic style properties +#[derive(Debug, Clone, Default)] +pub struct GraphicProperties<'a> { + pub background_color: Option>, + pub border: Option>, + pub shadow: Option>, +} + +/// A style definition element +#[derive(Debug, Clone)] +pub struct Style { + element: Element, + properties: StyleProperties<'static>, +} + +impl Default for Style { + fn default() -> Self { + Self::new() + } +} + +impl Style { + /// Create a new style + pub fn new() -> Self { + Self { + element: Element::new("style:style"), + properties: StyleProperties::default(), + } + } + + /// Create a new style with name and family + /// + /// # Arguments + /// + /// * `name` - Name of the style + /// * `family` - Style family (e.g., "text", "paragraph", "table") + pub fn with_name_and_family(name: &str, family: &str) -> Self { + let mut element = Element::new("style:style"); + element.set_attribute("style:name", name); + element.set_attribute("style:family", family); + Self { + element, + properties: StyleProperties::default(), + } + } + + /// Set a text property + /// + /// # Arguments + /// + /// * `property` - Property name (e.g., "fo:font-size", "fo:font-weight") + /// * `value` - Property value + pub fn set_text_property(&mut self, property: &str, value: &str) { + // Create 
or update text-properties element
        let mut found = false;
        for child in &mut self.element.children {
            if child.tag_name() == "style:text-properties" {
                child.set_attribute(property, value);
                found = true;
                break;
            }
        }

        if !found {
            let mut text_props = Element::new("style:text-properties");
            text_props.set_attribute(property, value);
            self.element.children.push(text_props);
        }
    }

    /// Set a paragraph property
    ///
    /// Updates the first `style:paragraph-properties` child of this style's
    /// element, creating and appending that child when none exists yet.
    ///
    /// # Arguments
    ///
    /// * `property` - Property name (e.g., "fo:text-align", "fo:margin-top")
    /// * `value` - Property value
    pub fn set_paragraph_property(&mut self, property: &str, value: &str) {
        self.set_child_property("style:paragraph-properties", property, value);
    }

    /// Set a table property
    ///
    /// Updates the first `style:table-properties` child of this style's
    /// element, creating and appending that child when none exists yet.
    ///
    /// # Arguments
    ///
    /// * `property` - Property name
    /// * `value` - Property value
    pub fn set_table_property(&mut self, property: &str, value: &str) {
        self.set_child_property("style:table-properties", property, value);
    }

    /// Set `property` to `value` on the first child element named `tag`.
    ///
    /// When no child with that tag name exists, a new element is created,
    /// given the attribute, and pushed onto the end of the child list.
    ///
    /// # Arguments
    ///
    /// * `tag` - Qualified tag name of the properties element
    /// * `property` - Attribute name to set on that element
    /// * `value` - Attribute value
    // NOTE(review): only the element tree is updated here. Within this part of
    // the file `self.properties` is written solely by `parse_properties`, so
    // `properties()` presumably does not reflect setter calls — confirm whether
    // callers rely on that.
    fn set_child_property(&mut self, tag: &'static str, property: &str, value: &str) {
        let existing = self
            .element
            .children
            .iter_mut()
            .find(|child| child.tag_name() == tag);

        if let Some(props) = existing {
            props.set_attribute(property, value);
            return;
        }

        let mut props = Element::new(tag);
        props.set_attribute(property, value);
        self.element.children.push(props);
    }

    /// Create style from element and parse properties
    ///
    /// The style starts with default (empty) parsed properties, which are then
    /// filled in from the element's property children.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `parse_properties`.
    pub fn from_element(element: Element) -> Result<Self> {
        let mut style = Self {
            element,
            properties: StyleProperties::default(),
        };
        style.parse_properties()?;
        Ok(style)
    }

    /// Parse style properties from the element
    ///
    /// Each property group (text, paragraph, table, graphic) is overwritten
    /// only when the matching properties child is present; otherwise the
    /// group keeps its current value.
    fn parse_properties(&mut self) -> Result<()> {
        // Parse text properties
        if let Some(text_prop_elem) = self.find_property_element("style:text-properties") {
            self.properties.text = Self::parse_text_properties(text_prop_elem);
        }

        // Parse paragraph properties
        if let Some(para_prop_elem) = self.find_property_element("style:paragraph-properties") {
            self.properties.paragraph = Self::parse_paragraph_properties(para_prop_elem);
        }

        // Parse table properties
        if let Some(table_prop_elem) = self.find_property_element("style:table-properties") {
            self.properties.table = Self::parse_table_properties(table_prop_elem);
        }

        // Parse graphic properties
        if let Some(graphic_prop_elem) = self.find_property_element("style:graphic-properties") {
            self.properties.graphic = Self::parse_graphic_properties(graphic_prop_elem);
        }

        Ok(())
    }

    /// Find a property element by tag name, returning a reference
    ///
    /// Only direct children are searched; the first match wins.
    fn find_property_element(&self, tag_name: &str) -> Option<&Element> {
        self.element
            .children
            .iter()
            .find(|child| child.tag_name() == tag_name)
    }

    /// Copy the value of attribute `name` on `element` into an owned `Cow`.
    ///
    /// Returns `None` when `get_attribute` returns `None` for `name`.
    fn owned_attribute(element: &Element, name: &str) -> Option<Cow<'static, str>> {
        element
            .get_attribute(name)
            .map(|value| Cow::Owned(value.to_string()))
    }

    /// Parse text properties from element
    ///
    /// Every field is an owned copy of the corresponding attribute, or `None`
    /// when the attribute is not set.
    fn parse_text_properties(element: &Element) -> TextProperties<'static> {
        TextProperties {
            font_name: Self::owned_attribute(element, "style:font-name"),
            font_size: Self::owned_attribute(element, "fo:font-size"),
            font_weight: Self::owned_attribute(element, "fo:font-weight"),
            font_style: Self::owned_attribute(element, "fo:font-style"),
            color: Self::owned_attribute(element, "fo:color"),
            background_color: Self::owned_attribute(element, "fo:background-color"),
            underline: Self::owned_attribute(element, "style:text-underline-style"),
            strikethrough: Self::owned_attribute(element, "style:text-line-through-style"),
            text_shadow: Self::owned_attribute(element, "fo:text-shadow"),
        }
    }

    /// Parse paragraph properties from element
    ///
    /// Every field is an owned copy of the corresponding attribute, or `None`
    /// when the attribute is not set.
    fn parse_paragraph_properties(element: &Element) -> ParagraphProperties<'static> {
        ParagraphProperties {
            margin_left: Self::owned_attribute(element, "fo:margin-left"),
            margin_right: Self::owned_attribute(element, "fo:margin-right"),
            margin_top: Self::owned_attribute(element, "fo:margin-top"),
            margin_bottom: Self::owned_attribute(element, "fo:margin-bottom"),
            text_align: Self::owned_attribute(element, "fo:text-align"),
            line_height: Self::owned_attribute(element, "fo:line-height"),
            background_color: Self::owned_attribute(element, "fo:background-color"),
            border: Self::owned_attribute(element, "fo:border"),
        }
    }

    /// Parse table properties from element
    ///
    /// Every field is an owned copy of the corresponding attribute, or `None`
    /// when the attribute is not set.
    fn parse_table_properties(element: &Element) -> TableProperties<'static> {
        TableProperties {
            width: Self::owned_attribute(element, "style:width"),
            background_color: Self::owned_attribute(element, "fo:background-color"),
            border: Self::owned_attribute(element, "fo:border"),
            align: Self::owned_attribute(element, "table:align"),
        }
    }

    /// Parse graphic properties from element
    ///
    /// `background_color` is read from `draw:fill-color`, `border` from
    /// `draw:stroke` and `shadow` from `draw:shadow`.
    fn parse_graphic_properties(element: &Element) -> GraphicProperties<'static> {
        GraphicProperties {
            background_color: Self::owned_attribute(element, "draw:fill-color"),
            border: Self::owned_attribute(element, "draw:stroke"),
            shadow: Self::owned_attribute(element, "draw:shadow"),
        }
    }

    /// Get the style name
    ///
    /// Returns the `style:name` attribute, or `None` when it is not set.
    pub fn name(&self) -> Option<&str> {
        self.element.get_attribute("style:name")
    }

    /// Get the style family
    ///
    /// Reads the `style:family` attribute and parses it; yields `None` when the
    /// attribute is absent or fails to parse.
    // NOTE(review): the generic arguments on `Option` and `parse::()` look like
    // they were lost from this copy of the file. They are left verbatim because
    // the family type is not visible from here — restore them before compiling.
    pub fn family(&self) -> Option {
        self.element
            .get_attribute("style:family")
            .and_then(|s| s.parse::().ok())
    }

    /// Get the parent style name
    ///
    /// Returns the `style:parent-style-name` attribute, or `None` when it is
    /// not set.
    pub fn parent_style_name(&self) -> Option<&str> {
        self.element.get_attribute("style:parent-style-name")
    }

    /// Get style properties
    ///
    /// Returns the property groups stored on this style.
    pub fn properties(&self) -> &StyleProperties<'static> {
        &self.properties
    }

    /// Check if this style is a default style
    ///
    /// A style counts as default when its element is a `style:default-style`
    /// element (ODF default styles carry no `style:name`), or when it has an
    /// explicitly empty `style:name`, which was the only case recognised before.
    pub fn is_default(&self) -> bool {
        self.element.tag_name() == "style:default-style" || self.name() == Some("")
    }
}

impl From